diff --git a/.github/actions/setup-rust-runtime/action.yaml b/.github/actions/setup-rust-runtime/action.yaml
index cd18be9890315..b6fb2c898bf2f 100644
--- a/.github/actions/setup-rust-runtime/action.yaml
+++ b/.github/actions/setup-rust-runtime/action.yaml
@@ -20,8 +20,10 @@ description: 'Setup Rust Runtime Environment'
runs:
using: "composite"
steps:
- - name: Run sccache-cache
- uses: mozilla-actions/sccache-action@v0.0.4
+ # https://github.com/apache/datafusion/issues/15535
+    # disabled because neither a version tag nor a git hash works with the Apache GitHub policy
+ #- name: Run sccache-cache
+ # uses: mozilla-actions/sccache-action@65101d47ea8028ed0c98a1cdea8dd9182e9b5133 # v0.0.8
- name: Configure runtime env
shell: bash
# do not produce debug symbols to keep memory usage down
@@ -30,9 +32,11 @@ runs:
#
# Set debuginfo=line-tables-only as debuginfo=0 causes immensely slow build
# See for more details: https://github.com/rust-lang/rust/issues/119560
+ #
+      # re-add the following to the run below once sccache-cache is re-enabled
+ # echo "RUSTC_WRAPPER=sccache" >> $GITHUB_ENV
+ # echo "SCCACHE_GHA_ENABLED=true" >> $GITHUB_ENV
run: |
- echo "RUSTC_WRAPPER=sccache" >> $GITHUB_ENV
- echo "SCCACHE_GHA_ENABLED=true" >> $GITHUB_ENV
echo "RUST_BACKTRACE=1" >> $GITHUB_ENV
echo "RUSTFLAGS=-C debuginfo=line-tables-only -C incremental=false" >> $GITHUB_ENV
diff --git a/.github/workflows/audit.yml b/.github/workflows/audit.yml
index 0d65b1aa809ff..491fa27c2a56a 100644
--- a/.github/workflows/audit.yml
+++ b/.github/workflows/audit.yml
@@ -26,6 +26,8 @@ on:
paths:
- "**/Cargo.toml"
- "**/Cargo.lock"
+ branches:
+ - main
pull_request:
paths:
@@ -40,4 +42,6 @@ jobs:
- name: Install cargo-audit
run: cargo install cargo-audit
- name: Run audit check
- run: cargo audit
+ # Ignored until https://github.com/apache/datafusion/issues/15571
+        # ignored pyo3 advisory until arrow 55 upgrade
+ run: cargo audit --ignore RUSTSEC-2024-0370 --ignore RUSTSEC-2025-0020
diff --git a/.github/workflows/extended.yml b/.github/workflows/extended.yml
index a5d68ff079b56..d80fdb75d932d 100644
--- a/.github/workflows/extended.yml
+++ b/.github/workflows/extended.yml
@@ -47,7 +47,7 @@ on:
permissions:
contents: read
checks: write
-
+
jobs:
# Check crate compiles and base cargo check passes
@@ -58,6 +58,7 @@ jobs:
steps:
- uses: actions/checkout@v4
with:
+ ref: ${{ github.event.inputs.pr_head_sha }} # will be empty if triggered by push
submodules: true
fetch-depth: 1
- name: Install Rust
@@ -81,6 +82,7 @@ jobs:
steps:
- uses: actions/checkout@v4
with:
+ ref: ${{ github.event.inputs.pr_head_sha }} # will be empty if triggered by push
submodules: true
fetch-depth: 1
- name: Free Disk Space (Ubuntu)
@@ -114,6 +116,7 @@ jobs:
steps:
- uses: actions/checkout@v4
with:
+ ref: ${{ github.event.inputs.pr_head_sha }} # will be empty if triggered by push
submodules: true
fetch-depth: 1
- name: Setup Rust toolchain
@@ -134,6 +137,7 @@ jobs:
steps:
- uses: actions/checkout@v4
with:
+ ref: ${{ github.event.inputs.pr_head_sha }} # will be empty if triggered by push
submodules: true
fetch-depth: 1
- name: Setup Rust toolchain
@@ -161,14 +165,14 @@ jobs:
echo "workflow_status=completed" >> $GITHUB_OUTPUT
echo "conclusion=success" >> $GITHUB_OUTPUT
fi
-
+
- name: Update check run
uses: actions/github-script@v7
with:
github-token: ${{ secrets.GITHUB_TOKEN }}
script: |
const workflowRunUrl = `https://github.com/${context.repo.owner}/${context.repo.repo}/actions/runs/${context.runId}`;
-
+
await github.rest.checks.update({
owner: context.repo.owner,
repo: context.repo.repo,
diff --git a/.github/workflows/pr_comment_commands.yml b/.github/workflows/pr_comment_commands.yml
index a20a5b15965dd..6aa6caaf34d02 100644
--- a/.github/workflows/pr_comment_commands.yml
+++ b/.github/workflows/pr_comment_commands.yml
@@ -44,12 +44,12 @@ jobs:
repo: context.repo.repo,
pull_number: context.payload.issue.number
});
-
+
// Extract the branch name
const branchName = pullRequest.head.ref;
const headSha = pullRequest.head.sha;
const workflowRunsUrl = `https://github.com/${context.repo.owner}/${context.repo.repo}/actions?query=workflow%3A%22Datafusion+extended+tests%22+branch%3A${branchName}`;
-
+
// Create a check run that links to the Actions tab so the run will be visible in GitHub UI
const check = await github.rest.checks.create({
owner: context.repo.owner,
@@ -69,7 +69,7 @@ jobs:
owner: context.repo.owner,
repo: context.repo.repo,
workflow_id: 'extended.yml',
- ref: branchName,
+ ref: 'main',
inputs: {
pr_number: context.payload.issue.number.toString(),
check_run_id: check.data.id.toString(),
@@ -77,7 +77,7 @@ jobs:
}
});
- - name: Add reaction to comment
+ - name: Add reaction to comment
uses: actions/github-script@v7
with:
script: |
@@ -86,4 +86,4 @@ jobs:
repo: context.repo.repo,
comment_id: context.payload.comment.id,
content: 'rocket'
- });
\ No newline at end of file
+ });
diff --git a/.github/workflows/rust.yml b/.github/workflows/rust.yml
index 1e6cd97acea33..f3b7e19a4970b 100644
--- a/.github/workflows/rust.yml
+++ b/.github/workflows/rust.yml
@@ -384,25 +384,25 @@ jobs:
run: ci/scripts/rust_docs.sh
linux-wasm-pack:
- name: build with wasm-pack
- runs-on: ubuntu-latest
- container:
- image: amd64/rust
+ name: build and run with wasm-pack
+ runs-on: ubuntu-24.04
steps:
- uses: actions/checkout@v4
- - name: Setup Rust toolchain
- uses: ./.github/actions/setup-builder
- with:
- rust-version: stable
+ - name: Setup for wasm32
+ run: |
+ rustup target add wasm32-unknown-unknown
- name: Install dependencies
run: |
- apt-get update -qq
- apt-get install -y -qq clang
- - name: Install wasm-pack
- run: curl https://rustwasm.github.io/wasm-pack/installer/init.sh -sSf | sh
- - name: Build with wasm-pack
+ sudo apt-get update -qq
+ sudo apt-get install -y -qq clang
+ - name: Setup wasm-pack
+ run: |
+ cargo install wasm-pack
+ - name: Run tests with headless mode
working-directory: ./datafusion/wasmtest
- run: wasm-pack build --dev
+ run: |
+ wasm-pack test --headless --firefox
+ wasm-pack test --headless --chrome --chromedriver $CHROMEWEBDRIVER/chromedriver
# verify that the benchmark queries return the correct results
verify-benchmark-results:
@@ -693,6 +693,11 @@ jobs:
# If you encounter an error, run './dev/update_function_docs.sh' and commit
./dev/update_function_docs.sh
git diff --exit-code
+ - name: Check if runtime_configs.md has been modified
+ run: |
+ # If you encounter an error, run './dev/update_runtime_config_docs.sh' and commit
+ ./dev/update_runtime_config_docs.sh
+ git diff --exit-code
# Verify MSRV for the crates which are directly used by other projects:
# - datafusion
diff --git a/Cargo.lock b/Cargo.lock
index 8aba95bdcca4a..299ea0dc4c6fd 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -246,9 +246,9 @@ checksum = "7c02d123df017efcdfbd739ef81735b36c5ba83ec3c59c80a9d7ecc718f92e50"
[[package]]
name = "arrow"
-version = "54.2.1"
+version = "55.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "dc208515aa0151028e464cc94a692156e945ce5126abd3537bb7fd6ba2143ed1"
+checksum = "3095aaf545942ff5abd46654534f15b03a90fba78299d661e045e5d587222f0d"
dependencies = [
"arrow-arith",
"arrow-array",
@@ -265,14 +265,14 @@ dependencies = [
"arrow-string",
"half",
"pyo3",
- "rand 0.8.5",
+ "rand 0.9.0",
]
[[package]]
name = "arrow-arith"
-version = "54.2.1"
+version = "55.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "e07e726e2b3f7816a85c6a45b6ec118eeeabf0b2a8c208122ad949437181f49a"
+checksum = "00752064ff47cee746e816ddb8450520c3a52cbad1e256f6fa861a35f86c45e7"
dependencies = [
"arrow-array",
"arrow-buffer",
@@ -284,9 +284,9 @@ dependencies = [
[[package]]
name = "arrow-array"
-version = "54.2.1"
+version = "55.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "a2262eba4f16c78496adfd559a29fe4b24df6088efc9985a873d58e92be022d5"
+checksum = "cebfe926794fbc1f49ddd0cdaf898956ca9f6e79541efce62dabccfd81380472"
dependencies = [
"ahash 0.8.11",
"arrow-buffer",
@@ -301,9 +301,9 @@ dependencies = [
[[package]]
name = "arrow-buffer"
-version = "54.2.1"
+version = "55.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "4e899dade2c3b7f5642eb8366cfd898958bcca099cde6dfea543c7e8d3ad88d4"
+checksum = "0303c7ec4cf1a2c60310fc4d6bbc3350cd051a17bf9e9c0a8e47b4db79277824"
dependencies = [
"bytes",
"half",
@@ -312,9 +312,9 @@ dependencies = [
[[package]]
name = "arrow-cast"
-version = "54.2.1"
+version = "55.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "4103d88c5b441525ed4ac23153be7458494c2b0c9a11115848fdb9b81f6f886a"
+checksum = "335f769c5a218ea823d3760a743feba1ef7857cba114c01399a891c2fff34285"
dependencies = [
"arrow-array",
"arrow-buffer",
@@ -333,9 +333,9 @@ dependencies = [
[[package]]
name = "arrow-csv"
-version = "54.2.1"
+version = "55.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "43d3cb0914486a3cae19a5cad2598e44e225d53157926d0ada03c20521191a65"
+checksum = "510db7dfbb4d5761826516cc611d97b3a68835d0ece95b034a052601109c0b1b"
dependencies = [
"arrow-array",
"arrow-cast",
@@ -349,9 +349,9 @@ dependencies = [
[[package]]
name = "arrow-data"
-version = "54.2.1"
+version = "55.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "0a329fb064477c9ec5f0870d2f5130966f91055c7c5bce2b3a084f116bc28c3b"
+checksum = "e8affacf3351a24039ea24adab06f316ded523b6f8c3dbe28fbac5f18743451b"
dependencies = [
"arrow-buffer",
"arrow-schema",
@@ -361,9 +361,9 @@ dependencies = [
[[package]]
name = "arrow-flight"
-version = "54.2.1"
+version = "55.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "c7408f2bf3b978eddda272c7699f439760ebc4ac70feca25fefa82c5b8ce808d"
+checksum = "e2e0fad280f41a918d53ba48288a246ff04202d463b3b380fbc0edecdcb52cfd"
dependencies = [
"arrow-arith",
"arrow-array",
@@ -388,9 +388,9 @@ dependencies = [
[[package]]
name = "arrow-ipc"
-version = "54.2.1"
+version = "55.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "ddecdeab02491b1ce88885986e25002a3da34dd349f682c7cfe67bab7cc17b86"
+checksum = "69880a9e6934d9cba2b8630dd08a3463a91db8693b16b499d54026b6137af284"
dependencies = [
"arrow-array",
"arrow-buffer",
@@ -402,9 +402,9 @@ dependencies = [
[[package]]
name = "arrow-json"
-version = "54.2.1"
+version = "55.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "d03b9340013413eb84868682ace00a1098c81a5ebc96d279f7ebf9a4cac3c0fd"
+checksum = "d8dafd17a05449e31e0114d740530e0ada7379d7cb9c338fd65b09a8130960b0"
dependencies = [
"arrow-array",
"arrow-buffer",
@@ -413,18 +413,20 @@ dependencies = [
"arrow-schema",
"chrono",
"half",
- "indexmap 2.8.0",
+ "indexmap 2.9.0",
"lexical-core",
+ "memchr",
"num",
"serde",
"serde_json",
+ "simdutf8",
]
[[package]]
name = "arrow-ord"
-version = "54.2.1"
+version = "55.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "f841bfcc1997ef6ac48ee0305c4dfceb1f7c786fe31e67c1186edf775e1f1160"
+checksum = "895644523af4e17502d42c3cb6b27cb820f0cb77954c22d75c23a85247c849e1"
dependencies = [
"arrow-array",
"arrow-buffer",
@@ -435,9 +437,9 @@ dependencies = [
[[package]]
name = "arrow-row"
-version = "54.2.1"
+version = "55.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "1eeb55b0a0a83851aa01f2ca5ee5648f607e8506ba6802577afdda9d75cdedcd"
+checksum = "9be8a2a4e5e7d9c822b2b8095ecd77010576d824f654d347817640acfc97d229"
dependencies = [
"arrow-array",
"arrow-buffer",
@@ -448,9 +450,9 @@ dependencies = [
[[package]]
name = "arrow-schema"
-version = "54.2.1"
+version = "55.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "85934a9d0261e0fa5d4e2a5295107d743b543a6e0484a835d4b8db2da15306f9"
+checksum = "7450c76ab7c5a6805be3440dc2e2096010da58f7cab301fdc996a4ee3ee74e49"
dependencies = [
"bitflags 2.8.0",
"serde",
@@ -458,9 +460,9 @@ dependencies = [
[[package]]
name = "arrow-select"
-version = "54.2.1"
+version = "55.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "7e2932aece2d0c869dd2125feb9bd1709ef5c445daa3838ac4112dcfa0fda52c"
+checksum = "aa5f5a93c75f46ef48e4001535e7b6c922eeb0aa20b73cf58d09e13d057490d8"
dependencies = [
"ahash 0.8.11",
"arrow-array",
@@ -472,9 +474,9 @@ dependencies = [
[[package]]
name = "arrow-string"
-version = "54.2.1"
+version = "55.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "912e38bd6a7a7714c1d9b61df80315685553b7455e8a6045c27531d8ecd5b458"
+checksum = "6e7005d858d84b56428ba2a98a107fe88c0132c61793cf6b8232a1f9bfc0452b"
dependencies = [
"arrow-array",
"arrow-buffer",
@@ -1047,9 +1049,9 @@ dependencies = [
[[package]]
name = "bigdecimal"
-version = "0.4.7"
+version = "0.4.8"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "7f31f3af01c5c65a07985c804d3366560e6fa7883d640a122819b14ec327482c"
+checksum = "1a22f228ab7a1b23027ccc6c350b72868017af7ea8356fbdf19f8d991c690013"
dependencies = [
"autocfg",
"libm",
@@ -1117,9 +1119,9 @@ dependencies = [
[[package]]
name = "blake3"
-version = "1.7.0"
+version = "1.8.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "b17679a8d69b6d7fd9cd9801a536cec9fa5e5970b69f9d4747f70b39b031f5e7"
+checksum = "389a099b34312839e16420d499a9cad9650541715937ffbdd40d36f49e77eeb3"
dependencies = [
"arrayref",
"arrayvec",
@@ -1361,9 +1363,9 @@ checksum = "613afe47fcd5fac7ccf1db93babcb082c5994d996f20b8b159f2ad1658eb5724"
[[package]]
name = "chrono"
-version = "0.4.39"
+version = "0.4.40"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "7e36cc9d416881d2e24f9a963be5fb1cd90966419ac844274161d10488b3e825"
+checksum = "1a7964611d71df112cb1730f2ee67324fcf4d0fc6606acbbe9bfe06df124637c"
dependencies = [
"android-tzdata",
"iana-time-zone",
@@ -1371,7 +1373,7 @@ dependencies = [
"num-traits",
"serde",
"wasm-bindgen",
- "windows-targets 0.52.6",
+ "windows-link",
]
[[package]]
@@ -1446,9 +1448,9 @@ dependencies = [
[[package]]
name = "clap"
-version = "4.5.34"
+version = "4.5.36"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "e958897981290da2a852763fe9cdb89cd36977a5d729023127095fa94d95e2ff"
+checksum = "2df961d8c8a0d08aa9945718ccf584145eee3f3aa06cddbeac12933781102e04"
dependencies = [
"clap_builder",
"clap_derive",
@@ -1456,9 +1458,9 @@ dependencies = [
[[package]]
name = "clap_builder"
-version = "4.5.34"
+version = "4.5.36"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "83b0f35019843db2160b5bb19ae09b4e6411ac33fc6a712003c33e03090e2489"
+checksum = "132dbda40fb6753878316a489d5a1242a8ef2f0d9e47ba01c951ea8aa7d013a5"
dependencies = [
"anstream",
"anstyle",
@@ -1640,7 +1642,7 @@ dependencies = [
"anes",
"cast",
"ciborium",
- "clap 4.5.34",
+ "clap 4.5.36",
"criterion-plot",
"futures",
"is-terminal",
@@ -1671,9 +1673,9 @@ dependencies = [
[[package]]
name = "crossbeam-channel"
-version = "0.5.14"
+version = "0.5.15"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "06ba6d68e24814cb8de6bb986db8222d3a027d15872cabc0d18817bc3c0e4471"
+checksum = "82b8f8f868b36967f9606790d1903570de9ceaf870a7bf9fbbd3016d636a2cb2"
dependencies = [
"crossbeam-utils",
]
@@ -1807,7 +1809,7 @@ dependencies = [
[[package]]
name = "datafusion"
-version = "46.0.1"
+version = "47.0.0"
dependencies = [
"arrow",
"arrow-ipc",
@@ -1877,7 +1879,7 @@ dependencies = [
[[package]]
name = "datafusion-benchmarks"
-version = "46.0.1"
+version = "47.0.0"
dependencies = [
"arrow",
"datafusion",
@@ -1901,7 +1903,7 @@ dependencies = [
[[package]]
name = "datafusion-catalog"
-version = "46.0.1"
+version = "47.0.0"
dependencies = [
"arrow",
"async-trait",
@@ -1925,7 +1927,7 @@ dependencies = [
[[package]]
name = "datafusion-catalog-listing"
-version = "46.0.1"
+version = "47.0.0"
dependencies = [
"arrow",
"async-trait",
@@ -1947,14 +1949,14 @@ dependencies = [
[[package]]
name = "datafusion-cli"
-version = "46.0.1"
+version = "47.0.0"
dependencies = [
"arrow",
"assert_cmd",
"async-trait",
"aws-config",
"aws-credential-types",
- "clap 4.5.34",
+ "clap 4.5.36",
"ctor",
"datafusion",
"dirs",
@@ -1976,7 +1978,7 @@ dependencies = [
[[package]]
name = "datafusion-common"
-version = "46.0.1"
+version = "47.0.0"
dependencies = [
"ahash 0.8.11",
"apache-avro",
@@ -1986,7 +1988,7 @@ dependencies = [
"chrono",
"half",
"hashbrown 0.14.5",
- "indexmap 2.8.0",
+ "indexmap 2.9.0",
"insta",
"libc",
"log",
@@ -2003,7 +2005,7 @@ dependencies = [
[[package]]
name = "datafusion-common-runtime"
-version = "46.0.1"
+version = "47.0.0"
dependencies = [
"futures",
"log",
@@ -2012,7 +2014,7 @@ dependencies = [
[[package]]
name = "datafusion-datasource"
-version = "46.0.1"
+version = "47.0.0"
dependencies = [
"arrow",
"async-compression",
@@ -2020,6 +2022,7 @@ dependencies = [
"bytes",
"bzip2 0.5.2",
"chrono",
+ "criterion",
"datafusion-common",
"datafusion-common-runtime",
"datafusion-execution",
@@ -2046,7 +2049,7 @@ dependencies = [
[[package]]
name = "datafusion-datasource-avro"
-version = "46.0.1"
+version = "47.0.0"
dependencies = [
"apache-avro",
"arrow",
@@ -2071,7 +2074,7 @@ dependencies = [
[[package]]
name = "datafusion-datasource-csv"
-version = "46.0.1"
+version = "47.0.0"
dependencies = [
"arrow",
"async-trait",
@@ -2094,7 +2097,7 @@ dependencies = [
[[package]]
name = "datafusion-datasource-json"
-version = "46.0.1"
+version = "47.0.0"
dependencies = [
"arrow",
"async-trait",
@@ -2117,7 +2120,7 @@ dependencies = [
[[package]]
name = "datafusion-datasource-parquet"
-version = "46.0.1"
+version = "47.0.0"
dependencies = [
"arrow",
"async-trait",
@@ -2147,11 +2150,11 @@ dependencies = [
[[package]]
name = "datafusion-doc"
-version = "46.0.1"
+version = "47.0.0"
[[package]]
name = "datafusion-examples"
-version = "46.0.1"
+version = "47.0.0"
dependencies = [
"arrow",
"arrow-flight",
@@ -2160,6 +2163,7 @@ dependencies = [
"bytes",
"dashmap",
"datafusion",
+ "datafusion-ffi",
"datafusion-proto",
"env_logger",
"futures",
@@ -2180,7 +2184,7 @@ dependencies = [
[[package]]
name = "datafusion-execution"
-version = "46.0.1"
+version = "47.0.0"
dependencies = [
"arrow",
"chrono",
@@ -2198,7 +2202,7 @@ dependencies = [
[[package]]
name = "datafusion-expr"
-version = "46.0.1"
+version = "47.0.0"
dependencies = [
"arrow",
"chrono",
@@ -2210,7 +2214,7 @@ dependencies = [
"datafusion-functions-window-common",
"datafusion-physical-expr-common",
"env_logger",
- "indexmap 2.8.0",
+ "indexmap 2.9.0",
"paste",
"recursive",
"serde_json",
@@ -2219,21 +2223,22 @@ dependencies = [
[[package]]
name = "datafusion-expr-common"
-version = "46.0.1"
+version = "47.0.0"
dependencies = [
"arrow",
"datafusion-common",
- "indexmap 2.8.0",
+ "indexmap 2.9.0",
"itertools 0.14.0",
"paste",
]
[[package]]
name = "datafusion-ffi"
-version = "46.0.1"
+version = "47.0.0"
dependencies = [
"abi_stable",
"arrow",
+ "arrow-schema",
"async-ffi",
"async-trait",
"datafusion",
@@ -2248,7 +2253,7 @@ dependencies = [
[[package]]
name = "datafusion-functions"
-version = "46.0.1"
+version = "47.0.0"
dependencies = [
"arrow",
"arrow-buffer",
@@ -2277,7 +2282,7 @@ dependencies = [
[[package]]
name = "datafusion-functions-aggregate"
-version = "46.0.1"
+version = "47.0.0"
dependencies = [
"ahash 0.8.11",
"arrow",
@@ -2298,7 +2303,7 @@ dependencies = [
[[package]]
name = "datafusion-functions-aggregate-common"
-version = "46.0.1"
+version = "47.0.0"
dependencies = [
"ahash 0.8.11",
"arrow",
@@ -2311,7 +2316,7 @@ dependencies = [
[[package]]
name = "datafusion-functions-nested"
-version = "46.0.1"
+version = "47.0.0"
dependencies = [
"arrow",
"arrow-ord",
@@ -2332,7 +2337,7 @@ dependencies = [
[[package]]
name = "datafusion-functions-table"
-version = "46.0.1"
+version = "47.0.0"
dependencies = [
"arrow",
"async-trait",
@@ -2346,7 +2351,7 @@ dependencies = [
[[package]]
name = "datafusion-functions-window"
-version = "46.0.1"
+version = "47.0.0"
dependencies = [
"arrow",
"datafusion-common",
@@ -2362,7 +2367,7 @@ dependencies = [
[[package]]
name = "datafusion-functions-window-common"
-version = "46.0.1"
+version = "47.0.0"
dependencies = [
"datafusion-common",
"datafusion-physical-expr-common",
@@ -2370,7 +2375,7 @@ dependencies = [
[[package]]
name = "datafusion-macros"
-version = "46.0.1"
+version = "47.0.0"
dependencies = [
"datafusion-expr",
"quote",
@@ -2379,11 +2384,12 @@ dependencies = [
[[package]]
name = "datafusion-optimizer"
-version = "46.0.1"
+version = "47.0.0"
dependencies = [
"arrow",
"async-trait",
"chrono",
+ "criterion",
"ctor",
"datafusion-common",
"datafusion-expr",
@@ -2393,7 +2399,8 @@ dependencies = [
"datafusion-physical-expr",
"datafusion-sql",
"env_logger",
- "indexmap 2.8.0",
+ "indexmap 2.9.0",
+ "insta",
"itertools 0.14.0",
"log",
"recursive",
@@ -2403,7 +2410,7 @@ dependencies = [
[[package]]
name = "datafusion-physical-expr"
-version = "46.0.1"
+version = "47.0.0"
dependencies = [
"ahash 0.8.11",
"arrow",
@@ -2416,7 +2423,8 @@ dependencies = [
"datafusion-physical-expr-common",
"half",
"hashbrown 0.14.5",
- "indexmap 2.8.0",
+ "indexmap 2.9.0",
+ "insta",
"itertools 0.14.0",
"log",
"paste",
@@ -2427,7 +2435,7 @@ dependencies = [
[[package]]
name = "datafusion-physical-expr-common"
-version = "46.0.1"
+version = "47.0.0"
dependencies = [
"ahash 0.8.11",
"arrow",
@@ -2439,7 +2447,7 @@ dependencies = [
[[package]]
name = "datafusion-physical-optimizer"
-version = "46.0.1"
+version = "47.0.0"
dependencies = [
"arrow",
"datafusion-common",
@@ -2458,7 +2466,7 @@ dependencies = [
[[package]]
name = "datafusion-physical-plan"
-version = "46.0.1"
+version = "47.0.0"
dependencies = [
"ahash 0.8.11",
"arrow",
@@ -2479,7 +2487,7 @@ dependencies = [
"futures",
"half",
"hashbrown 0.14.5",
- "indexmap 2.8.0",
+ "indexmap 2.9.0",
"insta",
"itertools 0.14.0",
"log",
@@ -2488,12 +2496,13 @@ dependencies = [
"rand 0.8.5",
"rstest",
"rstest_reuse",
+ "tempfile",
"tokio",
]
[[package]]
name = "datafusion-proto"
-version = "46.0.1"
+version = "47.0.0"
dependencies = [
"arrow",
"chrono",
@@ -2516,7 +2525,7 @@ dependencies = [
[[package]]
name = "datafusion-proto-common"
-version = "46.0.1"
+version = "47.0.0"
dependencies = [
"arrow",
"datafusion-common",
@@ -2529,7 +2538,7 @@ dependencies = [
[[package]]
name = "datafusion-session"
-version = "46.0.1"
+version = "47.0.0"
dependencies = [
"arrow",
"async-trait",
@@ -2551,7 +2560,7 @@ dependencies = [
[[package]]
name = "datafusion-sql"
-version = "46.0.1"
+version = "47.0.0"
dependencies = [
"arrow",
"bigdecimal",
@@ -2563,7 +2572,8 @@ dependencies = [
"datafusion-functions-nested",
"datafusion-functions-window",
"env_logger",
- "indexmap 2.8.0",
+ "indexmap 2.9.0",
+ "insta",
"log",
"paste",
"recursive",
@@ -2574,14 +2584,14 @@ dependencies = [
[[package]]
name = "datafusion-sqllogictest"
-version = "46.0.1"
+version = "47.0.0"
dependencies = [
"arrow",
"async-trait",
"bigdecimal",
"bytes",
"chrono",
- "clap 4.5.34",
+ "clap 4.5.36",
"datafusion",
"env_logger",
"futures",
@@ -2605,7 +2615,7 @@ dependencies = [
[[package]]
name = "datafusion-substrait"
-version = "46.0.1"
+version = "47.0.0"
dependencies = [
"async-recursion",
"async-trait",
@@ -2625,7 +2635,7 @@ dependencies = [
[[package]]
name = "datafusion-wasmtest"
-version = "46.0.1"
+version = "47.0.0"
dependencies = [
"chrono",
"console_error_panic_hook",
@@ -2795,9 +2805,9 @@ dependencies = [
[[package]]
name = "env_logger"
-version = "0.11.7"
+version = "0.11.8"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "c3716d7a920fb4fac5d84e9d4bce8ceb321e9414b4409da61b07b75c1e3d0697"
+checksum = "13c863f0904021b108aa8b2f55046443e6b1ebde8fd4a15c399893aae4fa069f"
dependencies = [
"anstream",
"anstyle",
@@ -2918,21 +2928,22 @@ checksum = "1d674e81391d1e1ab681a28d99df07927c6d4aa5b027d7da16ba32d1d21ecd99"
[[package]]
name = "flatbuffers"
-version = "24.12.23"
+version = "25.2.10"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "4f1baf0dbf96932ec9a3038d57900329c015b0bfb7b63d904f3bc27e2b02a096"
+checksum = "1045398c1bfd89168b5fd3f1fc11f6e70b34f6f66300c87d44d3de849463abf1"
dependencies = [
- "bitflags 1.3.2",
+ "bitflags 2.8.0",
"rustc_version",
]
[[package]]
name = "flate2"
-version = "1.1.0"
+version = "1.1.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "11faaf5a5236997af9848be0bef4db95824b1d534ebc64d0f0c6cf3e67bd38dc"
+checksum = "7ced92e76e966ca2fd84c8f7aa01a4aea65b0eb6648d72f7c8f3e2764a67fece"
dependencies = [
"crc32fast",
+ "libz-rs-sys",
"miniz_oxide",
]
@@ -3179,7 +3190,7 @@ dependencies = [
"futures-core",
"futures-sink",
"http 1.2.0",
- "indexmap 2.8.0",
+ "indexmap 2.9.0",
"slab",
"tokio",
"tokio-util",
@@ -3188,9 +3199,9 @@ dependencies = [
[[package]]
name = "half"
-version = "2.5.0"
+version = "2.6.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "7db2ff139bba50379da6aa0766b52fdcb62cb5b263009b09ed58ba604e14bbd1"
+checksum = "459196ed295495a68f7d7fe1d84f6c4b7ff0e21fe3017b2f283c6fac3ad803c9"
dependencies = [
"cfg-if",
"crunchy",
@@ -3628,9 +3639,9 @@ dependencies = [
[[package]]
name = "indexmap"
-version = "2.8.0"
+version = "2.9.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "3954d50fe15b02142bf25d3b8bdadb634ec3948f103d04ffe3031bc8fe9d7058"
+checksum = "cea70ddb795996207ad57735b50c5982d8844f38ba9ee5f1aedcfb708a2aa11e"
dependencies = [
"equivalent",
"hashbrown 0.15.2",
@@ -3867,9 +3878,9 @@ dependencies = [
[[package]]
name = "libc"
-version = "0.2.171"
+version = "0.2.172"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "c19937216e9d3aa9956d9bb8dfc0b0c8beb6058fc4f7a4dc4d850edf86a237d6"
+checksum = "d750af042f7ef4f724306de029d18836c26c1765a54a6a3f094cbd23a7267ffa"
[[package]]
name = "libflate"
@@ -3923,9 +3934,9 @@ checksum = "8355be11b20d696c8f18f6cc018c4e372165b1fa8126cef092399c9951984ffa"
[[package]]
name = "libmimalloc-sys"
-version = "0.1.40"
+version = "0.1.42"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "07d0e07885d6a754b9c7993f2625187ad694ee985d60f23355ff0e7077261502"
+checksum = "ec9d6fac27761dabcd4ee73571cdb06b7022dc99089acbe5435691edffaac0f4"
dependencies = [
"cc",
"libc",
@@ -3950,10 +3961,19 @@ checksum = "5297962ef19edda4ce33aaa484386e0a5b3d7f2f4e037cbeee00503ef6b29d33"
dependencies = [
"anstream",
"anstyle",
- "clap 4.5.34",
+ "clap 4.5.36",
"escape8259",
]
+[[package]]
+name = "libz-rs-sys"
+version = "0.5.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "6489ca9bd760fe9642d7644e827b0c9add07df89857b0416ee15c1cc1a3b8c5a"
+dependencies = [
+ "zlib-rs",
+]
+
[[package]]
name = "linked-hash-map"
version = "0.5.6"
@@ -4000,7 +4020,7 @@ version = "0.11.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "75761162ae2b0e580d7e7c390558127e5f01b4194debd6221fd8c207fc80e3f5"
dependencies = [
- "twox-hash",
+ "twox-hash 1.6.3",
]
[[package]]
@@ -4047,9 +4067,9 @@ dependencies = [
[[package]]
name = "mimalloc"
-version = "0.1.44"
+version = "0.1.46"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "99585191385958383e13f6b822e6b6d8d9cf928e7d286ceb092da92b43c87bc1"
+checksum = "995942f432bbb4822a7e9c3faa87a695185b0d09273ba85f097b54f4e458f2af"
dependencies = [
"libmimalloc-sys",
]
@@ -4078,9 +4098,9 @@ checksum = "68354c5c6bd36d73ff3feceb05efa59b6acb7626617f4962be322a825e61f79a"
[[package]]
name = "miniz_oxide"
-version = "0.8.4"
+version = "0.8.8"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "b3b1c9bd4fe1f0f8b387f6eb9eb3b4a1aa26185e5750efb9140301703f62cd1b"
+checksum = "3be647b768db090acb35d5ec5db2b0e1f1de11133ca123b9eacf5137868f892a"
dependencies = [
"adler2",
]
@@ -4245,6 +4265,15 @@ version = "0.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "830b246a0e5f20af87141b25c173cd1b609bd7779a4617d6ec582abaf90870f3"
+[[package]]
+name = "objc2-core-foundation"
+version = "0.3.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "daeaf60f25471d26948a1c2f840e3f7d86f4109e3af4e8e4b5cd70c39690d925"
+dependencies = [
+ "bitflags 2.8.0",
+]
+
[[package]]
name = "object"
version = "0.36.7"
@@ -4256,18 +4285,21 @@ dependencies = [
[[package]]
name = "object_store"
-version = "0.11.2"
+version = "0.12.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "3cfccb68961a56facde1163f9319e0d15743352344e7808a11795fb99698dcaf"
+checksum = "e9ce831b09395f933addbc56d894d889e4b226eba304d4e7adbab591e26daf1e"
dependencies = [
"async-trait",
"base64 0.22.1",
"bytes",
"chrono",
+ "form_urlencoded",
"futures",
+ "http 1.2.0",
+ "http-body-util",
"humantime",
"hyper",
- "itertools 0.13.0",
+ "itertools 0.14.0",
"md-5",
"parking_lot",
"percent-encoding",
@@ -4278,7 +4310,8 @@ dependencies = [
"rustls-pemfile",
"serde",
"serde_json",
- "snafu",
+ "serde_urlencoded",
+ "thiserror 2.0.12",
"tokio",
"tracing",
"url",
@@ -4361,9 +4394,9 @@ dependencies = [
[[package]]
name = "parquet"
-version = "54.2.1"
+version = "55.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "f88838dca3b84d41444a0341b19f347e8098a3898b0f21536654b8b799e11abd"
+checksum = "cd31a8290ac5b19f09ad77ee7a1e6a541f1be7674ad410547d5f1eef6eef4a9c"
dependencies = [
"ahash 0.8.11",
"arrow-array",
@@ -4391,9 +4424,8 @@ dependencies = [
"snap",
"thrift",
"tokio",
- "twox-hash",
+ "twox-hash 2.1.0",
"zstd",
- "zstd-sys",
]
[[package]]
@@ -4486,7 +4518,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3672b37090dbd86368a4145bc067582552b29c27377cad4e0a306c97f9bd7772"
dependencies = [
"fixedbitset",
- "indexmap 2.8.0",
+ "indexmap 2.9.0",
]
[[package]]
@@ -4840,9 +4872,9 @@ dependencies = [
[[package]]
name = "pyo3"
-version = "0.23.5"
+version = "0.24.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "7778bffd85cf38175ac1f545509665d0b9b92a198ca7941f131f85f7a4f9a872"
+checksum = "e5203598f366b11a02b13aa20cab591229ff0a89fd121a308a5df751d5fc9219"
dependencies = [
"cfg-if",
"indoc",
@@ -4858,9 +4890,9 @@ dependencies = [
[[package]]
name = "pyo3-build-config"
-version = "0.23.5"
+version = "0.24.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "94f6cbe86ef3bf18998d9df6e0f3fc1050a8c5efa409bf712e661a4366e010fb"
+checksum = "99636d423fa2ca130fa5acde3059308006d46f98caac629418e53f7ebb1e9999"
dependencies = [
"once_cell",
"target-lexicon",
@@ -4868,9 +4900,9 @@ dependencies = [
[[package]]
name = "pyo3-ffi"
-version = "0.23.5"
+version = "0.24.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "e9f1b4c431c0bb1c8fb0a338709859eed0d030ff6daa34368d3b152a63dfdd8d"
+checksum = "78f9cf92ba9c409279bc3305b5409d90db2d2c22392d443a87df3a1adad59e33"
dependencies = [
"libc",
"pyo3-build-config",
@@ -4878,9 +4910,9 @@ dependencies = [
[[package]]
name = "pyo3-macros"
-version = "0.23.5"
+version = "0.24.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "fbc2201328f63c4710f68abdf653c89d8dbc2858b88c5d88b0ff38a75288a9da"
+checksum = "0b999cb1a6ce21f9a6b147dcf1be9ffedf02e0043aec74dc390f3007047cecd9"
dependencies = [
"proc-macro2",
"pyo3-macros-backend",
@@ -4890,9 +4922,9 @@ dependencies = [
[[package]]
name = "pyo3-macros-backend"
-version = "0.23.5"
+version = "0.24.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "fca6726ad0f3da9c9de093d6f116a93c1a38e417ed73bf138472cf4064f72028"
+checksum = "822ece1c7e1012745607d5cf0bcb2874769f0f7cb34c4cde03b9358eb9ef911a"
dependencies = [
"heck 0.5.0",
"proc-macro2",
@@ -5689,7 +5721,7 @@ dependencies = [
"chrono",
"hex",
"indexmap 1.9.3",
- "indexmap 2.8.0",
+ "indexmap 2.9.0",
"serde",
"serde_derive",
"serde_json",
@@ -5715,7 +5747,7 @@ version = "0.9.34+deprecated"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6a8b1a1a2ebf674015cc02edccce75287f1a0130d394307b36743c2f5d504b47"
dependencies = [
- "indexmap 2.8.0",
+ "indexmap 2.9.0",
"itoa",
"ryu",
"serde",
@@ -5790,27 +5822,6 @@ version = "1.14.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7fcf8323ef1faaee30a44a340193b1ac6814fd9b7b4e88e9d4519a3e4abe1cfd"
-[[package]]
-name = "snafu"
-version = "0.8.5"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "223891c85e2a29c3fe8fb900c1fae5e69c2e42415e3177752e8718475efa5019"
-dependencies = [
- "snafu-derive",
-]
-
-[[package]]
-name = "snafu-derive"
-version = "0.8.5"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "03c3c6b7927ffe7ecaa769ee0e3994da3b8cafc8f444578982c83ecb161af917"
-dependencies = [
- "heck 0.5.0",
- "proc-macro2",
- "quote",
- "syn 2.0.100",
-]
-
[[package]]
name = "snap"
version = "1.1.1"
@@ -5847,9 +5858,9 @@ dependencies = [
[[package]]
name = "sqllogictest"
-version = "0.28.0"
+version = "0.28.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "17b2f0b80fc250ed3fdd82fc88c0ada5ad62ee1ed5314ac5474acfa52082f518"
+checksum = "ee6199c1e008acc669b1e5873c138bf3ad4f8709ccd5c5d88913e664ae4f75de"
dependencies = [
"async-trait",
"educe",
@@ -6108,15 +6119,14 @@ dependencies = [
[[package]]
name = "sysinfo"
-version = "0.33.1"
+version = "0.34.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "4fc858248ea01b66f19d8e8a6d55f41deaf91e9d495246fd01368d99935c6c01"
+checksum = "a4b93974b3d3aeaa036504b8eefd4c039dced109171c1ae973f1dc63b2c7e4b2"
dependencies = [
- "core-foundation-sys",
"libc",
"memchr",
"ntapi",
- "rayon",
+ "objc2-core-foundation",
"windows",
]
@@ -6128,9 +6138,9 @@ checksum = "55937e1799185b12863d447f42597ed69d9928686b8d88a1df17376a097d8369"
[[package]]
name = "target-lexicon"
-version = "0.12.16"
+version = "0.13.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "61c41af27dd6d1e27b1b16b489db798443478cef1f06a660c96db617ba5de3b1"
+checksum = "e502f78cdbb8ba4718f566c418c52bc729126ffd16baee5baa718cf25dd5a69a"
[[package]]
name = "tempfile"
@@ -6461,7 +6471,7 @@ version = "0.22.24"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "17b4795ff5edd201c7cd6dca065ae59972ce77d1b80fa0a84d94950ece7d1474"
dependencies = [
- "indexmap 2.8.0",
+ "indexmap 2.9.0",
"toml_datetime",
"winnow",
]
@@ -6631,6 +6641,12 @@ dependencies = [
"static_assertions",
]
+[[package]]
+name = "twox-hash"
+version = "2.1.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "e7b17f197b3050ba473acf9181f7b1d3b66d1cf7356c6cc57886662276e65908"
+
[[package]]
name = "typed-arena"
version = "2.0.2"
@@ -7123,6 +7139,12 @@ dependencies = [
"syn 2.0.100",
]
+[[package]]
+name = "windows-link"
+version = "0.1.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "76840935b766e1b0a05c0066835fb9ec80071d4c09a16f6bd5f7e655e3c14c38"
+
[[package]]
name = "windows-registry"
version = "0.2.0"
@@ -7489,6 +7511,12 @@ dependencies = [
"syn 2.0.100",
]
+[[package]]
+name = "zlib-rs"
+version = "0.5.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "868b928d7949e09af2f6086dfc1e01936064cc7a819253bce650d4e2a2d63ba8"
+
[[package]]
name = "zstd"
version = "0.13.3"
diff --git a/Cargo.toml b/Cargo.toml
index b6164f89d31e8..5a735666f8e7e 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -75,7 +75,7 @@ repository = "https://github.com/apache/datafusion"
# Define Minimum Supported Rust Version (MSRV)
rust-version = "1.82.0"
# Define DataFusion version
-version = "46.0.1"
+version = "47.0.0"
[workspace.dependencies]
# We turn off default-features for some dependencies here so the workspaces which inherit them can
@@ -87,69 +87,69 @@ ahash = { version = "0.8", default-features = false, features = [
"runtime-rng",
] }
apache-avro = { version = "0.17", default-features = false }
-arrow = { version = "54.2.1", features = [
+arrow = { version = "55.0.0", features = [
"prettyprint",
"chrono-tz",
] }
-arrow-buffer = { version = "54.1.0", default-features = false }
-arrow-flight = { version = "54.2.1", features = [
+arrow-buffer = { version = "55.0.0", default-features = false }
+arrow-flight = { version = "55.0.0", features = [
"flight-sql-experimental",
] }
-arrow-ipc = { version = "54.2.0", default-features = false, features = [
+arrow-ipc = { version = "55.0.0", default-features = false, features = [
"lz4",
] }
-arrow-ord = { version = "54.1.0", default-features = false }
-arrow-schema = { version = "54.1.0", default-features = false }
+arrow-ord = { version = "55.0.0", default-features = false }
+arrow-schema = { version = "55.0.0", default-features = false }
async-trait = "0.1.88"
-bigdecimal = "0.4.7"
+bigdecimal = "0.4.8"
bytes = "1.10"
chrono = { version = "0.4.38", default-features = false }
criterion = "0.5.1"
ctor = "0.2.9"
dashmap = "6.0.1"
-datafusion = { path = "datafusion/core", version = "46.0.1", default-features = false }
-datafusion-catalog = { path = "datafusion/catalog", version = "46.0.1" }
-datafusion-catalog-listing = { path = "datafusion/catalog-listing", version = "46.0.1" }
-datafusion-common = { path = "datafusion/common", version = "46.0.1", default-features = false }
-datafusion-common-runtime = { path = "datafusion/common-runtime", version = "46.0.1" }
-datafusion-datasource = { path = "datafusion/datasource", version = "46.0.1", default-features = false }
-datafusion-datasource-avro = { path = "datafusion/datasource-avro", version = "46.0.1", default-features = false }
-datafusion-datasource-csv = { path = "datafusion/datasource-csv", version = "46.0.1", default-features = false }
-datafusion-datasource-json = { path = "datafusion/datasource-json", version = "46.0.1", default-features = false }
-datafusion-datasource-parquet = { path = "datafusion/datasource-parquet", version = "46.0.1", default-features = false }
-datafusion-doc = { path = "datafusion/doc", version = "46.0.1" }
-datafusion-execution = { path = "datafusion/execution", version = "46.0.1" }
-datafusion-expr = { path = "datafusion/expr", version = "46.0.1" }
-datafusion-expr-common = { path = "datafusion/expr-common", version = "46.0.1" }
-datafusion-ffi = { path = "datafusion/ffi", version = "46.0.1" }
-datafusion-functions = { path = "datafusion/functions", version = "46.0.1" }
-datafusion-functions-aggregate = { path = "datafusion/functions-aggregate", version = "46.0.1" }
-datafusion-functions-aggregate-common = { path = "datafusion/functions-aggregate-common", version = "46.0.1" }
-datafusion-functions-nested = { path = "datafusion/functions-nested", version = "46.0.1" }
-datafusion-functions-table = { path = "datafusion/functions-table", version = "46.0.1" }
-datafusion-functions-window = { path = "datafusion/functions-window", version = "46.0.1" }
-datafusion-functions-window-common = { path = "datafusion/functions-window-common", version = "46.0.1" }
-datafusion-macros = { path = "datafusion/macros", version = "46.0.1" }
-datafusion-optimizer = { path = "datafusion/optimizer", version = "46.0.1", default-features = false }
-datafusion-physical-expr = { path = "datafusion/physical-expr", version = "46.0.1", default-features = false }
-datafusion-physical-expr-common = { path = "datafusion/physical-expr-common", version = "46.0.1", default-features = false }
-datafusion-physical-optimizer = { path = "datafusion/physical-optimizer", version = "46.0.1" }
-datafusion-physical-plan = { path = "datafusion/physical-plan", version = "46.0.1" }
-datafusion-proto = { path = "datafusion/proto", version = "46.0.1" }
-datafusion-proto-common = { path = "datafusion/proto-common", version = "46.0.1" }
-datafusion-session = { path = "datafusion/session", version = "46.0.1" }
-datafusion-sql = { path = "datafusion/sql", version = "46.0.1" }
+datafusion = { path = "datafusion/core", version = "47.0.0", default-features = false }
+datafusion-catalog = { path = "datafusion/catalog", version = "47.0.0" }
+datafusion-catalog-listing = { path = "datafusion/catalog-listing", version = "47.0.0" }
+datafusion-common = { path = "datafusion/common", version = "47.0.0", default-features = false }
+datafusion-common-runtime = { path = "datafusion/common-runtime", version = "47.0.0" }
+datafusion-datasource = { path = "datafusion/datasource", version = "47.0.0", default-features = false }
+datafusion-datasource-avro = { path = "datafusion/datasource-avro", version = "47.0.0", default-features = false }
+datafusion-datasource-csv = { path = "datafusion/datasource-csv", version = "47.0.0", default-features = false }
+datafusion-datasource-json = { path = "datafusion/datasource-json", version = "47.0.0", default-features = false }
+datafusion-datasource-parquet = { path = "datafusion/datasource-parquet", version = "47.0.0", default-features = false }
+datafusion-doc = { path = "datafusion/doc", version = "47.0.0" }
+datafusion-execution = { path = "datafusion/execution", version = "47.0.0" }
+datafusion-expr = { path = "datafusion/expr", version = "47.0.0" }
+datafusion-expr-common = { path = "datafusion/expr-common", version = "47.0.0" }
+datafusion-ffi = { path = "datafusion/ffi", version = "47.0.0" }
+datafusion-functions = { path = "datafusion/functions", version = "47.0.0" }
+datafusion-functions-aggregate = { path = "datafusion/functions-aggregate", version = "47.0.0" }
+datafusion-functions-aggregate-common = { path = "datafusion/functions-aggregate-common", version = "47.0.0" }
+datafusion-functions-nested = { path = "datafusion/functions-nested", version = "47.0.0" }
+datafusion-functions-table = { path = "datafusion/functions-table", version = "47.0.0" }
+datafusion-functions-window = { path = "datafusion/functions-window", version = "47.0.0" }
+datafusion-functions-window-common = { path = "datafusion/functions-window-common", version = "47.0.0" }
+datafusion-macros = { path = "datafusion/macros", version = "47.0.0" }
+datafusion-optimizer = { path = "datafusion/optimizer", version = "47.0.0", default-features = false }
+datafusion-physical-expr = { path = "datafusion/physical-expr", version = "47.0.0", default-features = false }
+datafusion-physical-expr-common = { path = "datafusion/physical-expr-common", version = "47.0.0", default-features = false }
+datafusion-physical-optimizer = { path = "datafusion/physical-optimizer", version = "47.0.0" }
+datafusion-physical-plan = { path = "datafusion/physical-plan", version = "47.0.0" }
+datafusion-proto = { path = "datafusion/proto", version = "47.0.0" }
+datafusion-proto-common = { path = "datafusion/proto-common", version = "47.0.0" }
+datafusion-session = { path = "datafusion/session", version = "47.0.0" }
+datafusion-sql = { path = "datafusion/sql", version = "47.0.0" }
doc-comment = "0.3"
env_logger = "0.11"
futures = "0.3"
-half = { version = "2.5.0", default-features = false }
+half = { version = "2.6.0", default-features = false }
hashbrown = { version = "0.14.5", features = ["raw"] }
-indexmap = "2.8.0"
+indexmap = "2.9.0"
itertools = "0.14"
log = "^0.4"
-object_store = { version = "0.11.0", default-features = false }
+object_store = { version = "0.12.0", default-features = false }
parking_lot = "0.12"
-parquet = { version = "54.2.1", default-features = false, features = [
+parquet = { version = "55.0.0", default-features = false, features = [
"arrow",
"async",
"object_store",
@@ -191,13 +191,20 @@ strip = false # Retain debug info for flamegraphs
inherits = "dev"
incremental = false
-# ci turns off debug info, etc for dependencies to allow for smaller binaries making caching more effective
+# ci turns off debug info, etc. for dependencies to allow for smaller binaries, making caching more effective
[profile.ci.package."*"]
debug = false
debug-assertions = false
strip = "debuginfo"
incremental = false
+# the profiling profile inherits from release but keeps debug information and
+# symbols for mem/cpu profiling
+[profile.profiling]
+inherits = "release"
+debug = true
+strip = false
+
[workspace.lints.clippy]
# Detects large stack-allocated futures that may cause stack overflow crashes (see threshold in clippy.toml)
large_futures = "warn"
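
The `profiling` profile added above can be selected with Cargo's `--profile` flag. A minimal sketch of how it might be used locally, assuming a checkout of the repository (the `datafusion-benchmarks` package name is taken from the workspace; the perf/flamegraph mention is only an example of a downstream tool):

```bash
# Build with the new profiling profile: release optimizations plus debug symbols,
# so the binaries produced under target/profiling/ can be fed to perf/flamegraph.
cargo build --profile profiling -p datafusion-benchmarks
```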
diff --git a/benchmarks/README.md b/benchmarks/README.md
index 8acaa298bd3ad..86b2e1b3b958f 100644
--- a/benchmarks/README.md
+++ b/benchmarks/README.md
@@ -200,6 +200,16 @@ cargo run --release --bin tpch -- convert --input ./data --output /mnt/tpch-parq
Or if you want to verify and run all the queries in the benchmark, you can just run `cargo test`.
+#### Sorted Conversion
+
+The TPCH tables generated by the dbgen utility are sorted by their first column (the primary key for most tables; the `l_orderkey` column for the `lineitem` table).
+
+To preserve this sorted order information during conversion (useful for benchmarking execution on pre-sorted data), include the `--sort` flag:
+
+```bash
+cargo run --release --bin tpch -- convert --input ./data --output /mnt/tpch-sorted-parquet --format parquet --sort
+```
+
### Comparing results between runs
Any `dfbench` execution with `-o
` argument will produce a
@@ -445,20 +455,29 @@ Test performance of end-to-end sort SQL queries. (While the `Sort` benchmark foc
Sort integration benchmark runs whole table sort queries on TPCH `lineitem` table, with different characteristics. For example, different number of sort keys, different sort key cardinality, different number of payload columns, etc.
+If the TPCH tables have been converted as sorted on their first column (see [Sorted Conversion](#sorted-conversion)), you can use the `--sorted` flag to indicate that the input data is pre-sorted, allowing DataFusion to leverage that order during query execution.
+
+Additionally, an optional `--limit` flag is available for the sort benchmark. When specified, this flag appends a `LIMIT n` clause to the SQL query, effectively converting the query into a TopK query. Combining the `--sorted` and `--limit` options enables benchmarking of TopK queries on pre-sorted inputs.
+
See [`sort_tpch.rs`](src/sort_tpch.rs) for more details.
### Sort TPCH Benchmark Example Runs
1. Run all queries with default setting:
```bash
- cargo run --release --bin dfbench -- sort-tpch -p '....../datafusion/benchmarks/data/tpch_sf1' -o '/tmp/sort_tpch.json'
+ cargo run --release --bin dfbench -- sort-tpch -p './datafusion/benchmarks/data/tpch_sf1' -o '/tmp/sort_tpch.json'
```
2. Run a specific query:
```bash
- cargo run --release --bin dfbench -- sort-tpch -p '....../datafusion/benchmarks/data/tpch_sf1' -o '/tmp/sort_tpch.json' --query 2
+ cargo run --release --bin dfbench -- sort-tpch -p './datafusion/benchmarks/data/tpch_sf1' -o '/tmp/sort_tpch.json' --query 2
```
-3. Run all queries with `bench.sh` script:
+3. Run all queries as TopK queries on pre-sorted data:
+```bash
+ cargo run --release --bin dfbench -- sort-tpch --sorted --limit 10 -p './datafusion/benchmarks/data/tpch_sf1' -o '/tmp/sort_tpch.json'
+```
+
+4. Run all queries with `bench.sh` script:
```bash
./bench.sh run sort_tpch
```
diff --git a/benchmarks/bench.sh b/benchmarks/bench.sh
index 5be825eb0dafd..5d3ad3446ddb9 100755
--- a/benchmarks/bench.sh
+++ b/benchmarks/bench.sh
@@ -412,7 +412,10 @@ run_tpch() {
echo "Running tpch benchmark..."
# Optional query filter to run specific query
QUERY=$([ -n "$ARG3" ] && echo "--query $ARG3" || echo "")
+ # debug the target command
+ set -x
$CARGO_COMMAND --bin tpch -- benchmark datafusion --iterations 5 --path "${TPCH_DIR}" --prefer_hash_join "${PREFER_HASH_JOIN}" --format parquet -o "${RESULTS_FILE}" $QUERY
+ set +x
}
# Runs the tpch in memory
@@ -427,9 +430,13 @@ run_tpch_mem() {
RESULTS_FILE="${RESULTS_DIR}/tpch_mem_sf${SCALE_FACTOR}.json"
echo "RESULTS_FILE: ${RESULTS_FILE}"
echo "Running tpch_mem benchmark..."
+ # Optional query filter to run specific query
QUERY=$([ -n "$ARG3" ] && echo "--query $ARG3" || echo "")
+ # debug the target command
+ set -x
# -m means in memory
$CARGO_COMMAND --bin tpch -- benchmark datafusion --iterations 5 --path "${TPCH_DIR}" --prefer_hash_join "${PREFER_HASH_JOIN}" -m --format parquet -o "${RESULTS_FILE}" $QUERY
+ set +x
}
# Runs the cancellation benchmark
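
The `set -x` / `set +x` pairs added above are plain Bash tracing: between them the shell prints each command, with variables expanded, before running it, so the exact cargo invocation appears in the benchmark log. A minimal standalone illustration (not part of `bench.sh`; the variable name is only an example):

```bash
#!/usr/bin/env bash
# Between set -x and set +x, bash echoes each expanded command prefixed with '+'.
QUERY="--query 5"
set -x
echo benchmark $QUERY    # the log will show: + echo benchmark --query 5
set +x
```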
diff --git a/benchmarks/queries/clickbench/README.md b/benchmarks/queries/clickbench/README.md
index 6797797409c1a..fdb7d1676be0f 100644
--- a/benchmarks/queries/clickbench/README.md
+++ b/benchmarks/queries/clickbench/README.md
@@ -93,12 +93,14 @@ LIMIT 10;
Results look like
+```
+-------------+---------------------+---+------+------+------+
| ClientIP | WatchID | c | tmin | tmed | tmax |
+-------------+---------------------+---+------+------+------+
| 1611957945 | 6655575552203051303 | 2 | 0 | 0 | 0 |
| -1402644643 | 8566928176839891583 | 2 | 0 | 0 | 0 |
+-------------+---------------------+---+------+------+------+
+```
### Q5: Response start time distribution analysis (p95)
@@ -120,13 +122,42 @@ LIMIT 10;
```
Results look like
-
+```
+-------------+---------------------+---+------+------+------+
| ClientIP | WatchID | c | tmin | tp95 | tmax |
+-------------+---------------------+---+------+------+------+
| 1611957945 | 6655575552203051303 | 2 | 0 | 0 | 0 |
| -1402644643 | 8566928176839891583 | 2 | 0 | 0 | 0 |
+-------------+---------------------+---+------+------+------+
+```
+
+### Q6: How many social shares meet complex multi-stage filtering criteria?
+**Question**: What is the count of sharing actions from iPhone mobile users on specific social networks, within common timezones, participating in seasonal campaigns, with high screen resolutions and closely matched UTM parameters?
+**Important Query Properties**: Simple high-selectivity filters, costly string matching, and a large number of expensive filters positioned relatively late in the filter chain
+
+```sql
+SELECT COUNT(*) AS ShareCount
+FROM hits
+WHERE
+ -- Stage 1: High-selectivity filters (fast)
+ "IsMobile" = 1 -- Filter mobile users
+ AND "MobilePhoneModel" LIKE 'iPhone%' -- Match iPhone models
+ AND "SocialAction" = 'share' -- Identify social sharing actions
+
+ -- Stage 2: Moderate filters (cheap)
+ AND "SocialSourceNetworkID" IN (5, 12) -- Filter specific social networks
+ AND "ClientTimeZone" BETWEEN -5 AND 5 -- Restrict to common timezones
+
+ -- Stage 3: Heavy computations (expensive)
+ AND regexp_match("Referer", '\/campaign\/(spring|summer)_promo') IS NOT NULL -- Find campaign-specific referrers
+ AND CASE
+ WHEN split_part(split_part("URL", 'resolution=', 2), '&', 1) ~ '^\d+$'
+ THEN split_part(split_part("URL", 'resolution=', 2), '&', 1)::INT
+ ELSE 0
+ END > 1920 -- Extract and validate resolution parameter
+ AND levenshtein(CAST("UTMSource" AS STRING), CAST("UTMCampaign" AS STRING)) < 3 -- Verify UTM parameter similarity
+```
+The result is empty, since everything has already been filtered out by `"SocialAction" = 'share'`.
## Data Notes
diff --git a/benchmarks/queries/clickbench/extended.sql b/benchmarks/queries/clickbench/extended.sql
index fbabaf2a70218..e967583fd6442 100644
--- a/benchmarks/queries/clickbench/extended.sql
+++ b/benchmarks/queries/clickbench/extended.sql
@@ -3,4 +3,5 @@ SELECT COUNT(DISTINCT "HitColor"), COUNT(DISTINCT "BrowserCountry"), COUNT(DISTI
SELECT "BrowserCountry", COUNT(DISTINCT "SocialNetwork"), COUNT(DISTINCT "HitColor"), COUNT(DISTINCT "BrowserLanguage"), COUNT(DISTINCT "SocialAction") FROM hits GROUP BY 1 ORDER BY 2 DESC LIMIT 10;
SELECT "SocialSourceNetworkID", "RegionID", COUNT(*), AVG("Age"), AVG("ParamPrice"), STDDEV("ParamPrice") as s, VAR("ParamPrice") FROM hits GROUP BY "SocialSourceNetworkID", "RegionID" HAVING s IS NOT NULL ORDER BY s DESC LIMIT 10;
SELECT "ClientIP", "WatchID", COUNT(*) c, MIN("ResponseStartTiming") tmin, MEDIAN("ResponseStartTiming") tmed, MAX("ResponseStartTiming") tmax FROM hits WHERE "JavaEnable" = 0 GROUP BY "ClientIP", "WatchID" HAVING c > 1 ORDER BY tmed DESC LIMIT 10;
-SELECT "ClientIP", "WatchID", COUNT(*) c, MIN("ResponseStartTiming") tmin, APPROX_PERCENTILE_CONT("ResponseStartTiming", 0.95) tp95, MAX("ResponseStartTiming") tmax FROM 'hits' WHERE "JavaEnable" = 0 GROUP BY "ClientIP", "WatchID" HAVING c > 1 ORDER BY tp95 DESC LIMIT 10;
\ No newline at end of file
+SELECT "ClientIP", "WatchID", COUNT(*) c, MIN("ResponseStartTiming") tmin, APPROX_PERCENTILE_CONT("ResponseStartTiming", 0.95) tp95, MAX("ResponseStartTiming") tmax FROM 'hits' WHERE "JavaEnable" = 0 GROUP BY "ClientIP", "WatchID" HAVING c > 1 ORDER BY tp95 DESC LIMIT 10;
+SELECT COUNT(*) AS ShareCount FROM hits WHERE "IsMobile" = 1 AND "MobilePhoneModel" LIKE 'iPhone%' AND "SocialAction" = 'share' AND "SocialSourceNetworkID" IN (5, 12) AND "ClientTimeZone" BETWEEN -5 AND 5 AND regexp_match("Referer", '\/campaign\/(spring|summer)_promo') IS NOT NULL AND CASE WHEN split_part(split_part("URL", 'resolution=', 2), '&', 1) ~ '^\d+$' THEN split_part(split_part("URL", 'resolution=', 2), '&', 1)::INT ELSE 0 END > 1920 AND levenshtein(CAST("UTMSource" AS STRING), CAST("UTMCampaign" AS STRING)) < 3;
diff --git a/benchmarks/queries/clickbench/queries.sql b/benchmarks/queries/clickbench/queries.sql
index 52e72e02e1e0d..9a183cd6e259c 100644
--- a/benchmarks/queries/clickbench/queries.sql
+++ b/benchmarks/queries/clickbench/queries.sql
@@ -4,7 +4,7 @@ SELECT SUM("AdvEngineID"), COUNT(*), AVG("ResolutionWidth") FROM hits;
SELECT AVG("UserID") FROM hits;
SELECT COUNT(DISTINCT "UserID") FROM hits;
SELECT COUNT(DISTINCT "SearchPhrase") FROM hits;
-SELECT MIN("EventDate"::INT::DATE), MAX("EventDate"::INT::DATE) FROM hits;
+SELECT MIN("EventDate"), MAX("EventDate") FROM hits;
SELECT "AdvEngineID", COUNT(*) FROM hits WHERE "AdvEngineID" <> 0 GROUP BY "AdvEngineID" ORDER BY COUNT(*) DESC;
SELECT "RegionID", COUNT(DISTINCT "UserID") AS u FROM hits GROUP BY "RegionID" ORDER BY u DESC LIMIT 10;
SELECT "RegionID", SUM("AdvEngineID"), COUNT(*) AS c, AVG("ResolutionWidth"), COUNT(DISTINCT "UserID") FROM hits GROUP BY "RegionID" ORDER BY c DESC LIMIT 10;
@@ -21,10 +21,10 @@ SELECT "UserID" FROM hits WHERE "UserID" = 435090932899640449;
SELECT COUNT(*) FROM hits WHERE "URL" LIKE '%google%';
SELECT "SearchPhrase", MIN("URL"), COUNT(*) AS c FROM hits WHERE "URL" LIKE '%google%' AND "SearchPhrase" <> '' GROUP BY "SearchPhrase" ORDER BY c DESC LIMIT 10;
SELECT "SearchPhrase", MIN("URL"), MIN("Title"), COUNT(*) AS c, COUNT(DISTINCT "UserID") FROM hits WHERE "Title" LIKE '%Google%' AND "URL" NOT LIKE '%.google.%' AND "SearchPhrase" <> '' GROUP BY "SearchPhrase" ORDER BY c DESC LIMIT 10;
-SELECT * FROM hits WHERE "URL" LIKE '%google%' ORDER BY to_timestamp_seconds("EventTime") LIMIT 10;
-SELECT "SearchPhrase" FROM hits WHERE "SearchPhrase" <> '' ORDER BY to_timestamp_seconds("EventTime") LIMIT 10;
+SELECT * FROM hits WHERE "URL" LIKE '%google%' ORDER BY "EventTime" LIMIT 10;
+SELECT "SearchPhrase" FROM hits WHERE "SearchPhrase" <> '' ORDER BY "EventTime" LIMIT 10;
SELECT "SearchPhrase" FROM hits WHERE "SearchPhrase" <> '' ORDER BY "SearchPhrase" LIMIT 10;
-SELECT "SearchPhrase" FROM hits WHERE "SearchPhrase" <> '' ORDER BY to_timestamp_seconds("EventTime"), "SearchPhrase" LIMIT 10;
+SELECT "SearchPhrase" FROM hits WHERE "SearchPhrase" <> '' ORDER BY "EventTime", "SearchPhrase" LIMIT 10;
SELECT "CounterID", AVG(length("URL")) AS l, COUNT(*) AS c FROM hits WHERE "URL" <> '' GROUP BY "CounterID" HAVING COUNT(*) > 100000 ORDER BY l DESC LIMIT 25;
SELECT REGEXP_REPLACE("Referer", '^https?://(?:www\.)?([^/]+)/.*$', '\1') AS k, AVG(length("Referer")) AS l, COUNT(*) AS c, MIN("Referer") FROM hits WHERE "Referer" <> '' GROUP BY k HAVING COUNT(*) > 100000 ORDER BY l DESC LIMIT 25;
SELECT SUM("ResolutionWidth"), SUM("ResolutionWidth" + 1), SUM("ResolutionWidth" + 2), SUM("ResolutionWidth" + 3), SUM("ResolutionWidth" + 4), SUM("ResolutionWidth" + 5), SUM("ResolutionWidth" + 6), SUM("ResolutionWidth" + 7), SUM("ResolutionWidth" + 8), SUM("ResolutionWidth" + 9), SUM("ResolutionWidth" + 10), SUM("ResolutionWidth" + 11), SUM("ResolutionWidth" + 12), SUM("ResolutionWidth" + 13), SUM("ResolutionWidth" + 14), SUM("ResolutionWidth" + 15), SUM("ResolutionWidth" + 16), SUM("ResolutionWidth" + 17), SUM("ResolutionWidth" + 18), SUM("ResolutionWidth" + 19), SUM("ResolutionWidth" + 20), SUM("ResolutionWidth" + 21), SUM("ResolutionWidth" + 22), SUM("ResolutionWidth" + 23), SUM("ResolutionWidth" + 24), SUM("ResolutionWidth" + 25), SUM("ResolutionWidth" + 26), SUM("ResolutionWidth" + 27), SUM("ResolutionWidth" + 28), SUM("ResolutionWidth" + 29), SUM("ResolutionWidth" + 30), SUM("ResolutionWidth" + 31), SUM("ResolutionWidth" + 32), SUM("ResolutionWidth" + 33), SUM("ResolutionWidth" + 34), SUM("ResolutionWidth" + 35), SUM("ResolutionWidth" + 36), SUM("ResolutionWidth" + 37), SUM("ResolutionWidth" + 38), SUM("ResolutionWidth" + 39), SUM("ResolutionWidth" + 40), SUM("ResolutionWidth" + 41), SUM("ResolutionWidth" + 42), SUM("ResolutionWidth" + 43), SUM("ResolutionWidth" + 44), SUM("ResolutionWidth" + 45), SUM("ResolutionWidth" + 46), SUM("ResolutionWidth" + 47), SUM("ResolutionWidth" + 48), SUM("ResolutionWidth" + 49), SUM("ResolutionWidth" + 50), SUM("ResolutionWidth" + 51), SUM("ResolutionWidth" + 52), SUM("ResolutionWidth" + 53), SUM("ResolutionWidth" + 54), SUM("ResolutionWidth" + 55), SUM("ResolutionWidth" + 56), SUM("ResolutionWidth" + 57), SUM("ResolutionWidth" + 58), SUM("ResolutionWidth" + 59), SUM("ResolutionWidth" + 60), SUM("ResolutionWidth" + 61), SUM("ResolutionWidth" + 62), SUM("ResolutionWidth" + 63), SUM("ResolutionWidth" + 64), SUM("ResolutionWidth" + 65), SUM("ResolutionWidth" + 66), SUM("ResolutionWidth" + 67), SUM("ResolutionWidth" + 68), SUM("ResolutionWidth" + 69), SUM("ResolutionWidth" + 70), SUM("ResolutionWidth" + 71), SUM("ResolutionWidth" + 72), SUM("ResolutionWidth" + 73), SUM("ResolutionWidth" + 74), SUM("ResolutionWidth" + 75), SUM("ResolutionWidth" + 76), SUM("ResolutionWidth" + 77), SUM("ResolutionWidth" + 78), SUM("ResolutionWidth" + 79), SUM("ResolutionWidth" + 80), SUM("ResolutionWidth" + 81), SUM("ResolutionWidth" + 82), SUM("ResolutionWidth" + 83), SUM("ResolutionWidth" + 84), SUM("ResolutionWidth" + 85), SUM("ResolutionWidth" + 86), SUM("ResolutionWidth" + 87), SUM("ResolutionWidth" + 88), SUM("ResolutionWidth" + 89) FROM hits;
@@ -34,10 +34,10 @@ SELECT "WatchID", "ClientIP", COUNT(*) AS c, SUM("IsRefresh"), AVG("ResolutionWi
SELECT "URL", COUNT(*) AS c FROM hits GROUP BY "URL" ORDER BY c DESC LIMIT 10;
SELECT 1, "URL", COUNT(*) AS c FROM hits GROUP BY 1, "URL" ORDER BY c DESC LIMIT 10;
SELECT "ClientIP", "ClientIP" - 1, "ClientIP" - 2, "ClientIP" - 3, COUNT(*) AS c FROM hits GROUP BY "ClientIP", "ClientIP" - 1, "ClientIP" - 2, "ClientIP" - 3 ORDER BY c DESC LIMIT 10;
-SELECT "URL", COUNT(*) AS PageViews FROM hits WHERE "CounterID" = 62 AND "EventDate"::INT::DATE >= '2013-07-01' AND "EventDate"::INT::DATE <= '2013-07-31' AND "DontCountHits" = 0 AND "IsRefresh" = 0 AND "URL" <> '' GROUP BY "URL" ORDER BY PageViews DESC LIMIT 10;
-SELECT "Title", COUNT(*) AS PageViews FROM hits WHERE "CounterID" = 62 AND "EventDate"::INT::DATE >= '2013-07-01' AND "EventDate"::INT::DATE <= '2013-07-31' AND "DontCountHits" = 0 AND "IsRefresh" = 0 AND "Title" <> '' GROUP BY "Title" ORDER BY PageViews DESC LIMIT 10;
-SELECT "URL", COUNT(*) AS PageViews FROM hits WHERE "CounterID" = 62 AND "EventDate"::INT::DATE >= '2013-07-01' AND "EventDate"::INT::DATE <= '2013-07-31' AND "IsRefresh" = 0 AND "IsLink" <> 0 AND "IsDownload" = 0 GROUP BY "URL" ORDER BY PageViews DESC LIMIT 10 OFFSET 1000;
-SELECT "TraficSourceID", "SearchEngineID", "AdvEngineID", CASE WHEN ("SearchEngineID" = 0 AND "AdvEngineID" = 0) THEN "Referer" ELSE '' END AS Src, "URL" AS Dst, COUNT(*) AS PageViews FROM hits WHERE "CounterID" = 62 AND "EventDate"::INT::DATE >= '2013-07-01' AND "EventDate"::INT::DATE <= '2013-07-31' AND "IsRefresh" = 0 GROUP BY "TraficSourceID", "SearchEngineID", "AdvEngineID", Src, Dst ORDER BY PageViews DESC LIMIT 10 OFFSET 1000;
-SELECT "URLHash", "EventDate"::INT::DATE, COUNT(*) AS PageViews FROM hits WHERE "CounterID" = 62 AND "EventDate"::INT::DATE >= '2013-07-01' AND "EventDate"::INT::DATE <= '2013-07-31' AND "IsRefresh" = 0 AND "TraficSourceID" IN (-1, 6) AND "RefererHash" = 3594120000172545465 GROUP BY "URLHash", "EventDate"::INT::DATE ORDER BY PageViews DESC LIMIT 10 OFFSET 100;
-SELECT "WindowClientWidth", "WindowClientHeight", COUNT(*) AS PageViews FROM hits WHERE "CounterID" = 62 AND "EventDate"::INT::DATE >= '2013-07-01' AND "EventDate"::INT::DATE <= '2013-07-31' AND "IsRefresh" = 0 AND "DontCountHits" = 0 AND "URLHash" = 2868770270353813622 GROUP BY "WindowClientWidth", "WindowClientHeight" ORDER BY PageViews DESC LIMIT 10 OFFSET 10000;
-SELECT DATE_TRUNC('minute', to_timestamp_seconds("EventTime")) AS M, COUNT(*) AS PageViews FROM hits WHERE "CounterID" = 62 AND "EventDate"::INT::DATE >= '2013-07-14' AND "EventDate"::INT::DATE <= '2013-07-15' AND "IsRefresh" = 0 AND "DontCountHits" = 0 GROUP BY DATE_TRUNC('minute', to_timestamp_seconds("EventTime")) ORDER BY DATE_TRUNC('minute', M) LIMIT 10 OFFSET 1000;
+SELECT "URL", COUNT(*) AS PageViews FROM hits WHERE "CounterID" = 62 AND "EventDate" >= '2013-07-01' AND "EventDate" <= '2013-07-31' AND "DontCountHits" = 0 AND "IsRefresh" = 0 AND "URL" <> '' GROUP BY "URL" ORDER BY PageViews DESC LIMIT 10;
+SELECT "Title", COUNT(*) AS PageViews FROM hits WHERE "CounterID" = 62 AND "EventDate" >= '2013-07-01' AND "EventDate" <= '2013-07-31' AND "DontCountHits" = 0 AND "IsRefresh" = 0 AND "Title" <> '' GROUP BY "Title" ORDER BY PageViews DESC LIMIT 10;
+SELECT "URL", COUNT(*) AS PageViews FROM hits WHERE "CounterID" = 62 AND "EventDate" >= '2013-07-01' AND "EventDate" <= '2013-07-31' AND "IsRefresh" = 0 AND "IsLink" <> 0 AND "IsDownload" = 0 GROUP BY "URL" ORDER BY PageViews DESC LIMIT 10 OFFSET 1000;
+SELECT "TraficSourceID", "SearchEngineID", "AdvEngineID", CASE WHEN ("SearchEngineID" = 0 AND "AdvEngineID" = 0) THEN "Referer" ELSE '' END AS Src, "URL" AS Dst, COUNT(*) AS PageViews FROM hits WHERE "CounterID" = 62 AND "EventDate" >= '2013-07-01' AND "EventDate" <= '2013-07-31' AND "IsRefresh" = 0 GROUP BY "TraficSourceID", "SearchEngineID", "AdvEngineID", Src, Dst ORDER BY PageViews DESC LIMIT 10 OFFSET 1000;
+SELECT "URLHash", "EventDate", COUNT(*) AS PageViews FROM hits WHERE "CounterID" = 62 AND "EventDate" >= '2013-07-01' AND "EventDate" <= '2013-07-31' AND "IsRefresh" = 0 AND "TraficSourceID" IN (-1, 6) AND "RefererHash" = 3594120000172545465 GROUP BY "URLHash", "EventDate" ORDER BY PageViews DESC LIMIT 10 OFFSET 100;
+SELECT "WindowClientWidth", "WindowClientHeight", COUNT(*) AS PageViews FROM hits WHERE "CounterID" = 62 AND "EventDate" >= '2013-07-01' AND "EventDate" <= '2013-07-31' AND "IsRefresh" = 0 AND "DontCountHits" = 0 AND "URLHash" = 2868770270353813622 GROUP BY "WindowClientWidth", "WindowClientHeight" ORDER BY PageViews DESC LIMIT 10 OFFSET 10000;
+SELECT DATE_TRUNC('minute', to_timestamp_seconds("EventTime")) AS M, COUNT(*) AS PageViews FROM hits WHERE "CounterID" = 62 AND "EventDate" >= '2013-07-14' AND "EventDate" <= '2013-07-15' AND "IsRefresh" = 0 AND "DontCountHits" = 0 GROUP BY DATE_TRUNC('minute', to_timestamp_seconds("EventTime")) ORDER BY DATE_TRUNC('minute', M) LIMIT 10 OFFSET 1000;
diff --git a/benchmarks/src/sort_tpch.rs b/benchmarks/src/sort_tpch.rs
index 956bb92b6c78d..176234eca541c 100644
--- a/benchmarks/src/sort_tpch.rs
+++ b/benchmarks/src/sort_tpch.rs
@@ -63,6 +63,15 @@ pub struct RunOpt {
/// Load the data into a MemTable before executing the query
#[structopt(short = "m", long = "mem-table")]
mem_table: bool,
+
+ /// Mark the first column of each table as sorted in ascending order.
+ /// The tables should have been created with the `--sort` option for this to have any effect.
+ #[structopt(short = "t", long = "sorted")]
+ sorted: bool,
+
+ /// Append a `LIMIT n` clause to the query
+ #[structopt(short = "l", long = "limit")]
+ limit: Option<usize>,
}
struct QueryResult {
@@ -163,7 +172,7 @@ impl RunOpt {
r#"
SELECT l_shipmode, l_comment, l_partkey
FROM lineitem
- ORDER BY l_shipmode;
+ ORDER BY l_shipmode
"#,
];
@@ -212,9 +221,14 @@ impl RunOpt {
let start = Instant::now();
let query_idx = query_id - 1; // 1-indexed -> 0-indexed
- let sql = Self::SORT_QUERIES[query_idx];
+ let base_sql = Self::SORT_QUERIES[query_idx].to_string();
+ let sql = if let Some(limit) = self.limit {
+ format!("{base_sql} LIMIT {limit}")
+ } else {
+ base_sql
+ };
- let row_count = self.execute_query(&ctx, sql).await?;
+ let row_count = self.execute_query(&ctx, sql.as_str()).await?;
let elapsed = start.elapsed(); //.as_secs_f64() * 1000.0;
let ms = elapsed.as_secs_f64() * 1000.0;
@@ -315,8 +329,18 @@ impl RunOpt {
.with_collect_stat(state.config().collect_statistics());
let table_path = ListingTableUrl::parse(path)?;
- let config = ListingTableConfig::new(table_path).with_listing_options(options);
- let config = config.infer_schema(&state).await?;
+ let schema = options.infer_schema(&state, &table_path).await?;
+ let options = if self.sorted {
+ let key_column_name = schema.fields()[0].name();
+ options
+ .with_file_sort_order(vec![vec![col(key_column_name).sort(true, false)]])
+ } else {
+ options
+ };
+
+ let config = ListingTableConfig::new(table_path)
+ .with_listing_options(options)
+ .with_schema(schema);
Ok(Arc::new(ListingTable::try_new(config)?))
}
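For context on how the new `--sorted` flag is meant to be wired up outside the benchmark harness, here is a minimal standalone sketch built from the same calls the patch uses (`infer_schema`, `with_file_sort_order`, `ListingTable::try_new`). The Parquet format, path, and `lineitem` table name are placeholder assumptions, not part of the patch:

```rust
use std::sync::Arc;

use datafusion::datasource::file_format::parquet::ParquetFormat;
use datafusion::datasource::listing::{
    ListingOptions, ListingTable, ListingTableConfig, ListingTableUrl,
};
use datafusion::error::Result;
use datafusion::prelude::*;

/// Register `path` as a table, optionally declaring that its files are already
/// sorted by the first column (mirrors the patched register logic above).
async fn register_maybe_sorted(
    ctx: &SessionContext,
    path: &str,
    sorted: bool,
) -> Result<()> {
    let state = ctx.state();
    let table_path = ListingTableUrl::parse(path)?;
    let options = ListingOptions::new(Arc::new(ParquetFormat::default()));
    // Infer the schema first so the name of the key (first) column is known.
    let schema = options.infer_schema(&state, &table_path).await?;
    let options = if sorted {
        let key = schema.fields()[0].name();
        options.with_file_sort_order(vec![vec![col(key).sort(true, false)]])
    } else {
        options
    };
    let config = ListingTableConfig::new(table_path)
        .with_listing_options(options)
        .with_schema(schema);
    ctx.register_table("lineitem", Arc::new(ListingTable::try_new(config)?))?;
    Ok(())
}
```

Declaring the order is only a hint to the planner; it does not reorder the files, which is why the conversion step below gains a matching `--sort` option.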
diff --git a/benchmarks/src/tpch/convert.rs b/benchmarks/src/tpch/convert.rs
index 7f391d930045a..5219e09cd3052 100644
--- a/benchmarks/src/tpch/convert.rs
+++ b/benchmarks/src/tpch/convert.rs
@@ -22,15 +22,14 @@ use std::path::{Path, PathBuf};
use datafusion::common::not_impl_err;
+use super::get_tbl_tpch_table_schema;
+use super::TPCH_TABLES;
use datafusion::error::Result;
use datafusion::prelude::*;
use parquet::basic::Compression;
use parquet::file::properties::WriterProperties;
use structopt::StructOpt;
-use super::get_tbl_tpch_table_schema;
-use super::TPCH_TABLES;
-
/// Convert tpch .slt files to .parquet or .csv files
#[derive(Debug, StructOpt)]
pub struct ConvertOpt {
@@ -57,6 +56,10 @@ pub struct ConvertOpt {
/// Batch size when reading CSV or Parquet files
#[structopt(short = "s", long = "batch-size", default_value = "8192")]
batch_size: usize,
+
+ /// Sort each table by its first column in ascending order.
+ #[structopt(short = "t", long = "sort")]
+ sort: bool,
}
impl ConvertOpt {
@@ -70,6 +73,7 @@ impl ConvertOpt {
for table in TPCH_TABLES {
let start = Instant::now();
let schema = get_tbl_tpch_table_schema(table);
+ let key_column_name = schema.fields()[0].name();
let input_path = format!("{input_path}/{table}.tbl");
let options = CsvReadOptions::new()
@@ -77,6 +81,13 @@ impl ConvertOpt {
.has_header(false)
.delimiter(b'|')
.file_extension(".tbl");
+ let options = if self.sort {
+ // indicate that the file is already sorted by its first column to speed up the conversion
+ options
+ .file_sort_order(vec![vec![col(key_column_name).sort(true, false)]])
+ } else {
+ options
+ };
let config = SessionConfig::new().with_batch_size(self.batch_size);
let ctx = SessionContext::new_with_config(config);
@@ -99,6 +110,11 @@ impl ConvertOpt {
if partitions > 1 {
csv = csv.repartition(Partitioning::RoundRobinBatch(partitions))?
}
+ let csv = if self.sort {
+ csv.sort_by(vec![col(key_column_name)])?
+ } else {
+ csv
+ };
// create the physical plan
let csv = csv.create_physical_plan().await?;
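The conversion side can be sketched the same way: declare the input order on the CSV reader and keep the output physically sorted. This is a simplified, hypothetical variant of the patched loop; the two-column schema, file paths, and `l_orderkey` key column are illustrative only:

```rust
use datafusion::arrow::datatypes::{DataType, Field, Schema};
use datafusion::dataframe::DataFrameWriteOptions;
use datafusion::error::Result;
use datafusion::prelude::*;

async fn convert_sorted(ctx: &SessionContext) -> Result<()> {
    // Toy schema standing in for the full TPC-H lineitem schema.
    let schema = Schema::new(vec![
        Field::new("l_orderkey", DataType::Int64, false),
        Field::new("l_comment", DataType::Utf8, true),
    ]);
    let options = CsvReadOptions::new()
        .schema(&schema)
        .has_header(false)
        .delimiter(b'|')
        .file_extension(".tbl")
        // tell DataFusion the .tbl input is already ordered by the key column
        .file_sort_order(vec![vec![col("l_orderkey").sort(true, false)]]);
    let df = ctx.read_csv("data/lineitem.tbl", options).await?;
    // keep the Parquet output physically sorted as well
    let df = df.sort_by(vec![col("l_orderkey")])?;
    df.write_parquet("data/lineitem.parquet", DataFrameWriteOptions::new(), None)
        .await?;
    Ok(())
}
```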
diff --git a/benchmarks/src/tpch/run.rs b/benchmarks/src/tpch/run.rs
index eb9db821db02f..752a5a1a6ba01 100644
--- a/benchmarks/src/tpch/run.rs
+++ b/benchmarks/src/tpch/run.rs
@@ -90,6 +90,11 @@ pub struct RunOpt {
/// True by default.
#[structopt(short = "j", long = "prefer_hash_join", default_value = "true")]
prefer_hash_join: BoolDefaultTrue,
+
+ /// Mark the first column of each table as sorted in ascending order.
+ /// The tables should have been created with the `--sort` option for this to have any effect.
+ #[structopt(short = "t", long = "sorted")]
+ sorted: bool,
}
const TPCH_QUERY_START_ID: usize = 1;
@@ -275,20 +280,28 @@ impl RunOpt {
}
};
+ let table_path = ListingTableUrl::parse(path)?;
let options = ListingOptions::new(format)
.with_file_extension(extension)
.with_target_partitions(target_partitions)
.with_collect_stat(state.config().collect_statistics());
-
- let table_path = ListingTableUrl::parse(path)?;
- let config = ListingTableConfig::new(table_path).with_listing_options(options);
-
- let config = match table_format {
- "parquet" => config.infer_schema(&state).await?,
- "tbl" => config.with_schema(Arc::new(get_tbl_tpch_table_schema(table))),
- "csv" => config.with_schema(Arc::new(get_tpch_table_schema(table))),
+ let schema = match table_format {
+ "parquet" => options.infer_schema(&state, &table_path).await?,
+ "tbl" => Arc::new(get_tbl_tpch_table_schema(table)),
+ "csv" => Arc::new(get_tpch_table_schema(table)),
_ => unreachable!(),
};
+ let options = if self.sorted {
+ let key_column_name = schema.fields()[0].name();
+ options
+ .with_file_sort_order(vec![vec![col(key_column_name).sort(true, false)]])
+ } else {
+ options
+ };
+
+ let config = ListingTableConfig::new(table_path)
+ .with_listing_options(options)
+ .with_schema(schema);
Ok(Arc::new(ListingTable::try_new(config)?))
}
@@ -357,6 +370,7 @@ mod tests {
output_path: None,
disable_statistics: false,
prefer_hash_join: true,
+ sorted: false,
};
opt.register_tables(&ctx).await?;
let queries = get_query_sql(query)?;
@@ -393,6 +407,7 @@ mod tests {
output_path: None,
disable_statistics: false,
prefer_hash_join: true,
+ sorted: false,
};
opt.register_tables(&ctx).await?;
let queries = get_query_sql(query)?;
diff --git a/datafusion-cli/Cargo.toml b/datafusion-cli/Cargo.toml
index c70e3fc1caec5..e21c005cee5bf 100644
--- a/datafusion-cli/Cargo.toml
+++ b/datafusion-cli/Cargo.toml
@@ -39,7 +39,7 @@ arrow = { workspace = true }
async-trait = { workspace = true }
aws-config = "1.6.1"
aws-credential-types = "1.2.0"
-clap = { version = "4.5.34", features = ["derive", "cargo"] }
+clap = { version = "4.5.36", features = ["derive", "cargo"] }
datafusion = { workspace = true, features = [
"avro",
"crypto_expressions",
diff --git a/datafusion-cli/src/main.rs b/datafusion-cli/src/main.rs
index e21006312d85a..dad2d15f01a11 100644
--- a/datafusion-cli/src/main.rs
+++ b/datafusion-cli/src/main.rs
@@ -25,6 +25,7 @@ use datafusion::error::{DataFusionError, Result};
use datafusion::execution::context::SessionConfig;
use datafusion::execution::memory_pool::{FairSpillPool, GreedyMemoryPool, MemoryPool};
use datafusion::execution::runtime_env::RuntimeEnvBuilder;
+use datafusion::execution::DiskManager;
use datafusion::prelude::SessionContext;
use datafusion_cli::catalog::DynamicObjectStoreCatalog;
use datafusion_cli::functions::ParquetMetadataFunc;
@@ -37,6 +38,9 @@ use datafusion_cli::{
};
use clap::Parser;
+use datafusion::common::config_err;
+use datafusion::config::ConfigOptions;
+use datafusion::execution::disk_manager::DiskManagerConfig;
use mimalloc::MiMalloc;
#[global_allocator]
@@ -123,6 +127,14 @@ struct Args {
#[clap(long, help = "Enables console syntax highlighting")]
color: bool,
+
+ #[clap(
+ short = 'd',
+ long,
+ help = "Available disk space for spilling queries (e.g. '10g'), default to None (uses DataFusion's default value of '100g')",
+ value_parser(extract_disk_limit)
+ )]
+ disk_limit: Option<usize>,
}
#[tokio::main]
@@ -150,11 +162,7 @@ async fn main_inner() -> Result<()> {
env::set_current_dir(p).unwrap();
};
- let mut session_config = SessionConfig::from_env()?.with_information_schema(true);
-
- if let Some(batch_size) = args.batch_size {
- session_config = session_config.with_batch_size(batch_size);
- };
+ let session_config = get_session_config(&args)?;
let mut rt_builder = RuntimeEnvBuilder::new();
// set memory pool size
@@ -167,6 +175,18 @@ async fn main_inner() -> Result<()> {
rt_builder = rt_builder.with_memory_pool(pool)
}
+ // set disk limit
+ if let Some(disk_limit) = args.disk_limit {
+ let disk_manager = DiskManager::try_new(DiskManagerConfig::NewOs)?;
+
+ let disk_manager = Arc::try_unwrap(disk_manager)
+ .expect("DiskManager should be a single instance")
+ .with_max_temp_directory_size(disk_limit.try_into().unwrap())?;
+
+ let disk_config = DiskManagerConfig::new_existing(Arc::new(disk_manager));
+ rt_builder = rt_builder.with_disk_manager(disk_config);
+ }
+
let runtime_env = rt_builder.build_arc()?;
// enable dynamic file query
@@ -226,6 +246,30 @@ async fn main_inner() -> Result<()> {
Ok(())
}
+/// Get the session configuration based on the provided arguments
+/// and environment settings.
+fn get_session_config(args: &Args) -> Result<SessionConfig> {
+ // Read options from environment variables and merge with command line options
+ let mut config_options = ConfigOptions::from_env()?;
+
+ if let Some(batch_size) = args.batch_size {
+ if batch_size == 0 {
+ return config_err!("batch_size must be greater than 0");
+ }
+ config_options.execution.batch_size = batch_size;
+ };
+
+ // use easier to understand "tree" mode by default
+ // if the user hasn't specified an explain format in the environment
+ if env::var_os("DATAFUSION_EXPLAIN_FORMAT").is_none() {
+ config_options.explain.format = String::from("tree");
+ }
+
+ let session_config =
+ SessionConfig::from(config_options).with_information_schema(true);
+ Ok(session_config)
+}
+
fn parse_valid_file(dir: &str) -> Result {
if Path::new(dir).is_file() {
Ok(dir.to_string())
@@ -278,7 +322,7 @@ impl ByteUnit {
}
}
-fn extract_memory_pool_size(size: &str) -> Result<usize, String> {
+fn parse_size_string(size: &str, label: &str) -> Result<usize, String> {
static BYTE_SUFFIXES: LazyLock<HashMap<&'static str, ByteUnit>> =
LazyLock::new(|| {
let mut m = HashMap::new();
@@ -300,25 +344,33 @@ fn extract_memory_pool_size(size: &str) -> Result {
let lower = size.to_lowercase();
if let Some(caps) = SUFFIX_REGEX.captures(&lower) {
let num_str = caps.get(1).unwrap().as_str();
- let num = num_str.parse::<usize>().map_err(|_| {
- format!("Invalid numeric value in memory pool size '{}'", size)
- })?;
+ let num = num_str
+ .parse::<usize>()
+ .map_err(|_| format!("Invalid numeric value in {} '{}'", label, size))?;
let suffix = caps.get(2).map(|m| m.as_str()).unwrap_or("b");
- let unit = &BYTE_SUFFIXES
+ let unit = BYTE_SUFFIXES
.get(suffix)
- .ok_or_else(|| format!("Invalid memory pool size '{}'", size))?;
- let memory_pool_size = usize::try_from(unit.multiplier())
+ .ok_or_else(|| format!("Invalid {} '{}'", label, size))?;
+ let total_bytes = usize::try_from(unit.multiplier())
.ok()
.and_then(|multiplier| num.checked_mul(multiplier))
- .ok_or_else(|| format!("Memory pool size '{}' is too large", size))?;
+ .ok_or_else(|| format!("{} '{}' is too large", label, size))?;
- Ok(memory_pool_size)
+ Ok(total_bytes)
} else {
- Err(format!("Invalid memory pool size '{}'", size))
+ Err(format!("Invalid {} '{}'", label, size))
}
}
+pub fn extract_memory_pool_size(size: &str) -> Result<usize, String> {
+ parse_size_string(size, "memory pool size")
+}
+
+pub fn extract_disk_limit(size: &str) -> Result<usize, String> {
+ parse_size_string(size, "disk limit")
+}
+
#[cfg(test)]
mod tests {
use super::*;
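A condensed sketch of the new `--disk-limit` plumbing, using the same `DiskManager` calls as the patch. The helper name and the example 10 GB limit are assumptions for illustration, not part of the CLI:

```rust
use std::sync::Arc;

use datafusion::error::Result;
use datafusion::execution::disk_manager::DiskManagerConfig;
use datafusion::execution::runtime_env::{RuntimeEnv, RuntimeEnvBuilder};
use datafusion::execution::DiskManager;

/// Build a runtime whose spill files may use at most `limit_bytes` of disk.
fn runtime_with_disk_limit(limit_bytes: u64) -> Result<Arc<RuntimeEnv>> {
    let disk_manager = DiskManager::try_new(DiskManagerConfig::NewOs)?;
    let disk_manager = Arc::try_unwrap(disk_manager)
        .expect("newly created DiskManager has a single owner")
        .with_max_temp_directory_size(limit_bytes)?;
    RuntimeEnvBuilder::new()
        .with_disk_manager(DiskManagerConfig::new_existing(Arc::new(disk_manager)))
        .build_arc()
}

fn main() -> Result<()> {
    // e.g. `datafusion-cli -d 10g` corresponds to a 10 * 1024^3 byte cap
    let _rt = runtime_with_disk_limit(10 * 1024 * 1024 * 1024)?;
    Ok(())
}
```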
diff --git a/datafusion-cli/tests/cli_integration.rs b/datafusion-cli/tests/cli_integration.rs
index a54a920e97bbf..9ac09955512b8 100644
--- a/datafusion-cli/tests/cli_integration.rs
+++ b/datafusion-cli/tests/cli_integration.rs
@@ -59,6 +59,16 @@ fn init() {
"batch_size",
["--command", "show datafusion.execution.batch_size", "-q", "-b", "1"],
)]
+#[case::default_explain_plan(
+ "default_explain_plan",
+ // default explain format should be tree
+ ["--command", "EXPLAIN SELECT 123"],
+)]
+#[case::can_see_indent_format(
+ "can_see_indent_format",
+ // can choose the old explain format too
+ ["--command", "EXPLAIN FORMAT indent SELECT 123"],
+)]
#[test]
fn cli_quick_test<'a>(
#[case] snapshot_name: &'a str,
@@ -74,6 +84,21 @@ fn cli_quick_test<'a>(
assert_cmd_snapshot!(cmd);
}
+#[test]
+fn cli_explain_environment_overrides() {
+ let mut settings = make_settings();
+ settings.set_snapshot_suffix("explain_plan_environment_overrides");
+ let _bound = settings.bind_to_scope();
+
+ let mut cmd = cli();
+
+ // should use the environment variable to override the default explain plan
+ cmd.env("DATAFUSION_EXPLAIN_FORMAT", "pgjson")
+ .args(["--command", "EXPLAIN SELECT 123"]);
+
+ assert_cmd_snapshot!(cmd);
+}
+
#[rstest]
#[case("csv")]
#[case("tsv")]
diff --git a/datafusion-cli/tests/snapshots/cli_explain_environment_overrides@explain_plan_environment_overrides.snap b/datafusion-cli/tests/snapshots/cli_explain_environment_overrides@explain_plan_environment_overrides.snap
new file mode 100644
index 0000000000000..6b3a247dd7b82
--- /dev/null
+++ b/datafusion-cli/tests/snapshots/cli_explain_environment_overrides@explain_plan_environment_overrides.snap
@@ -0,0 +1,44 @@
+---
+source: datafusion-cli/tests/cli_integration.rs
+info:
+ program: datafusion-cli
+ args:
+ - "--command"
+ - EXPLAIN SELECT 123
+ env:
+ DATAFUSION_EXPLAIN_FORMAT: pgjson
+snapshot_kind: text
+---
+success: true
+exit_code: 0
+----- stdout -----
+[CLI_VERSION]
++--------------+-----------------------------------------+
+| plan_type | plan |
++--------------+-----------------------------------------+
+| logical_plan | [ |
+| | { |
+| | "Plan": { |
+| | "Expressions": [ |
+| | "Int64(123)" |
+| | ], |
+| | "Node Type": "Projection", |
+| | "Output": [ |
+| | "Int64(123)" |
+| | ], |
+| | "Plans": [ |
+| | { |
+| | "Node Type": "EmptyRelation", |
+| | "Output": [], |
+| | "Plans": [] |
+| | } |
+| | ] |
+| | } |
+| | } |
+| | ] |
++--------------+-----------------------------------------+
+1 row(s) fetched.
+[ELAPSED]
+
+
+----- stderr -----
diff --git a/datafusion-cli/tests/snapshots/cli_quick_test@can_see_indent_format.snap b/datafusion-cli/tests/snapshots/cli_quick_test@can_see_indent_format.snap
new file mode 100644
index 0000000000000..b2fb64709974e
--- /dev/null
+++ b/datafusion-cli/tests/snapshots/cli_quick_test@can_see_indent_format.snap
@@ -0,0 +1,27 @@
+---
+source: datafusion-cli/tests/cli_integration.rs
+info:
+ program: datafusion-cli
+ args:
+ - "--command"
+ - EXPLAIN FORMAT indent SELECT 123
+snapshot_kind: text
+---
+success: true
+exit_code: 0
+----- stdout -----
+[CLI_VERSION]
++---------------+------------------------------------------+
+| plan_type | plan |
++---------------+------------------------------------------+
+| logical_plan | Projection: Int64(123) |
+| | EmptyRelation |
+| physical_plan | ProjectionExec: expr=[123 as Int64(123)] |
+| | PlaceholderRowExec |
+| | |
++---------------+------------------------------------------+
+2 row(s) fetched.
+[ELAPSED]
+
+
+----- stderr -----
diff --git a/datafusion-cli/tests/snapshots/cli_quick_test@default_explain_plan.snap b/datafusion-cli/tests/snapshots/cli_quick_test@default_explain_plan.snap
new file mode 100644
index 0000000000000..46ee6be64f624
--- /dev/null
+++ b/datafusion-cli/tests/snapshots/cli_quick_test@default_explain_plan.snap
@@ -0,0 +1,31 @@
+---
+source: datafusion-cli/tests/cli_integration.rs
+info:
+ program: datafusion-cli
+ args:
+ - "--command"
+ - EXPLAIN SELECT 123
+snapshot_kind: text
+---
+success: true
+exit_code: 0
+----- stdout -----
+[CLI_VERSION]
++---------------+-------------------------------+
+| plan_type | plan |
++---------------+-------------------------------+
+| physical_plan | ┌───────────────────────────┐ |
+| | │ ProjectionExec │ |
+| | │ -------------------- │ |
+| | │ Int64(123): 123 │ |
+| | └─────────────┬─────────────┘ |
+| | ┌─────────────┴─────────────┐ |
+| | │ PlaceholderRowExec │ |
+| | └───────────────────────────┘ |
+| | |
++---------------+-------------------------------+
+1 row(s) fetched.
+[ELAPSED]
+
+
+----- stderr -----
diff --git a/datafusion-examples/Cargo.toml b/datafusion-examples/Cargo.toml
index f6b7d641d1264..2ba1673d97b99 100644
--- a/datafusion-examples/Cargo.toml
+++ b/datafusion-examples/Cargo.toml
@@ -62,6 +62,7 @@ bytes = { workspace = true }
dashmap = { workspace = true }
# note only use main datafusion crate for examples
datafusion = { workspace = true, default-features = true }
+datafusion-ffi = { workspace = true }
datafusion-proto = { workspace = true }
env_logger = { workspace = true }
futures = { workspace = true }
diff --git a/datafusion-examples/examples/advanced_parquet_index.rs b/datafusion-examples/examples/advanced_parquet_index.rs
index b8c303e221618..03ef3d66f9d71 100644
--- a/datafusion-examples/examples/advanced_parquet_index.rs
+++ b/datafusion-examples/examples/advanced_parquet_index.rs
@@ -571,7 +571,9 @@ impl ParquetFileReaderFactory for CachedParquetFileReaderFactory {
.to_string();
let object_store = Arc::clone(&self.object_store);
- let mut inner = ParquetObjectReader::new(object_store, file_meta.object_meta);
+ let mut inner =
+ ParquetObjectReader::new(object_store, file_meta.object_meta.location)
+ .with_file_size(file_meta.object_meta.size);
if let Some(hint) = metadata_size_hint {
inner = inner.with_footer_size_hint(hint)
@@ -599,7 +601,7 @@ struct ParquetReaderWithCache {
impl AsyncFileReader for ParquetReaderWithCache {
fn get_bytes(
&mut self,
- range: Range<usize>,
+ range: Range<u64>,
) -> BoxFuture<'_, datafusion::parquet::errors::Result<Bytes>> {
println!("get_bytes: {} Reading range {:?}", self.filename, range);
self.inner.get_bytes(range)
@@ -607,7 +609,7 @@ impl AsyncFileReader for ParquetReaderWithCache {
fn get_byte_ranges(
&mut self,
- ranges: Vec<Range<usize>>,
+ ranges: Vec<Range<u64>>,
) -> BoxFuture<'_, datafusion::parquet::errors::Result<Vec<Bytes>>> {
println!(
"get_byte_ranges: {} Reading ranges {:?}",
@@ -618,6 +620,7 @@ impl AsyncFileReader for ParquetReaderWithCache {
fn get_metadata(
&mut self,
+ _options: Option<&ArrowReaderOptions>,
) -> BoxFuture<'_, datafusion::parquet::errors::Result<Arc<ParquetMetaData>>> {
println!("get_metadata: {} returning cached metadata", self.filename);
diff --git a/datafusion-examples/examples/parquet_index.rs b/datafusion-examples/examples/parquet_index.rs
index 0b6bccc27b1d1..7d6ce4d86af1a 100644
--- a/datafusion-examples/examples/parquet_index.rs
+++ b/datafusion-examples/examples/parquet_index.rs
@@ -685,7 +685,7 @@ fn make_demo_file(path: impl AsRef, value_range: Range) -> Result<()>
let num_values = value_range.len();
let file_names =
- StringArray::from_iter_values(std::iter::repeat(&filename).take(num_values));
+ StringArray::from_iter_values(std::iter::repeat_n(&filename, num_values));
let values = Int32Array::from_iter_values(value_range);
let batch = RecordBatch::try_from_iter(vec![
("file_name", Arc::new(file_names) as ArrayRef),
diff --git a/datafusion-examples/examples/sql_dialect.rs b/datafusion-examples/examples/sql_dialect.rs
index 12141847ca361..840faa63b1a48 100644
--- a/datafusion-examples/examples/sql_dialect.rs
+++ b/datafusion-examples/examples/sql_dialect.rs
@@ -17,10 +17,10 @@
use std::fmt::Display;
-use datafusion::error::Result;
+use datafusion::error::{DataFusionError, Result};
use datafusion::sql::{
parser::{CopyToSource, CopyToStatement, DFParser, DFParserBuilder, Statement},
- sqlparser::{keywords::Keyword, parser::ParserError, tokenizer::Token},
+ sqlparser::{keywords::Keyword, tokenizer::Token},
};
/// This example demonstrates how to use the DFParser to parse a statement in a custom way
@@ -62,7 +62,7 @@ impl<'a> MyParser<'a> {
/// This is the entry point to our parser -- it handles `COPY` statements specially
/// but otherwise delegates to the existing DataFusion parser.
- pub fn parse_statement(&mut self) -> Result {
+ pub fn parse_statement(&mut self) -> Result {
if self.is_copy() {
self.df_parser.parser.next_token(); // COPY
let df_statement = self.df_parser.parse_copy()?;
diff --git a/datafusion-testing b/datafusion-testing
index 243047b9dd682..e9f9e22ccf091 160000
--- a/datafusion-testing
+++ b/datafusion-testing
@@ -1 +1 @@
-Subproject commit 243047b9dd682be688628539c604daaddfe640f9
+Subproject commit e9f9e22ccf09145a7368f80fd6a871f11e2b4481
diff --git a/datafusion/catalog/src/lib.rs b/datafusion/catalog/src/lib.rs
index f160bddd2b9c1..0394b05277dac 100644
--- a/datafusion/catalog/src/lib.rs
+++ b/datafusion/catalog/src/lib.rs
@@ -50,7 +50,7 @@ pub use catalog::*;
pub use datafusion_session::Session;
pub use dynamic_file::catalog::*;
pub use memory::{
- MemoryCatalogProvider, MemoryCatalogProviderList, MemorySchemaProvider,
+ MemTable, MemoryCatalogProvider, MemoryCatalogProviderList, MemorySchemaProvider,
};
pub use r#async::*;
pub use schema::*;
diff --git a/datafusion/catalog/src/memory/mod.rs b/datafusion/catalog/src/memory/mod.rs
index 4c5cf1a9ae9de..541d25b3345b4 100644
--- a/datafusion/catalog/src/memory/mod.rs
+++ b/datafusion/catalog/src/memory/mod.rs
@@ -17,6 +17,12 @@
pub(crate) mod catalog;
pub(crate) mod schema;
+pub(crate) mod table;
pub use catalog::*;
pub use schema::*;
+pub use table::*;
+
+// backward compatibility
+pub use datafusion_datasource::memory::MemorySourceConfig;
+pub use datafusion_datasource::source::DataSourceExec;
diff --git a/datafusion/catalog/src/memory/table.rs b/datafusion/catalog/src/memory/table.rs
new file mode 100644
index 0000000000000..81243e2c4889e
--- /dev/null
+++ b/datafusion/catalog/src/memory/table.rs
@@ -0,0 +1,296 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+//! [`MemTable`] for querying `Vec` by DataFusion.
+
+use std::any::Any;
+use std::collections::HashMap;
+use std::fmt::Debug;
+use std::sync::Arc;
+
+use crate::TableProvider;
+use datafusion_common::error::Result;
+use datafusion_expr::Expr;
+use datafusion_expr::TableType;
+use datafusion_physical_expr::create_physical_sort_exprs;
+use datafusion_physical_plan::repartition::RepartitionExec;
+use datafusion_physical_plan::{
+ common, ExecutionPlan, ExecutionPlanProperties, Partitioning,
+};
+
+use arrow::datatypes::SchemaRef;
+use arrow::record_batch::RecordBatch;
+use datafusion_common::{not_impl_err, plan_err, Constraints, DFSchema, SchemaExt};
+use datafusion_common_runtime::JoinSet;
+use datafusion_datasource::memory::MemSink;
+use datafusion_datasource::memory::MemorySourceConfig;
+use datafusion_datasource::sink::DataSinkExec;
+use datafusion_datasource::source::DataSourceExec;
+use datafusion_expr::dml::InsertOp;
+use datafusion_expr::SortExpr;
+use datafusion_session::Session;
+
+use async_trait::async_trait;
+use futures::StreamExt;
+use log::debug;
+use parking_lot::Mutex;
+use tokio::sync::RwLock;
+
+// backward compatibility
+pub use datafusion_datasource::memory::PartitionData;
+
+/// In-memory data source for presenting a `Vec` as a
+/// data source that can be queried by DataFusion. This allows data to
+/// be pre-loaded into memory and then repeatedly queried without
+/// incurring additional file I/O overhead.
+#[derive(Debug)]
+pub struct MemTable {
+ schema: SchemaRef,
+ // batches used to be pub(crate), but it needs to be public for the tests
+ pub batches: Vec<PartitionData>,
+ constraints: Constraints,
+ column_defaults: HashMap<String, Expr>,
+ /// Optional pre-known sort order(s). Must be `SortExpr`s.
+ /// Inserting data into this table removes the order.
+ pub sort_order: Arc<Mutex<Vec<Vec<SortExpr>>>>,
+}
+
+impl MemTable {
+ /// Create a new in-memory table from the provided schema and record batches
+ pub fn try_new(schema: SchemaRef, partitions: Vec<Vec<RecordBatch>>) -> Result<Self> {
+ for batches in partitions.iter().flatten() {
+ let batches_schema = batches.schema();
+ if !schema.contains(&batches_schema) {
+ debug!(
+ "mem table schema does not contain batches schema. \
+ Target_schema: {schema:?}. Batches Schema: {batches_schema:?}"
+ );
+ return plan_err!("Mismatch between schema and batches");
+ }
+ }
+
+ Ok(Self {
+ schema,
+ batches: partitions
+ .into_iter()
+ .map(|e| Arc::new(RwLock::new(e)))
+ .collect::>(),
+ constraints: Constraints::empty(),
+ column_defaults: HashMap::new(),
+ sort_order: Arc::new(Mutex::new(vec![])),
+ })
+ }
+
+ /// Assign constraints
+ pub fn with_constraints(mut self, constraints: Constraints) -> Self {
+ self.constraints = constraints;
+ self
+ }
+
+ /// Assign column defaults
+ pub fn with_column_defaults(
+ mut self,
+ column_defaults: HashMap,
+ ) -> Self {
+ self.column_defaults = column_defaults;
+ self
+ }
+
+ /// Specify an optional pre-known sort order(s). Must be `SortExpr`s.
+ ///
+ /// If the data is not sorted by this order, DataFusion may produce
+ /// incorrect results.
+ ///
+ /// DataFusion may take advantage of this ordering to omit sorts
+ /// or use more efficient algorithms.
+ ///
+ /// Note that multiple sort orders are supported, if some are known to be
+ /// equivalent.
+ pub fn with_sort_order(self, mut sort_order: Vec<Vec<SortExpr>>) -> Self {
+ std::mem::swap(self.sort_order.lock().as_mut(), &mut sort_order);
+ self
+ }
+
+ /// Create a mem table by reading from another data source
+ pub async fn load(
+ t: Arc,
+ output_partitions: Option,
+ state: &dyn Session,
+ ) -> Result {
+ let schema = t.schema();
+ let constraints = t.constraints();
+ let exec = t.scan(state, None, &[], None).await?;
+ let partition_count = exec.output_partitioning().partition_count();
+
+ let mut join_set = JoinSet::new();
+
+ for part_idx in 0..partition_count {
+ let task = state.task_ctx();
+ let exec = Arc::clone(&exec);
+ join_set.spawn(async move {
+ let stream = exec.execute(part_idx, task)?;
+ common::collect(stream).await
+ });
+ }
+
+ let mut data: Vec> =
+ Vec::with_capacity(exec.output_partitioning().partition_count());
+
+ while let Some(result) = join_set.join_next().await {
+ match result {
+ Ok(res) => data.push(res?),
+ Err(e) => {
+ if e.is_panic() {
+ std::panic::resume_unwind(e.into_panic());
+ } else {
+ unreachable!();
+ }
+ }
+ }
+ }
+
+ let mut exec = DataSourceExec::new(Arc::new(MemorySourceConfig::try_new(
+ &data,
+ Arc::clone(&schema),
+ None,
+ )?));
+ if let Some(cons) = constraints {
+ exec = exec.with_constraints(cons.clone());
+ }
+
+ if let Some(num_partitions) = output_partitions {
+ let exec = RepartitionExec::try_new(
+ Arc::new(exec),
+ Partitioning::RoundRobinBatch(num_partitions),
+ )?;
+
+ // execute and collect results
+ let mut output_partitions = vec![];
+ for i in 0..exec.properties().output_partitioning().partition_count() {
+ // execute this *output* partition and collect all batches
+ let task_ctx = state.task_ctx();
+ let mut stream = exec.execute(i, task_ctx)?;
+ let mut batches = vec![];
+ while let Some(result) = stream.next().await {
+ batches.push(result?);
+ }
+ output_partitions.push(batches);
+ }
+
+ return MemTable::try_new(Arc::clone(&schema), output_partitions);
+ }
+ MemTable::try_new(Arc::clone(&schema), data)
+ }
+}
+
+#[async_trait]
+impl TableProvider for MemTable {
+ fn as_any(&self) -> &dyn Any {
+ self
+ }
+
+ fn schema(&self) -> SchemaRef {
+ Arc::clone(&self.schema)
+ }
+
+ fn constraints(&self) -> Option<&Constraints> {
+ Some(&self.constraints)
+ }
+
+ fn table_type(&self) -> TableType {
+ TableType::Base
+ }
+
+ async fn scan(
+ &self,
+ state: &dyn Session,
+ projection: Option<&Vec>,
+ _filters: &[Expr],
+ _limit: Option,
+ ) -> Result> {
+ let mut partitions = vec![];
+ for arc_inner_vec in self.batches.iter() {
+ let inner_vec = arc_inner_vec.read().await;
+ partitions.push(inner_vec.clone())
+ }
+
+ let mut source =
+ MemorySourceConfig::try_new(&partitions, self.schema(), projection.cloned())?;
+
+ let show_sizes = state.config_options().explain.show_sizes;
+ source = source.with_show_sizes(show_sizes);
+
+ // add sort information if present
+ let sort_order = self.sort_order.lock();
+ if !sort_order.is_empty() {
+ let df_schema = DFSchema::try_from(self.schema.as_ref().clone())?;
+
+ let file_sort_order = sort_order
+ .iter()
+ .map(|sort_exprs| {
+ create_physical_sort_exprs(
+ sort_exprs,
+ &df_schema,
+ state.execution_props(),
+ )
+ })
+ .collect::>>()?;
+ source = source.try_with_sort_information(file_sort_order)?;
+ }
+
+ Ok(DataSourceExec::from_data_source(source))
+ }
+
+ /// Returns an ExecutionPlan that inserts the execution results of a given [`ExecutionPlan`] into this [`MemTable`].
+ ///
+ /// The [`ExecutionPlan`] must have the same schema as this [`MemTable`].
+ ///
+ /// # Arguments
+ ///
+ /// * `state` - The [`SessionState`] containing the context for executing the plan.
+ /// * `input` - The [`ExecutionPlan`] to execute and insert.
+ ///
+ /// # Returns
+ ///
+ /// * A plan that returns the number of rows written.
+ ///
+ /// [`SessionState`]: https://docs.rs/datafusion/latest/datafusion/execution/session_state/struct.SessionState.html
+ async fn insert_into(
+ &self,
+ _state: &dyn Session,
+ input: Arc,
+ insert_op: InsertOp,
+ ) -> Result> {
+ // If we are inserting into the table, any sort order may be messed up so reset it here
+ *self.sort_order.lock() = vec![];
+
+ // Create a physical plan from the logical plan.
+ // Check that the schema of the plan matches the schema of this table.
+ self.schema()
+ .logically_equivalent_names_and_types(&input.schema())?;
+
+ if insert_op != InsertOp::Append {
+ return not_impl_err!("{insert_op} not implemented for MemoryTable yet");
+ }
+ let sink = MemSink::try_new(self.batches.clone(), Arc::clone(&self.schema))?;
+ Ok(Arc::new(DataSinkExec::new(input, Arc::new(sink), None)))
+ }
+
+ fn get_column_default(&self, column: &str) -> Option<&Expr> {
+ self.column_defaults.get(column)
+ }
+}
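A short usage sketch for the relocated `MemTable`, exercising the `with_sort_order` path shown above. The `datafusion::datasource::MemTable` import is the long-standing re-export (after this change the type also lives in `datafusion_catalog`); the table and column names are arbitrary:

```rust
use std::sync::Arc;

use datafusion::arrow::array::Int32Array;
use datafusion::arrow::datatypes::{DataType, Field, Schema};
use datafusion::arrow::record_batch::RecordBatch;
use datafusion::datasource::MemTable;
use datafusion::error::Result;
use datafusion::prelude::*;

#[tokio::main]
async fn main() -> Result<()> {
    let schema = Arc::new(Schema::new(vec![Field::new("id", DataType::Int32, false)]));
    let batch = RecordBatch::try_new(
        Arc::clone(&schema),
        vec![Arc::new(Int32Array::from(vec![1, 2, 3]))],
    )?;

    // Declare that the batches are already sorted by `id`, so the planner can
    // avoid a redundant SortExec for ORDER BY id.
    let table = MemTable::try_new(schema, vec![vec![batch]])?
        .with_sort_order(vec![vec![col("id").sort(true, false)]]);

    let ctx = SessionContext::new();
    ctx.register_table("t", Arc::new(table))?;
    ctx.sql("SELECT id FROM t ORDER BY id").await?.show().await?;
    Ok(())
}
```

As the `insert_into` implementation above notes, inserting new data resets the declared order, since the appended batches are not guaranteed to preserve it.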
diff --git a/datafusion/common-runtime/src/common.rs b/datafusion/common-runtime/src/common.rs
index 361f6af95cf13..e7aba1d455ee6 100644
--- a/datafusion/common-runtime/src/common.rs
+++ b/datafusion/common-runtime/src/common.rs
@@ -15,18 +15,25 @@
// specific language governing permissions and limitations
// under the License.
-use std::future::Future;
+use std::{
+ future::Future,
+ pin::Pin,
+ task::{Context, Poll},
+};
-use crate::JoinSet;
-use tokio::task::JoinError;
+use tokio::task::{JoinError, JoinHandle};
+
+use crate::trace_utils::{trace_block, trace_future};
/// Helper that provides a simple API to spawn a single task and join it.
/// Provides guarantees of aborting on `Drop` to keep it cancel-safe.
+/// Note that if the task was spawned with `spawn_blocking`, it will only be
+/// aborted if it hasn't started yet.
///
-/// Technically, it's just a wrapper of `JoinSet` (with size=1).
+/// Technically, it's just a wrapper of a `JoinHandle` overriding drop.
#[derive(Debug)]
pub struct SpawnedTask<R> {
- inner: JoinSet<R>,
+ inner: JoinHandle<R>,
}
impl SpawnedTask {
@@ -36,8 +43,9 @@ impl SpawnedTask {
T: Send + 'static,
R: Send,
{
- let mut inner = JoinSet::new();
- inner.spawn(task);
+ // Ok to use spawn here as SpawnedTask handles aborting/cancelling the task on Drop
+ #[allow(clippy::disallowed_methods)]
+ let inner = tokio::task::spawn(trace_future(task));
Self { inner }
}
@@ -47,22 +55,21 @@ impl SpawnedTask {
T: Send + 'static,
R: Send,
{
- let mut inner = JoinSet::new();
- inner.spawn_blocking(task);
+ // Ok to use spawn_blocking here as SpawnedTask handles aborting/cancelling the task on Drop
+ #[allow(clippy::disallowed_methods)]
+ let inner = tokio::task::spawn_blocking(trace_block(task));
Self { inner }
}
/// Joins the task, returning the result of join (`Result`).
- pub async fn join(mut self) -> Result<R, JoinError> {
- self.inner
- .join_next()
- .await
- .expect("`SpawnedTask` instance always contains exactly 1 task")
+ /// Same as awaiting the spawned task, but left for backwards compatibility.
+ pub async fn join(self) -> Result<R, JoinError> {
+ self.await
}
/// Joins the task and unwinds the panic if it happens.
pub async fn join_unwind(self) -> Result<R, JoinError> {
- self.join().await.map_err(|e| {
+ self.await.map_err(|e| {
// `JoinError` can be caused either by panic or cancellation. We have to handle panics:
if e.is_panic() {
std::panic::resume_unwind(e.into_panic());
@@ -77,17 +84,32 @@ impl SpawnedTask {
}
}
+impl<R> Future for SpawnedTask<R> {
+ type Output = Result<R, JoinError>;
+
+ fn poll(mut self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll<Self::Output> {
+ Pin::new(&mut self.inner).poll(cx)
+ }
+}
+
+impl<R> Drop for SpawnedTask<R> {
+ fn drop(&mut self) {
+ self.inner.abort();
+ }
+}
+
#[cfg(test)]
mod tests {
use super::*;
use std::future::{pending, Pending};
- use tokio::runtime::Runtime;
+ use tokio::{runtime::Runtime, sync::oneshot};
#[tokio::test]
async fn runtime_shutdown() {
let rt = Runtime::new().unwrap();
+ #[allow(clippy::async_yields_async)]
let task = rt
.spawn(async {
SpawnedTask::spawn(async {
@@ -119,4 +141,36 @@ mod tests {
.await
.ok();
}
+
+ #[tokio::test]
+ async fn cancel_not_started_task() {
+ let (sender, receiver) = oneshot::channel::();
+ let task = SpawnedTask::spawn(async {
+ // Shouldn't be reached.
+ sender.send(42).unwrap();
+ });
+
+ drop(task);
+
+ // If the task was cancelled, the sender was also dropped,
+ // and awaiting the receiver should result in an error.
+ assert!(receiver.await.is_err());
+ }
+
+ #[tokio::test]
+ async fn cancel_ongoing_task() {
+ let (sender, mut receiver) = tokio::sync::mpsc::channel(1);
+ let task = SpawnedTask::spawn(async move {
+ sender.send(1).await.unwrap();
+ // This line will never be reached because the channel has a buffer
+ // of 1.
+ sender.send(2).await.unwrap();
+ });
+ // Let the task start.
+ assert_eq!(receiver.recv().await.unwrap(), 1);
+ drop(task);
+
+ // The sender was dropped so we receive `None`.
+ assert!(receiver.recv().await.is_none());
+ }
}
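To make the new semantics concrete, here is a small hedged example against `datafusion_common_runtime`: a `SpawnedTask` can be joined (or awaited directly, since it now implements `Future`) and is aborted when dropped; for `spawn_blocking` the abort only takes effect if the closure has not started yet:

```rust
use datafusion_common_runtime::SpawnedTask;

#[tokio::main]
async fn main() {
    // Join a task and unwrap panics, as `join_unwind` does.
    let task = SpawnedTask::spawn(async { 21 * 2 });
    assert_eq!(task.join_unwind().await.unwrap(), 42);

    // Dropping a SpawnedTask aborts the underlying tokio task.
    let forever = SpawnedTask::spawn(async {
        std::future::pending::<()>().await;
    });
    drop(forever); // the pending future is cancelled here

    // SpawnedTask also implements Future, so it can be awaited directly.
    let direct = SpawnedTask::spawn(async { "done" });
    assert_eq!(direct.await.unwrap(), "done");
}
```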
diff --git a/datafusion/common/Cargo.toml b/datafusion/common/Cargo.toml
index 39b47a96bccf3..d471e48be4e75 100644
--- a/datafusion/common/Cargo.toml
+++ b/datafusion/common/Cargo.toml
@@ -58,12 +58,12 @@ base64 = "0.22.1"
half = { workspace = true }
hashbrown = { workspace = true }
indexmap = { workspace = true }
-libc = "0.2.171"
+libc = "0.2.172"
log = { workspace = true }
object_store = { workspace = true, optional = true }
parquet = { workspace = true, optional = true, default-features = true }
paste = "1.0.15"
-pyo3 = { version = "0.23.5", optional = true }
+pyo3 = { version = "0.24.2", optional = true }
recursive = { workspace = true, optional = true }
sqlparser = { workspace = true }
tokio = { workspace = true }
diff --git a/datafusion/common/src/config.rs b/datafusion/common/src/config.rs
index b0f17630c910c..1e0f63d6d81ca 100644
--- a/datafusion/common/src/config.rs
+++ b/datafusion/common/src/config.rs
@@ -149,9 +149,17 @@ macro_rules! config_namespace {
// $(#[allow(deprecated)])?
{
$(let value = $transform(value);)? // Apply transformation if specified
- $(log::warn!($warn);)? // Log warning if specified
#[allow(deprecated)]
- self.$field_name.set(rem, value.as_ref())
+ let ret = self.$field_name.set(rem, value.as_ref());
+
+ $(if !$warn.is_empty() {
+ let default: $field_type = $default;
+ #[allow(deprecated)]
+ if default != self.$field_name {
+ log::warn!($warn);
+ }
+ })? // Log warning if specified, and the value is not the default
+ ret
}
},
)*
@@ -292,7 +300,7 @@ config_namespace! {
/// concurrency.
///
/// Defaults to the number of CPU cores on the system
- pub target_partitions: usize, default = get_available_parallelism()
+ pub target_partitions: usize, transform = ExecutionOptions::normalized_parallelism, default = get_available_parallelism()
/// The default time zone
///
@@ -308,7 +316,7 @@ config_namespace! {
/// This is mostly use to plan `UNION` children in parallel.
///
/// Defaults to the number of CPU cores on the system
- pub planning_concurrency: usize, default = get_available_parallelism()
+ pub planning_concurrency: usize, transform = ExecutionOptions::normalized_parallelism, default = get_available_parallelism()
/// When set to true, skips verifying that the schema produced by
/// planning the input of `LogicalPlan::Aggregate` exactly matches the
@@ -451,6 +459,14 @@ config_namespace! {
/// BLOB instead.
pub binary_as_string: bool, default = false
+ /// (reading) If true, parquet reader will read columns of
+ /// physical type int96 as originating from a different resolution
+ /// than nanosecond. This is useful for reading data from systems like Spark
+ /// which stores microsecond resolution timestamps in an int96 allowing it
+ /// to write values with a larger date range than 64-bit timestamps with
+ /// nanosecond resolution.
+ pub coerce_int96: Option<String>, transform = str::to_lowercase, default = None
+
// The following options affect writing to parquet files
// and map to parquet::file::properties::WriterProperties
@@ -723,6 +739,19 @@ config_namespace! {
}
}
+impl ExecutionOptions {
+ /// Returns the correct parallelism based on the provided `value`.
+ /// If `value` is `"0"`, returns the default available parallelism, computed with
+ /// `get_available_parallelism`. Otherwise, returns `value`.
+ fn normalized_parallelism(value: &str) -> String {
+ if value.parse::<usize>() == Ok(0) {
+ get_available_parallelism().to_string()
+ } else {
+ value.to_owned()
+ }
+ }
+}
+
/// A key value pair, with a corresponding description
#[derive(Debug)]
pub struct ConfigEntry {
@@ -1999,8 +2028,8 @@ mod tests {
use std::collections::HashMap;
use crate::config::{
- ConfigEntry, ConfigExtension, ConfigFileType, ExtensionOptions, Extensions,
- TableOptions,
+ ConfigEntry, ConfigExtension, ConfigField, ConfigFileType, ExtensionOptions,
+ Extensions, TableOptions,
};
#[derive(Default, Debug, Clone)]
@@ -2085,6 +2114,37 @@ mod tests {
assert_eq!(table_config.csv.escape.unwrap() as char, '\'');
}
+ #[test]
+ fn warning_only_not_default() {
+ use std::sync::atomic::AtomicUsize;
+ static COUNT: AtomicUsize = AtomicUsize::new(0);
+ use log::{Level, LevelFilter, Metadata, Record};
+ struct SimpleLogger;
+ impl log::Log for SimpleLogger {
+ fn enabled(&self, metadata: &Metadata) -> bool {
+ metadata.level() <= Level::Info
+ }
+
+ fn log(&self, record: &Record) {
+ if self.enabled(record.metadata()) {
+ COUNT.fetch_add(1, std::sync::atomic::Ordering::Relaxed);
+ }
+ }
+ fn flush(&self) {}
+ }
+ log::set_logger(&SimpleLogger).unwrap();
+ log::set_max_level(LevelFilter::Info);
+ let mut sql_parser_options = crate::config::SqlParserOptions::default();
+ sql_parser_options
+ .set("enable_options_value_normalization", "false")
+ .unwrap();
+ assert_eq!(COUNT.load(std::sync::atomic::Ordering::Relaxed), 0);
+ sql_parser_options
+ .set("enable_options_value_normalization", "true")
+ .unwrap();
+ assert_eq!(COUNT.load(std::sync::atomic::Ordering::Relaxed), 1);
+ }
+
#[cfg(feature = "parquet")]
#[test]
fn parquet_table_options() {
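A quick illustration of the new `normalized_parallelism` transform via the standard `ConfigOptions::set` path: setting `target_partitions` to `"0"` now resolves to the machine's available parallelism rather than being stored as an invalid zero. A minimal sketch:

```rust
use datafusion::config::ConfigOptions;
use datafusion::error::Result;

fn main() -> Result<()> {
    let mut options = ConfigOptions::new();
    options.set("datafusion.execution.target_partitions", "0")?;
    // "0" is rewritten by the transform, so the stored value is never zero.
    assert!(options.execution.target_partitions >= 1);
    println!("target_partitions = {}", options.execution.target_partitions);
    Ok(())
}
```

The same normalization applies to `planning_concurrency`, which uses the identical transform.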
diff --git a/datafusion/common/src/dfschema.rs b/datafusion/common/src/dfschema.rs
index 43d082f9dc936..66a26a18c0dc8 100644
--- a/datafusion/common/src/dfschema.rs
+++ b/datafusion/common/src/dfschema.rs
@@ -641,7 +641,7 @@ impl DFSchema {
|| (!DFSchema::datatype_is_semantically_equal(
f1.data_type(),
f2.data_type(),
- ) && !can_cast_types(f2.data_type(), f1.data_type()))
+ ))
{
_plan_err!(
"Schema mismatch: Expected field '{}' with type {:?}, \
@@ -659,9 +659,12 @@ impl DFSchema {
}
/// Checks if two [`DataType`]s are logically equal. This is a notably weaker constraint
- /// than datatype_is_semantically_equal in that a Dictionary type is logically
- /// equal to a plain V type, but not semantically equal. Dictionary is also
- /// logically equal to Dictionary.
+ /// than datatype_is_semantically_equal in that different representations of same data can be
+ /// logically but not semantically equivalent. Semantically equivalent types are always also
+ /// logically equivalent. For example:
+ /// - a Dictionary type is logically equal to a plain V type
+ /// - a Dictionary is also logically equal to Dictionary
+ /// - Utf8 and Utf8View are logically equal
pub fn datatype_is_logically_equal(dt1: &DataType, dt2: &DataType) -> bool {
// check nested fields
match (dt1, dt2) {
@@ -711,12 +714,15 @@ impl DFSchema {
.zip(iter2)
.all(|((t1, f1), (t2, f2))| t1 == t2 && Self::field_is_logically_equal(f1, f2))
}
- _ => dt1 == dt2,
+ // Utf8 and Utf8View are logically equivalent
+ (DataType::Utf8, DataType::Utf8View) => true,
+ (DataType::Utf8View, DataType::Utf8) => true,
+ _ => Self::datatype_is_semantically_equal(dt1, dt2),
}
}
/// Returns true of two [`DataType`]s are semantically equal (same
- /// name and type), ignoring both metadata and nullability.
+ /// name and type), ignoring metadata, nullability, and decimal precision/scale.
///
/// request to upstream:
pub fn datatype_is_semantically_equal(dt1: &DataType, dt2: &DataType) -> bool {
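The practical effect of the relaxed check can be seen with a tiny assertion (a sketch, assuming the function remains the public associated fn on `DFSchema` shown above):

```rust
use datafusion::arrow::datatypes::DataType;
use datafusion::common::DFSchema;

fn main() {
    // Utf8 and Utf8View now compare as logically equal...
    assert!(DFSchema::datatype_is_logically_equal(
        &DataType::Utf8,
        &DataType::Utf8View
    ));
    // ...even though they remain distinct physical types.
    assert_ne!(DataType::Utf8, DataType::Utf8View);
}
```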
diff --git a/datafusion/common/src/file_options/parquet_writer.rs b/datafusion/common/src/file_options/parquet_writer.rs
index 939cb5e1a3578..3e33466edf505 100644
--- a/datafusion/common/src/file_options/parquet_writer.rs
+++ b/datafusion/common/src/file_options/parquet_writer.rs
@@ -239,6 +239,7 @@ impl ParquetOptions {
bloom_filter_on_read: _, // reads not used for writer props
schema_force_view_types: _,
binary_as_string: _, // not used for writer props
+ coerce_int96: _, // not used for writer props
skip_arrow_metadata: _,
} = self;
@@ -516,6 +517,7 @@ mod tests {
schema_force_view_types: defaults.schema_force_view_types,
binary_as_string: defaults.binary_as_string,
skip_arrow_metadata: defaults.skip_arrow_metadata,
+ coerce_int96: None,
}
}
@@ -622,6 +624,7 @@ mod tests {
schema_force_view_types: global_options_defaults.schema_force_view_types,
binary_as_string: global_options_defaults.binary_as_string,
skip_arrow_metadata: global_options_defaults.skip_arrow_metadata,
+ coerce_int96: None,
},
column_specific_options,
key_value_metadata,
diff --git a/datafusion/common/src/functional_dependencies.rs b/datafusion/common/src/functional_dependencies.rs
index 5f262d634af37..c4f2805f82856 100644
--- a/datafusion/common/src/functional_dependencies.rs
+++ b/datafusion/common/src/functional_dependencies.rs
@@ -47,11 +47,13 @@ impl Constraints {
Constraints::new_unverified(vec![])
}
- /// Create a new `Constraints` object from the given `constraints`.
- /// Users should use the `empty` or `new_from_table_constraints` functions
- /// for constructing `Constraints`. This constructor is for internal
+ /// Create a new [`Constraints`] object from the given `constraints`.
+ /// Users should use the [`Constraints::empty`] or [`SqlToRel::new_constraint_from_table_constraints`] functions
+ /// for constructing [`Constraints`]. This constructor is for internal
/// purposes only and does not check whether the argument is valid. The user
- /// is responsible for supplying a valid vector of `Constraint` objects.
+ /// is responsible for supplying a valid vector of [`Constraint`] objects.
+ ///
+ /// [`SqlToRel::new_constraint_from_table_constraints`]: https://docs.rs/datafusion/latest/datafusion/sql/planner/struct.SqlToRel.html#method.new_constraint_from_table_constraints
pub fn new_unverified(constraints: Vec) -> Self {
Self { inner: constraints }
}
diff --git a/datafusion/common/src/scalar/mod.rs b/datafusion/common/src/scalar/mod.rs
index 2b758f4568760..b8d9aea810f03 100644
--- a/datafusion/common/src/scalar/mod.rs
+++ b/datafusion/common/src/scalar/mod.rs
@@ -27,7 +27,7 @@ use std::convert::Infallible;
use std::fmt;
use std::hash::Hash;
use std::hash::Hasher;
-use std::iter::repeat;
+use std::iter::repeat_n;
use std::mem::{size_of, size_of_val};
use std::str::FromStr;
use std::sync::Arc;
@@ -802,12 +802,14 @@ fn dict_from_scalar(
let values_array = value.to_array_of_size(1)?;
// Create a key array with `size` elements, each of 0
- let key_array: PrimitiveArray = repeat(if value.is_null() {
- None
- } else {
- Some(K::default_value())
- })
- .take(size)
+ let key_array: PrimitiveArray = repeat_n(
+ if value.is_null() {
+ None
+ } else {
+ Some(K::default_value())
+ },
+ size,
+ )
.collect();
// create a new DictionaryArray
@@ -2189,8 +2191,7 @@ impl ScalarValue {
scale: i8,
size: usize,
) -> Result {
- Ok(repeat(value)
- .take(size)
+ Ok(repeat_n(value, size)
.collect::()
.with_precision_and_scale(precision, scale)?)
}
@@ -2416,53 +2417,47 @@ impl ScalarValue {
}
ScalarValue::Utf8(e) => match e {
Some(value) => {
- Arc::new(StringArray::from_iter_values(repeat(value).take(size)))
+ Arc::new(StringArray::from_iter_values(repeat_n(value, size)))
}
None => new_null_array(&DataType::Utf8, size),
},
ScalarValue::Utf8View(e) => match e {
Some(value) => {
- Arc::new(StringViewArray::from_iter_values(repeat(value).take(size)))
+ Arc::new(StringViewArray::from_iter_values(repeat_n(value, size)))
}
None => new_null_array(&DataType::Utf8View, size),
},
ScalarValue::LargeUtf8(e) => match e {
Some(value) => {
- Arc::new(LargeStringArray::from_iter_values(repeat(value).take(size)))
+ Arc::new(LargeStringArray::from_iter_values(repeat_n(value, size)))
}
None => new_null_array(&DataType::LargeUtf8, size),
},
ScalarValue::Binary(e) => match e {
Some(value) => Arc::new(
- repeat(Some(value.as_slice()))
- .take(size)
- .collect::(),
+ repeat_n(Some(value.as_slice()), size).collect::(),
),
- None => {
- Arc::new(repeat(None::<&str>).take(size).collect::())
- }
+ None => Arc::new(repeat_n(None::<&str>, size).collect::()),
},
ScalarValue::BinaryView(e) => match e {
Some(value) => Arc::new(
- repeat(Some(value.as_slice()))
- .take(size)
- .collect::(),
+ repeat_n(Some(value.as_slice()), size).collect::(),
),
None => {
- Arc::new(repeat(None::<&str>).take(size).collect::())
+ Arc::new(repeat_n(None::<&str>, size).collect::())
}
},
ScalarValue::FixedSizeBinary(s, e) => match e {
Some(value) => Arc::new(
FixedSizeBinaryArray::try_from_sparse_iter_with_size(
- repeat(Some(value.as_slice())).take(size),
+ repeat_n(Some(value.as_slice()), size),
*s,
)
.unwrap(),
),
None => Arc::new(
FixedSizeBinaryArray::try_from_sparse_iter_with_size(
- repeat(None::<&[u8]>).take(size),
+ repeat_n(None::<&[u8]>, size),
*s,
)
.unwrap(),
@@ -2470,15 +2465,11 @@ impl ScalarValue {
},
ScalarValue::LargeBinary(e) => match e {
Some(value) => Arc::new(
- repeat(Some(value.as_slice()))
- .take(size)
- .collect::(),
- ),
- None => Arc::new(
- repeat(None::<&str>)
- .take(size)
- .collect::(),
+ repeat_n(Some(value.as_slice()), size).collect::(),
),
+ None => {
+ Arc::new(repeat_n(None::<&str>, size).collect::())
+ }
},
ScalarValue::List(arr) => {
Self::list_to_array_of_size(arr.as_ref() as &dyn Array, size)?
@@ -2606,7 +2597,7 @@ impl ScalarValue {
child_arrays.push(ar);
new_fields.push(field.clone());
}
- let type_ids = repeat(*v_id).take(size);
+ let type_ids = repeat_n(*v_id, size);
let type_ids = ScalarBuffer::::from_iter(type_ids);
let value_offsets = match mode {
UnionMode::Sparse => None,
@@ -2674,7 +2665,7 @@ impl ScalarValue {
}
fn list_to_array_of_size(arr: &dyn Array, size: usize) -> Result {
- let arrays = repeat(arr).take(size).collect::>();
+ let arrays = repeat_n(arr, size).collect::>();
let ret = match !arrays.is_empty() {
true => arrow::compute::concat(arrays.as_slice())?,
false => arr.slice(0, 0),
@@ -3036,6 +3027,34 @@ impl ScalarValue {
DataType::Timestamp(TimeUnit::Nanosecond, None),
) => ScalarValue::Int64(Some((float_ts * 1_000_000_000_f64).trunc() as i64))
.to_array()?,
+ (
+ ScalarValue::Decimal128(Some(decimal_value), _, scale),
+ DataType::Timestamp(time_unit, None),
+ ) => {
+ let scale_factor = 10_i128.pow(*scale as u32);
+ let seconds = decimal_value / scale_factor;
+ let fraction = decimal_value % scale_factor;
+
+ let timestamp_value = match time_unit {
+ TimeUnit::Second => ScalarValue::Int64(Some(seconds as i64)),
+ TimeUnit::Millisecond => {
+ let millis = seconds * 1_000 + (fraction * 1_000) / scale_factor;
+ ScalarValue::Int64(Some(millis as i64))
+ }
+ TimeUnit::Microsecond => {
+ let micros =
+ seconds * 1_000_000 + (fraction * 1_000_000) / scale_factor;
+ ScalarValue::Int64(Some(micros as i64))
+ }
+ TimeUnit::Nanosecond => {
+ let nanos = seconds * 1_000_000_000
+ + (fraction * 1_000_000_000) / scale_factor;
+ ScalarValue::Int64(Some(nanos as i64))
+ }
+ };
+
+ timestamp_value.to_array()?
+ }
_ => self.to_array()?,
};
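The scaling logic in the new `Decimal128` → `Timestamp` branch is easiest to sanity-check in isolation. The following is a standalone re-derivation of the arithmetic above for the millisecond case, not a call into the DataFusion API:

```rust
/// Convert a Decimal128-encoded number of seconds (value, scale) to milliseconds,
/// mirroring the `TimeUnit::Millisecond` arm above.
fn decimal_seconds_to_millis(decimal_value: i128, scale: i8) -> i64 {
    let scale_factor = 10_i128.pow(scale as u32);
    let seconds = decimal_value / scale_factor;
    let fraction = decimal_value % scale_factor;
    (seconds * 1_000 + (fraction * 1_000) / scale_factor) as i64
}

fn main() {
    // 1.5 seconds stored as Decimal128(15, scale = 1) -> 1500 ms
    assert_eq!(decimal_seconds_to_millis(15, 1), 1_500);
    // 1234.567 seconds stored as Decimal128(1_234_567, scale = 3) -> 1_234_567 ms
    assert_eq!(decimal_seconds_to_millis(1_234_567, 3), 1_234_567);
}
```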
diff --git a/datafusion/common/src/stats.rs b/datafusion/common/src/stats.rs
index 5b841db53c5ee..807d885b3a4de 100644
--- a/datafusion/common/src/stats.rs
+++ b/datafusion/common/src/stats.rs
@@ -21,6 +21,7 @@ use std::fmt::{self, Debug, Display};
use crate::{Result, ScalarValue};
+use crate::error::_plan_err;
use arrow::datatypes::{DataType, Schema, SchemaRef};
/// Represents a value with a degree of certainty. `Precision` is used to
@@ -271,11 +272,25 @@ pub struct Statistics {
pub num_rows: Precision<usize>,
/// Total bytes of the table rows.
pub total_byte_size: Precision<usize>,
- /// Statistics on a column level. It contains a [`ColumnStatistics`] for
- /// each field in the schema of the table to which the [`Statistics`] refer.
+ /// Statistics on a column level.
+ ///
+ /// It must contain a [`ColumnStatistics`] for each field in the schema of
+ /// the table to which the [`Statistics`] refer.
pub column_statistics: Vec<ColumnStatistics>,
}
+impl Default for Statistics {
+ /// Returns a new [`Statistics`] instance with all fields set to unknown
+ /// and no columns.
+ fn default() -> Self {
+ Self {
+ num_rows: Precision::Absent,
+ total_byte_size: Precision::Absent,
+ column_statistics: vec![],
+ }
+ }
+}
+
impl Statistics {
/// Returns a [`Statistics`] instance for the given schema by assigning
/// unknown statistics to each column in the schema.
@@ -296,6 +311,24 @@ impl Statistics {
.collect()
}
+ /// Set the number of rows
+ pub fn with_num_rows(mut self, num_rows: Precision<usize>) -> Self {
+ self.num_rows = num_rows;
+ self
+ }
+
+ /// Set the total size, in bytes
+ pub fn with_total_byte_size(mut self, total_byte_size: Precision<usize>) -> Self {
+ self.total_byte_size = total_byte_size;
+ self
+ }
+
+ /// Add a column to the column statistics
+ pub fn add_column_statistics(mut self, column_stats: ColumnStatistics) -> Self {
+ self.column_statistics.push(column_stats);
+ self
+ }
+
/// If the exactness of a [`Statistics`] instance is lost, this function relaxes
/// the exactness of all information by converting them [`Precision::Inexact`].
pub fn to_inexact(mut self) -> Self {
@@ -351,7 +384,8 @@ impl Statistics {
self
}
- /// Calculates the statistics after `fetch` and `skip` operations apply.
+ /// Calculates the statistics after applying `fetch` and `skip` operations.
+ ///
/// Here, `self` denotes per-partition statistics. Use the `n_partitions`
/// parameter to compute global statistics in a multi-partition setting.
pub fn with_fetch(
@@ -414,6 +448,100 @@ impl Statistics {
self.total_byte_size = Precision::Absent;
Ok(self)
}
+
+ /// Summarize zero or more statistics into a single `Statistics` instance.
+ ///
+ /// Returns an error if the statistics do not match the specified schemas.
+ pub fn try_merge_iter<'a, I>(items: I, schema: &Schema) -> Result<Statistics>
+ where
+ I: IntoIterator<Item = &'a Statistics>,
+ {
+ let mut items = items.into_iter();
+
+ let Some(init) = items.next() else {
+ return Ok(Statistics::new_unknown(schema));
+ };
+ items.try_fold(init.clone(), |acc: Statistics, item_stats: &Statistics| {
+ acc.try_merge(item_stats)
+ })
+ }
+
+ /// Merge this Statistics value with another Statistics value.
+ ///
+ /// Returns an error if the statistics do not match (different schemas).
+ ///
+ /// # Example
+ /// ```
+ /// # use datafusion_common::{ColumnStatistics, ScalarValue, Statistics};
+ /// # use arrow::datatypes::{Field, Schema, DataType};
+ /// # use datafusion_common::stats::Precision;
+ /// let stats1 = Statistics::default()
+ /// .with_num_rows(Precision::Exact(1))
+ /// .with_total_byte_size(Precision::Exact(2))
+ /// .add_column_statistics(ColumnStatistics::new_unknown()
+ /// .with_null_count(Precision::Exact(3))
+ /// .with_min_value(Precision::Exact(ScalarValue::from(4)))
+ /// .with_max_value(Precision::Exact(ScalarValue::from(5)))
+ /// );
+ ///
+ /// let stats2 = Statistics::default()
+ /// .with_num_rows(Precision::Exact(10))
+ /// .with_total_byte_size(Precision::Inexact(20))
+ /// .add_column_statistics(ColumnStatistics::new_unknown()
+ /// // absent null count
+ /// .with_min_value(Precision::Exact(ScalarValue::from(40)))
+ /// .with_max_value(Precision::Exact(ScalarValue::from(50)))
+ /// );
+ ///
+ /// let merged_stats = stats1.try_merge(&stats2).unwrap();
+ /// let expected_stats = Statistics::default()
+ /// .with_num_rows(Precision::Exact(11))
+ /// .with_total_byte_size(Precision::Inexact(22)) // inexact in stats2 --> inexact
+ /// .add_column_statistics(
+ /// ColumnStatistics::new_unknown()
+ /// .with_null_count(Precision::Absent) // missing from stats2 --> absent
+ /// .with_min_value(Precision::Exact(ScalarValue::from(4)))
+ /// .with_max_value(Precision::Exact(ScalarValue::from(50)))
+ /// );
+ ///
+ /// assert_eq!(merged_stats, expected_stats)
+ /// ```
+ pub fn try_merge(self, other: &Statistics) -> Result<Statistics> {
+ let Self {
+ mut num_rows,
+ mut total_byte_size,
+ mut column_statistics,
+ } = self;
+
+ // Accumulate statistics for subsequent items
+ num_rows = num_rows.add(&other.num_rows);
+ total_byte_size = total_byte_size.add(&other.total_byte_size);
+
+ if column_statistics.len() != other.column_statistics.len() {
+ return _plan_err!(
+ "Cannot merge statistics with different number of columns: {} vs {}",
+ column_statistics.len(),
+ other.column_statistics.len()
+ );
+ }
+
+ for (item_col_stats, col_stats) in other
+ .column_statistics
+ .iter()
+ .zip(column_statistics.iter_mut())
+ {
+ col_stats.null_count = col_stats.null_count.add(&item_col_stats.null_count);
+ col_stats.max_value = col_stats.max_value.max(&item_col_stats.max_value);
+ col_stats.min_value = col_stats.min_value.min(&item_col_stats.min_value);
+ col_stats.sum_value = col_stats.sum_value.add(&item_col_stats.sum_value);
+ }
+
+ Ok(Statistics {
+ num_rows,
+ total_byte_size,
+ column_statistics,
+ })
+ }
}
/// Creates an estimate of the number of rows in the output using the given
@@ -521,6 +649,36 @@ impl ColumnStatistics {
}
}
+ /// Set the null count
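+ ///
+ /// # Example
+ /// ```
+ /// // Minimal sketch of the fluent setters added here.
+ /// # use datafusion_common::{ColumnStatistics, ScalarValue};
+ /// # use datafusion_common::stats::Precision;
+ /// let col_stats = ColumnStatistics::new_unknown()
+ /// .with_null_count(Precision::Exact(0))
+ /// .with_min_value(Precision::Exact(ScalarValue::from(1)))
+ /// .with_max_value(Precision::Exact(ScalarValue::from(10)));
+ /// assert_eq!(col_stats.null_count, Precision::Exact(0));
+ /// ```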
+ pub fn with_null_count(mut self, null_count: Precision<usize>) -> Self {
+ self.null_count = null_count;
+ self
+ }
+
+ /// Set the max value
+ pub fn with_max_value(mut self, max_value: Precision<ScalarValue>) -> Self {
+ self.max_value = max_value;
+ self
+ }
+
+ /// Set the min value
+ pub fn with_min_value(mut self, min_value: Precision<ScalarValue>) -> Self {
+ self.min_value = min_value;
+ self
+ }
+
+ /// Set the sum value
+ pub fn with_sum_value(mut self, sum_value: Precision<ScalarValue>) -> Self {
+ self.sum_value = sum_value;
+ self
+ }
+
+ /// Set the distinct count
+ pub fn with_distinct_count(mut self, distinct_count: Precision<usize>) -> Self {
+ self.distinct_count = distinct_count;
+ self
+ }
+
/// If the exactness of a [`ColumnStatistics`] instance is lost, this
/// function relaxes the exactness of all information by converting them
/// [`Precision::Inexact`].
@@ -537,6 +695,9 @@ impl ColumnStatistics {
#[cfg(test)]
mod tests {
use super::*;
+ use crate::assert_contains;
+ use arrow::datatypes::Field;
+ use std::sync::Arc;
#[test]
fn test_get_value() {
@@ -798,4 +959,193 @@ mod tests {
distinct_count: Precision::Exact(100),
}
}
+
+ #[test]
+ fn test_try_merge_basic() {
+ // Create a schema with two columns
+ let schema = Arc::new(Schema::new(vec![
+ Field::new("col1", DataType::Int32, false),
+ Field::new("col2", DataType::Int32, false),
+ ]));
+
+ // Create items with statistics
+ let stats1 = Statistics {
+ num_rows: Precision::Exact(10),
+ total_byte_size: Precision::Exact(100),
+ column_statistics: vec![
+ ColumnStatistics {
+ null_count: Precision::Exact(1),
+ max_value: Precision::Exact(ScalarValue::Int32(Some(100))),
+ min_value: Precision::Exact(ScalarValue::Int32(Some(1))),
+ sum_value: Precision::Exact(ScalarValue::Int32(Some(500))),
+ distinct_count: Precision::Absent,
+ },
+ ColumnStatistics {
+ null_count: Precision::Exact(2),
+ max_value: Precision::Exact(ScalarValue::Int32(Some(200))),
+ min_value: Precision::Exact(ScalarValue::Int32(Some(10))),
+ sum_value: Precision::Exact(ScalarValue::Int32(Some(1000))),
+ distinct_count: Precision::Absent,
+ },
+ ],
+ };
+
+ let stats2 = Statistics {
+ num_rows: Precision::Exact(15),
+ total_byte_size: Precision::Exact(150),
+ column_statistics: vec![
+ ColumnStatistics {
+ null_count: Precision::Exact(2),
+ max_value: Precision::Exact(ScalarValue::Int32(Some(120))),
+ min_value: Precision::Exact(ScalarValue::Int32(Some(-10))),
+ sum_value: Precision::Exact(ScalarValue::Int32(Some(600))),
+ distinct_count: Precision::Absent,
+ },
+ ColumnStatistics {
+ null_count: Precision::Exact(3),
+ max_value: Precision::Exact(ScalarValue::Int32(Some(180))),
+ min_value: Precision::Exact(ScalarValue::Int32(Some(5))),
+ sum_value: Precision::Exact(ScalarValue::Int32(Some(1200))),
+ distinct_count: Precision::Absent,
+ },
+ ],
+ };
+
+ let items = vec![stats1, stats2];
+
+ let summary_stats = Statistics::try_merge_iter(&items, &schema).unwrap();
+
+ // Verify the results
+ assert_eq!(summary_stats.num_rows, Precision::Exact(25)); // 10 + 15
+ assert_eq!(summary_stats.total_byte_size, Precision::Exact(250)); // 100 + 150
+
+ // Verify column statistics
+ let col1_stats = &summary_stats.column_statistics[0];
+ assert_eq!(col1_stats.null_count, Precision::Exact(3)); // 1 + 2
+ assert_eq!(
+ col1_stats.max_value,
+ Precision::Exact(ScalarValue::Int32(Some(120)))
+ );
+ assert_eq!(
+ col1_stats.min_value,
+ Precision::Exact(ScalarValue::Int32(Some(-10)))
+ );
+ assert_eq!(
+ col1_stats.sum_value,
+ Precision::Exact(ScalarValue::Int32(Some(1100)))
+ ); // 500 + 600
+
+ let col2_stats = &summary_stats.column_statistics[1];
+ assert_eq!(col2_stats.null_count, Precision::Exact(5)); // 2 + 3
+ assert_eq!(
+ col2_stats.max_value,
+ Precision::Exact(ScalarValue::Int32(Some(200)))
+ );
+ assert_eq!(
+ col2_stats.min_value,
+ Precision::Exact(ScalarValue::Int32(Some(5)))
+ );
+ assert_eq!(
+ col2_stats.sum_value,
+ Precision::Exact(ScalarValue::Int32(Some(2200)))
+ ); // 1000 + 1200
+ }
+
+ #[test]
+ fn test_try_merge_mixed_precision() {
+ // Create a schema with one column
+ let schema = Arc::new(Schema::new(vec![Field::new(
+ "col1",
+ DataType::Int32,
+ false,
+ )]));
+
+ // Create items with different precision levels
+ let stats1 = Statistics {
+ num_rows: Precision::Exact(10),
+ total_byte_size: Precision::Inexact(100),
+ column_statistics: vec![ColumnStatistics {
+ null_count: Precision::Exact(1),
+ max_value: Precision::Exact(ScalarValue::Int32(Some(100))),
+ min_value: Precision::Inexact(ScalarValue::Int32(Some(1))),
+ sum_value: Precision::Exact(ScalarValue::Int32(Some(500))),
+ distinct_count: Precision::Absent,
+ }],
+ };
+
+ let stats2 = Statistics {
+ num_rows: Precision::Inexact(15),
+ total_byte_size: Precision::Exact(150),
+ column_statistics: vec![ColumnStatistics {
+ null_count: Precision::Inexact(2),
+ max_value: Precision::Inexact(ScalarValue::Int32(Some(120))),
+ min_value: Precision::Exact(ScalarValue::Int32(Some(-10))),
+ sum_value: Precision::Absent,
+ distinct_count: Precision::Absent,
+ }],
+ };
+
+ let items = vec![stats1, stats2];
+
+ let summary_stats = Statistics::try_merge_iter(&items, &schema).unwrap();
+
+ assert_eq!(summary_stats.num_rows, Precision::Inexact(25));
+ assert_eq!(summary_stats.total_byte_size, Precision::Inexact(250));
+
+ let col_stats = &summary_stats.column_statistics[0];
+ assert_eq!(col_stats.null_count, Precision::Inexact(3));
+ assert_eq!(
+ col_stats.max_value,
+ Precision::Inexact(ScalarValue::Int32(Some(120)))
+ );
+ assert_eq!(
+ col_stats.min_value,
+ Precision::Inexact(ScalarValue::Int32(Some(-10)))
+ );
+ assert!(matches!(col_stats.sum_value, Precision::Absent));
+ }
+
+ #[test]
+ fn test_try_merge_empty() {
+ let schema = Arc::new(Schema::new(vec![Field::new(
+ "col1",
+ DataType::Int32,
+ false,
+ )]));
+
+ // Empty collection
+ let items: Vec<Statistics> = vec![];
+
+ let summary_stats = Statistics::try_merge_iter(&items, &schema).unwrap();
+
+ // Verify default values for empty collection
+ assert_eq!(summary_stats.num_rows, Precision::Absent);
+ assert_eq!(summary_stats.total_byte_size, Precision::Absent);
+ assert_eq!(summary_stats.column_statistics.len(), 1);
+ assert_eq!(
+ summary_stats.column_statistics[0].null_count,
+ Precision::Absent
+ );
+ }
+
+ #[test]
+ fn test_try_merge_mismatched_size() {
+ // Create a schema with one column
+ let schema = Arc::new(Schema::new(vec![Field::new(
+ "col1",
+ DataType::Int32,
+ false,
+ )]));
+
+ // No column statistics
+ let stats1 = Statistics::default();
+
+ let stats2 =
+ Statistics::default().add_column_statistics(ColumnStatistics::new_unknown());
+
+ let items = vec![stats1, stats2];
+
+ let e = Statistics::try_merge_iter(&items, &schema).unwrap_err();
+ assert_contains!(e.to_string(), "Error during planning: Cannot merge statistics with different number of columns: 0 vs 1");
+ }
}
diff --git a/datafusion/common/src/utils/memory.rs b/datafusion/common/src/utils/memory.rs
index ab73996fcd8b7..7ac081e0beb84 100644
--- a/datafusion/common/src/utils/memory.rs
+++ b/datafusion/common/src/utils/memory.rs
@@ -25,7 +25,7 @@ use std::mem::size_of;
/// # Parameters
/// - `num_elements`: The number of elements expected in the hash table.
/// - `fixed_size`: A fixed overhead size associated with the collection
-/// (e.g., HashSet or HashTable).
+/// (e.g., HashSet or HashTable).
/// - `T`: The type of elements stored in the hash table.
///
/// # Details
diff --git a/datafusion/core/Cargo.toml b/datafusion/core/Cargo.toml
index 56698e4d7e255..edc0d34b539ac 100644
--- a/datafusion/core/Cargo.toml
+++ b/datafusion/core/Cargo.toml
@@ -125,7 +125,7 @@ datafusion-physical-optimizer = { workspace = true }
datafusion-physical-plan = { workspace = true }
datafusion-session = { workspace = true }
datafusion-sql = { workspace = true }
-flate2 = { version = "1.1.0", optional = true }
+flate2 = { version = "1.1.1", optional = true }
futures = { workspace = true }
itertools = { workspace = true }
log = { workspace = true }
@@ -160,7 +160,7 @@ rand_distr = "0.4.3"
regex = { workspace = true }
rstest = { workspace = true }
serde_json = { workspace = true }
-sysinfo = "0.33.1"
+sysinfo = "0.34.2"
test-utils = { path = "../../test-utils" }
tokio = { workspace = true, features = ["rt-multi-thread", "parking_lot", "fs"] }
diff --git a/datafusion/core/benches/aggregate_query_sql.rs b/datafusion/core/benches/aggregate_query_sql.rs
index ebe94450c1f8d..057a0e1d1b54c 100644
--- a/datafusion/core/benches/aggregate_query_sql.rs
+++ b/datafusion/core/benches/aggregate_query_sql.rs
@@ -29,8 +29,7 @@ use parking_lot::Mutex;
use std::sync::Arc;
use tokio::runtime::Runtime;
-fn query(ctx: Arc<Mutex<SessionContext>>, sql: &str) {
- let rt = Runtime::new().unwrap();
+fn query(ctx: Arc<Mutex<SessionContext>>, rt: &Runtime, sql: &str) {
let df = rt.block_on(ctx.lock().sql(sql)).unwrap();
criterion::black_box(rt.block_on(df.collect()).unwrap());
}
@@ -51,11 +50,13 @@ fn criterion_benchmark(c: &mut Criterion) {
let array_len = 32768 * 2; // 2^16
let batch_size = 2048; // 2^11
let ctx = create_context(partitions_len, array_len, batch_size).unwrap();
+ let rt = Runtime::new().unwrap();
c.bench_function("aggregate_query_no_group_by 15 12", |b| {
b.iter(|| {
query(
ctx.clone(),
+ &rt,
"SELECT MIN(f64), AVG(f64), COUNT(f64) \
FROM t",
)
@@ -66,6 +67,7 @@ fn criterion_benchmark(c: &mut Criterion) {
b.iter(|| {
query(
ctx.clone(),
+ &rt,
"SELECT MIN(f64), MAX(f64) \
FROM t",
)
@@ -76,6 +78,7 @@ fn criterion_benchmark(c: &mut Criterion) {
b.iter(|| {
query(
ctx.clone(),
+ &rt,
"SELECT COUNT(DISTINCT u64_wide) \
FROM t",
)
@@ -86,6 +89,7 @@ fn criterion_benchmark(c: &mut Criterion) {
b.iter(|| {
query(
ctx.clone(),
+ &rt,
"SELECT COUNT(DISTINCT u64_narrow) \
FROM t",
)
@@ -96,6 +100,7 @@ fn criterion_benchmark(c: &mut Criterion) {
b.iter(|| {
query(
ctx.clone(),
+ &rt,
"SELECT utf8, MIN(f64), AVG(f64), COUNT(f64) \
FROM t GROUP BY utf8",
)
@@ -106,6 +111,7 @@ fn criterion_benchmark(c: &mut Criterion) {
b.iter(|| {
query(
ctx.clone(),
+ &rt,
"SELECT utf8, MIN(f64), AVG(f64), COUNT(f64) \
FROM t \
WHERE f32 > 10 AND f32 < 20 GROUP BY utf8",
@@ -117,6 +123,7 @@ fn criterion_benchmark(c: &mut Criterion) {
b.iter(|| {
query(
ctx.clone(),
+ &rt,
"SELECT u64_narrow, MIN(f64), AVG(f64), COUNT(f64) \
FROM t GROUP BY u64_narrow",
)
@@ -127,6 +134,7 @@ fn criterion_benchmark(c: &mut Criterion) {
b.iter(|| {
query(
ctx.clone(),
+ &rt,
"SELECT u64_narrow, MIN(f64), AVG(f64), COUNT(f64) \
FROM t \
WHERE f32 > 10 AND f32 < 20 GROUP BY u64_narrow",
@@ -138,6 +146,7 @@ fn criterion_benchmark(c: &mut Criterion) {
b.iter(|| {
query(
ctx.clone(),
+ &rt,
"SELECT u64_wide, utf8, MIN(f64), AVG(f64), COUNT(f64) \
FROM t GROUP BY u64_wide, utf8",
)
@@ -148,7 +157,8 @@ fn criterion_benchmark(c: &mut Criterion) {
b.iter(|| {
query(
ctx.clone(),
- "SELECT utf8, approx_percentile_cont(u64_wide, 0.5, 2500) \
+ &rt,
+ "SELECT utf8, approx_percentile_cont(0.5, 2500) WITHIN GROUP (ORDER BY u64_wide) \
FROM t GROUP BY utf8",
)
})
@@ -158,7 +168,8 @@ fn criterion_benchmark(c: &mut Criterion) {
b.iter(|| {
query(
ctx.clone(),
- "SELECT utf8, approx_percentile_cont(f32, 0.5, 2500) \
+ &rt,
+ "SELECT utf8, approx_percentile_cont(0.5, 2500) WITHIN GROUP (ORDER BY f32) \
FROM t GROUP BY utf8",
)
})
@@ -168,6 +179,7 @@ fn criterion_benchmark(c: &mut Criterion) {
b.iter(|| {
query(
ctx.clone(),
+ &rt,
"SELECT MEDIAN(DISTINCT u64_wide), MEDIAN(DISTINCT u64_narrow) \
FROM t",
)
@@ -178,6 +190,7 @@ fn criterion_benchmark(c: &mut Criterion) {
b.iter(|| {
query(
ctx.clone(),
+ &rt,
"SELECT first_value(u64_wide order by f64, u64_narrow, utf8),\
last_value(u64_wide order by f64, u64_narrow, utf8) \
FROM t GROUP BY u64_narrow",
@@ -189,6 +202,7 @@ fn criterion_benchmark(c: &mut Criterion) {
b.iter(|| {
query(
ctx.clone(),
+ &rt,
"SELECT first_value(u64_wide ignore nulls order by f64, u64_narrow, utf8), \
last_value(u64_wide ignore nulls order by f64, u64_narrow, utf8) \
FROM t GROUP BY u64_narrow",
@@ -200,6 +214,7 @@ fn criterion_benchmark(c: &mut Criterion) {
b.iter(|| {
query(
ctx.clone(),
+ &rt,
"SELECT first_value(u64_wide order by f64), \
last_value(u64_wide order by f64) \
FROM t GROUP BY u64_narrow",
diff --git a/datafusion/core/benches/csv_load.rs b/datafusion/core/benches/csv_load.rs
index 2d42121ec9b25..3f984757466d5 100644
--- a/datafusion/core/benches/csv_load.rs
+++ b/datafusion/core/benches/csv_load.rs
@@ -32,8 +32,12 @@ use std::time::Duration;
use test_utils::AccessLogGenerator;
use tokio::runtime::Runtime;
-fn load_csv(ctx: Arc<Mutex<SessionContext>>, path: &str, options: CsvReadOptions) {
- let rt = Runtime::new().unwrap();
+fn load_csv(
+ ctx: Arc<Mutex<SessionContext>>,
+ rt: &Runtime,
+ path: &str,
+ options: CsvReadOptions,
+) {
let df = rt.block_on(ctx.lock().read_csv(path, options)).unwrap();
criterion::black_box(rt.block_on(df.collect()).unwrap());
}
@@ -61,6 +65,7 @@ fn generate_test_file() -> TestCsvFile {
fn criterion_benchmark(c: &mut Criterion) {
let ctx = create_context().unwrap();
+ let rt = Runtime::new().unwrap();
let test_file = generate_test_file();
let mut group = c.benchmark_group("load csv testing");
@@ -70,6 +75,7 @@ fn criterion_benchmark(c: &mut Criterion) {
b.iter(|| {
load_csv(
ctx.clone(),
+ &rt,
test_file.path().to_str().unwrap(),
CsvReadOptions::default(),
)
@@ -80,6 +86,7 @@ fn criterion_benchmark(c: &mut Criterion) {
b.iter(|| {
load_csv(
ctx.clone(),
+ &rt,
test_file.path().to_str().unwrap(),
CsvReadOptions::default().null_regex(Some("^NULL$|^$".to_string())),
)
diff --git a/datafusion/core/benches/data_utils/mod.rs b/datafusion/core/benches/data_utils/mod.rs
index 38f6a2c76df6d..fc5f8945c4392 100644
--- a/datafusion/core/benches/data_utils/mod.rs
+++ b/datafusion/core/benches/data_utils/mod.rs
@@ -19,7 +19,8 @@
use arrow::array::{
builder::{Int64Builder, StringBuilder},
- Float32Array, Float64Array, RecordBatch, StringArray, UInt64Array,
+ ArrayRef, Float32Array, Float64Array, RecordBatch, StringArray, StringViewBuilder,
+ UInt64Array,
};
use arrow::datatypes::{DataType, Field, Schema, SchemaRef};
use datafusion::datasource::MemTable;
@@ -158,6 +159,31 @@ pub fn create_record_batches(
.collect::<Vec<_>>()
}
+/// An enum that wraps either a regular StringBuilder or a GenericByteViewBuilder
+/// so that both can be used interchangeably.
+enum TraceIdBuilder {
+ Utf8(StringBuilder),
+ Utf8View(StringViewBuilder),
+}
+
+impl TraceIdBuilder {
+ /// Append a value to the builder.
+ fn append_value(&mut self, value: &str) {
+ match self {
+ TraceIdBuilder::Utf8(builder) => builder.append_value(value),
+ TraceIdBuilder::Utf8View(builder) => builder.append_value(value),
+ }
+ }
+
+ /// Finish building and return the ArrayRef.
+ fn finish(self) -> ArrayRef {
+ match self {
+ TraceIdBuilder::Utf8(mut builder) => Arc::new(builder.finish()),
+ TraceIdBuilder::Utf8View(mut builder) => Arc::new(builder.finish()),
+ }
+ }
+}
+
/// Create time series data with `partition_cnt` partitions and `sample_cnt` rows per partition
/// in ascending order, if `asc` is true, otherwise randomly sampled using a Pareto distribution
#[allow(dead_code)]
@@ -165,6 +191,7 @@ pub(crate) fn make_data(
partition_cnt: i32,
sample_cnt: i32,
asc: bool,
+ use_view: bool,
) -> Result<(Arc<Schema>, Vec<Vec<RecordBatch>>), DataFusionError> {
// constants observed from trace data
let simultaneous_group_cnt = 2000;
@@ -177,11 +204,17 @@ pub(crate) fn make_data(
let mut rng = rand::rngs::SmallRng::from_seed([0; 32]);
// populate data
- let schema = test_schema();
+ let schema = test_schema(use_view);
let mut partitions = vec![];
let mut cur_time = 16909000000000i64;
for _ in 0..partition_cnt {
- let mut id_builder = StringBuilder::new();
+ // Choose the appropriate builder based on use_view.
+ let mut id_builder = if use_view {
+ TraceIdBuilder::Utf8View(StringViewBuilder::new())
+ } else {
+ TraceIdBuilder::Utf8(StringBuilder::new())
+ };
+
let mut ts_builder = Int64Builder::new();
let gen_id = |rng: &mut rand::rngs::SmallRng| {
rng.gen::<[u8; 16]>()
@@ -230,10 +263,19 @@ pub(crate) fn make_data(
Ok((schema, partitions))
}
-/// The Schema used by make_data
-fn test_schema() -> SchemaRef {
- Arc::new(Schema::new(vec![
- Field::new("trace_id", DataType::Utf8, false),
- Field::new("timestamp_ms", DataType::Int64, false),
- ]))
+/// Returns a Schema based on the use_view flag
+fn test_schema(use_view: bool) -> SchemaRef {
+ if use_view {
+ // Return Utf8View schema
+ Arc::new(Schema::new(vec![
+ Field::new("trace_id", DataType::Utf8View, false),
+ Field::new("timestamp_ms", DataType::Int64, false),
+ ]))
+ } else {
+ // Return regular Utf8 schema
+ Arc::new(Schema::new(vec![
+ Field::new("trace_id", DataType::Utf8, false),
+ Field::new("timestamp_ms", DataType::Int64, false),
+ ]))
+ }
}
diff --git a/datafusion/core/benches/dataframe.rs b/datafusion/core/benches/dataframe.rs
index 03078e05e1054..832553ebed82a 100644
--- a/datafusion/core/benches/dataframe.rs
+++ b/datafusion/core/benches/dataframe.rs
@@ -44,9 +44,7 @@ fn create_context(field_count: u32) -> datafusion_common::Result<Arc<SessionContext>>
-fn run(column_count: u32, ctx: Arc<SessionContext>) {
- let rt = Runtime::new().unwrap();
-
+fn run(column_count: u32, ctx: Arc<SessionContext>, rt: &Runtime) {
criterion::black_box(rt.block_on(async {
let mut data_frame = ctx.table("t").await.unwrap();
@@ -67,11 +65,13 @@ fn run(column_count: u32, ctx: Arc<SessionContext>) {
}
fn criterion_benchmark(c: &mut Criterion) {
+ let rt = Runtime::new().unwrap();
+
for column_count in [10, 100, 200, 500] {
let ctx = create_context(column_count).unwrap();
c.bench_function(&format!("with_column_{column_count}"), |b| {
- b.iter(|| run(column_count, ctx.clone()))
+ b.iter(|| run(column_count, ctx.clone(), &rt))
});
}
}
diff --git a/datafusion/core/benches/distinct_query_sql.rs b/datafusion/core/benches/distinct_query_sql.rs
index c242798a56f00..c7056aab86897 100644
--- a/datafusion/core/benches/distinct_query_sql.rs
+++ b/datafusion/core/benches/distinct_query_sql.rs
@@ -33,8 +33,7 @@ use parking_lot::Mutex;
use std::{sync::Arc, time::Duration};
use tokio::runtime::Runtime;
-fn query(ctx: Arc<Mutex<SessionContext>>, sql: &str) {
- let rt = Runtime::new().unwrap();
+fn query(ctx: Arc<Mutex<SessionContext>>, rt: &Runtime, sql: &str) {
let df = rt.block_on(ctx.lock().sql(sql)).unwrap();
criterion::black_box(rt.block_on(df.collect()).unwrap());
}
@@ -55,6 +54,7 @@ fn criterion_benchmark_limited_distinct(c: &mut Criterion) {
let array_len = 1 << 26; // 64 M
let batch_size = 8192;
let ctx = create_context(partitions_len, array_len, batch_size).unwrap();
+ let rt = Runtime::new().unwrap();
let mut group = c.benchmark_group("custom-measurement-time");
group.measurement_time(Duration::from_secs(40));
@@ -63,6 +63,7 @@ fn criterion_benchmark_limited_distinct(c: &mut Criterion) {
b.iter(|| {
query(
ctx.clone(),
+ &rt,
"SELECT DISTINCT u64_narrow FROM t GROUP BY u64_narrow LIMIT 10",
)
})
@@ -72,6 +73,7 @@ fn criterion_benchmark_limited_distinct(c: &mut Criterion) {
b.iter(|| {
query(
ctx.clone(),
+ &rt,
"SELECT DISTINCT u64_narrow FROM t GROUP BY u64_narrow LIMIT 100",
)
})
@@ -81,6 +83,7 @@ fn criterion_benchmark_limited_distinct(c: &mut Criterion) {
b.iter(|| {
query(
ctx.clone(),
+ &rt,
"SELECT DISTINCT u64_narrow FROM t GROUP BY u64_narrow LIMIT 1000",
)
})
@@ -90,6 +93,7 @@ fn criterion_benchmark_limited_distinct(c: &mut Criterion) {
b.iter(|| {
query(
ctx.clone(),
+ &rt,
"SELECT DISTINCT u64_narrow FROM t GROUP BY u64_narrow LIMIT 10000",
)
})
@@ -99,6 +103,7 @@ fn criterion_benchmark_limited_distinct(c: &mut Criterion) {
b.iter(|| {
query(
ctx.clone(),
+ &rt,
"SELECT u64_narrow, u64_wide, utf8, f64 FROM t GROUP BY 1, 2, 3, 4 LIMIT 10",
)
})
@@ -118,12 +123,9 @@ async fn distinct_with_limit(
Ok(())
}
-fn run(plan: Arc<dyn ExecutionPlan>, ctx: Arc<TaskContext>) {
- let rt = Runtime::new().unwrap();
- criterion::black_box(
- rt.block_on(async { distinct_with_limit(plan.clone(), ctx.clone()).await }),
- )
- .unwrap();
+fn run(rt: &Runtime, plan: Arc<dyn ExecutionPlan>, ctx: Arc<TaskContext>) {
+ criterion::black_box(rt.block_on(distinct_with_limit(plan.clone(), ctx.clone())))
+ .unwrap();
}
pub async fn create_context_sampled_data(
@@ -131,7 +133,8 @@ pub async fn create_context_sampled_data(
partition_cnt: i32,
sample_cnt: i32,
) -> Result<(Arc<dyn ExecutionPlan>, Arc<TaskContext>)> {
- let (schema, parts) = make_data(partition_cnt, sample_cnt, false /* asc */).unwrap();
+ let (schema, parts) =
+ make_data(partition_cnt, sample_cnt, false /* asc */, false).unwrap();
let mem_table = Arc::new(MemTable::try_new(schema, parts).unwrap());
// Create the DataFrame
@@ -145,58 +148,47 @@ pub async fn create_context_sampled_data(
fn criterion_benchmark_limited_distinct_sampled(c: &mut Criterion) {
let rt = Runtime::new().unwrap();
-
let limit = 10;
let partitions = 100;
let samples = 100_000;
let sql =
format!("select DISTINCT trace_id from traces group by trace_id limit {limit};");
-
- let distinct_trace_id_100_partitions_100_000_samples_limit_100 = rt.block_on(async {
- create_context_sampled_data(sql.as_str(), partitions, samples)
- .await
- .unwrap()
- });
-
c.bench_function(
format!("distinct query with {} partitions and {} samples per partition with limit {}", partitions, samples, limit).as_str(),
- |b| b.iter(|| run(distinct_trace_id_100_partitions_100_000_samples_limit_100.0.clone(),
- distinct_trace_id_100_partitions_100_000_samples_limit_100.1.clone())),
+ |b| b.iter(|| {
+ let (plan, ctx) = rt.block_on(
+ create_context_sampled_data(sql.as_str(), partitions, samples)
+ ).unwrap();
+ run(&rt, plan.clone(), ctx.clone())
+ }),
);
let partitions = 10;
let samples = 1_000_000;
let sql =
format!("select DISTINCT trace_id from traces group by trace_id limit {limit};");
-
- let distinct_trace_id_10_partitions_1_000_000_samples_limit_10 = rt.block_on(async {
- create_context_sampled_data(sql.as_str(), partitions, samples)
- .await
- .unwrap()
- });
-
c.bench_function(
format!("distinct query with {} partitions and {} samples per partition with limit {}", partitions, samples, limit).as_str(),
- |b| b.iter(|| run(distinct_trace_id_10_partitions_1_000_000_samples_limit_10.0.clone(),
- distinct_trace_id_10_partitions_1_000_000_samples_limit_10.1.clone())),
+ |b| b.iter(|| {
+ let (plan, ctx) = rt.block_on(
+ create_context_sampled_data(sql.as_str(), partitions, samples)
+ ).unwrap();
+ run(&rt, plan.clone(), ctx.clone())
+ }),
);
let partitions = 1;
let samples = 10_000_000;
let sql =
format!("select DISTINCT trace_id from traces group by trace_id limit {limit};");
-
- let rt = Runtime::new().unwrap();
- let distinct_trace_id_1_partition_10_000_000_samples_limit_10 = rt.block_on(async {
- create_context_sampled_data(sql.as_str(), partitions, samples)
- .await
- .unwrap()
- });
-
c.bench_function(
format!("distinct query with {} partitions and {} samples per partition with limit {}", partitions, samples, limit).as_str(),
- |b| b.iter(|| run(distinct_trace_id_1_partition_10_000_000_samples_limit_10.0.clone(),
- distinct_trace_id_1_partition_10_000_000_samples_limit_10.1.clone())),
+ |b| b.iter(|| {
+ let (plan, ctx) = rt.block_on(
+ create_context_sampled_data(sql.as_str(), partitions, samples)
+ ).unwrap();
+ run(&rt, plan.clone(), ctx.clone())
+ }),
);
}
diff --git a/datafusion/core/benches/filter_query_sql.rs b/datafusion/core/benches/filter_query_sql.rs
index 0e09ae09d7c2e..c82a1607184dc 100644
--- a/datafusion/core/benches/filter_query_sql.rs
+++ b/datafusion/core/benches/filter_query_sql.rs
@@ -27,9 +27,7 @@ use futures::executor::block_on;
use std::sync::Arc;
use tokio::runtime::Runtime;
-async fn query(ctx: &SessionContext, sql: &str) {
- let rt = Runtime::new().unwrap();
-
+async fn query(ctx: &SessionContext, rt: &Runtime, sql: &str) {
// execute the query
let df = rt.block_on(ctx.sql(sql)).unwrap();
criterion::black_box(rt.block_on(df.collect()).unwrap());
@@ -68,10 +66,11 @@ fn create_context(array_len: usize, batch_size: usize) -> Result<SessionContext>
fn criterion_benchmark(c: &mut Criterion) {
let array_len = 524_288; // 2^19
let batch_size = 4096; // 2^12
+ let rt = Runtime::new().unwrap();
c.bench_function("filter_array", |b| {
let ctx = create_context(array_len, batch_size).unwrap();
- b.iter(|| block_on(query(&ctx, "select f32, f64 from t where f32 >= f64")))
+ b.iter(|| block_on(query(&ctx, &rt, "select f32, f64 from t where f32 >= f64")))
});
c.bench_function("filter_scalar", |b| {
@@ -79,6 +78,7 @@ fn criterion_benchmark(c: &mut Criterion) {
b.iter(|| {
block_on(query(
&ctx,
+ &rt,
"select f32, f64 from t where f32 >= 250 and f64 > 250",
))
})
@@ -89,6 +89,7 @@ fn criterion_benchmark(c: &mut Criterion) {
b.iter(|| {
block_on(query(
&ctx,
+ &rt,
"select f32, f64 from t where f32 in (10, 20, 30, 40)",
))
})
diff --git a/datafusion/core/benches/math_query_sql.rs b/datafusion/core/benches/math_query_sql.rs
index 92c59d5066401..76824850c114c 100644
--- a/datafusion/core/benches/math_query_sql.rs
+++ b/datafusion/core/benches/math_query_sql.rs
@@ -36,9 +36,7 @@ use datafusion::datasource::MemTable;
use datafusion::error::Result;
use datafusion::execution::context::SessionContext;
-fn query(ctx: Arc<Mutex<SessionContext>>, sql: &str) {
- let rt = Runtime::new().unwrap();
-
+fn query(ctx: Arc<Mutex<SessionContext>>, rt: &Runtime, sql: &str) {
// execute the query
let df = rt.block_on(ctx.lock().sql(sql)).unwrap();
rt.block_on(df.collect()).unwrap();
@@ -81,29 +79,31 @@ fn criterion_benchmark(c: &mut Criterion) {
let array_len = 1048576; // 2^20
let batch_size = 512; // 2^9
let ctx = create_context(array_len, batch_size).unwrap();
+ let rt = Runtime::new().unwrap();
+
c.bench_function("sqrt_20_9", |b| {
- b.iter(|| query(ctx.clone(), "SELECT sqrt(f32) FROM t"))
+ b.iter(|| query(ctx.clone(), &rt, "SELECT sqrt(f32) FROM t"))
});
let array_len = 1048576; // 2^20
let batch_size = 4096; // 2^12
let ctx = create_context(array_len, batch_size).unwrap();
c.bench_function("sqrt_20_12", |b| {
- b.iter(|| query(ctx.clone(), "SELECT sqrt(f32) FROM t"))
+ b.iter(|| query(ctx.clone(), &rt, "SELECT sqrt(f32) FROM t"))
});
let array_len = 4194304; // 2^22
let batch_size = 4096; // 2^12
let ctx = create_context(array_len, batch_size).unwrap();
c.bench_function("sqrt_22_12", |b| {
- b.iter(|| query(ctx.clone(), "SELECT sqrt(f32) FROM t"))
+ b.iter(|| query(ctx.clone(), &rt, "SELECT sqrt(f32) FROM t"))
});
let array_len = 4194304; // 2^22
let batch_size = 16384; // 2^14
let ctx = create_context(array_len, batch_size).unwrap();
c.bench_function("sqrt_22_14", |b| {
- b.iter(|| query(ctx.clone(), "SELECT sqrt(f32) FROM t"))
+ b.iter(|| query(ctx.clone(), &rt, "SELECT sqrt(f32) FROM t"))
});
}
diff --git a/datafusion/core/benches/physical_plan.rs b/datafusion/core/benches/physical_plan.rs
index aae1457ab9e6d..0a65c52f72def 100644
--- a/datafusion/core/benches/physical_plan.rs
+++ b/datafusion/core/benches/physical_plan.rs
@@ -42,6 +42,7 @@ use datafusion_physical_expr_common::sort_expr::LexOrdering;
// as inputs. All record batches must have the same schema.
fn sort_preserving_merge_operator(
session_ctx: Arc<SessionContext>,
+ rt: &Runtime,
batches: Vec<RecordBatch>,
sort: &[&str],
) {
@@ -63,7 +64,6 @@ fn sort_preserving_merge_operator(
.unwrap();
let merge = Arc::new(SortPreservingMergeExec::new(sort, exec));
let task_ctx = session_ctx.task_ctx();
- let rt = Runtime::new().unwrap();
rt.block_on(collect(merge, task_ctx)).unwrap();
}
@@ -166,14 +166,16 @@ fn criterion_benchmark(c: &mut Criterion) {
];
let ctx = Arc::new(SessionContext::new());
+ let rt = Runtime::new().unwrap();
+
for (name, input) in benches {
- let ctx_clone = ctx.clone();
- c.bench_function(name, move |b| {
+ c.bench_function(name, |b| {
b.iter_batched(
|| input.clone(),
|input| {
sort_preserving_merge_operator(
- ctx_clone.clone(),
+ ctx.clone(),
+ &rt,
input,
&["a", "b", "c", "d"],
);
diff --git a/datafusion/core/benches/sort_limit_query_sql.rs b/datafusion/core/benches/sort_limit_query_sql.rs
index cfd4b8bc4bba8..e535a018161f1 100644
--- a/datafusion/core/benches/sort_limit_query_sql.rs
+++ b/datafusion/core/benches/sort_limit_query_sql.rs
@@ -37,9 +37,7 @@ use datafusion::execution::context::SessionContext;
use tokio::runtime::Runtime;
-fn query(ctx: Arc<Mutex<SessionContext>>, sql: &str) {
- let rt = Runtime::new().unwrap();
-
+fn query(ctx: Arc<Mutex<SessionContext>>, rt: &Runtime, sql: &str) {
// execute the query
let df = rt.block_on(ctx.lock().sql(sql)).unwrap();
rt.block_on(df.collect()).unwrap();
@@ -104,11 +102,14 @@ fn create_context() -> Arc<Mutex<SessionContext>> {
}
fn criterion_benchmark(c: &mut Criterion) {
+ let ctx = create_context();
+ let rt = Runtime::new().unwrap();
+
c.bench_function("sort_and_limit_by_int", |b| {
- let ctx = create_context();
b.iter(|| {
query(
ctx.clone(),
+ &rt,
"SELECT c1, c13, c6, c10 \
FROM aggregate_test_100 \
ORDER BY c6
@@ -118,10 +119,10 @@ fn criterion_benchmark(c: &mut Criterion) {
});
c.bench_function("sort_and_limit_by_float", |b| {
- let ctx = create_context();
b.iter(|| {
query(
ctx.clone(),
+ &rt,
"SELECT c1, c13, c12 \
FROM aggregate_test_100 \
ORDER BY c13
@@ -131,10 +132,10 @@ fn criterion_benchmark(c: &mut Criterion) {
});
c.bench_function("sort_and_limit_lex_by_int", |b| {
- let ctx = create_context();
b.iter(|| {
query(
ctx.clone(),
+ &rt,
"SELECT c1, c13, c6, c10 \
FROM aggregate_test_100 \
ORDER BY c6 DESC, c10 DESC
@@ -144,10 +145,10 @@ fn criterion_benchmark(c: &mut Criterion) {
});
c.bench_function("sort_and_limit_lex_by_string", |b| {
- let ctx = create_context();
b.iter(|| {
query(
ctx.clone(),
+ &rt,
"SELECT c1, c13, c6, c10 \
FROM aggregate_test_100 \
ORDER BY c1, c13
diff --git a/datafusion/core/benches/sql_planner.rs b/datafusion/core/benches/sql_planner.rs
index 2d79778d4d42f..49cc830d58bc4 100644
--- a/datafusion/core/benches/sql_planner.rs
+++ b/datafusion/core/benches/sql_planner.rs
@@ -45,14 +45,12 @@ const BENCHMARKS_PATH_2: &str = "./benchmarks/";
const CLICKBENCH_DATA_PATH: &str = "data/hits_partitioned/";
/// Create a logical plan from the specified sql
-fn logical_plan(ctx: &SessionContext, sql: &str) {
- let rt = Runtime::new().unwrap();
+fn logical_plan(ctx: &SessionContext, rt: &Runtime, sql: &str) {
criterion::black_box(rt.block_on(ctx.sql(sql)).unwrap());
}
/// Create a physical ExecutionPlan (by way of logical plan)
-fn physical_plan(ctx: &SessionContext, sql: &str) {
- let rt = Runtime::new().unwrap();
+fn physical_plan(ctx: &SessionContext, rt: &Runtime, sql: &str) {
criterion::black_box(rt.block_on(async {
ctx.sql(sql)
.await
@@ -104,9 +102,8 @@ fn register_defs(ctx: SessionContext, defs: Vec<TableDef>) -> SessionContext {
ctx
}
-fn register_clickbench_hits_table() -> SessionContext {
+fn register_clickbench_hits_table(rt: &Runtime) -> SessionContext {
let ctx = SessionContext::new();
- let rt = Runtime::new().unwrap();
// use an external table for clickbench benchmarks
let path =
@@ -128,7 +125,11 @@ fn register_clickbench_hits_table() -> SessionContext {
/// Target of this benchmark: control that placeholders replacing does not get slower,
/// if the query does not contain placeholders at all.
-fn benchmark_with_param_values_many_columns(ctx: &SessionContext, b: &mut Bencher) {
+fn benchmark_with_param_values_many_columns(
+ ctx: &SessionContext,
+ rt: &Runtime,
+ b: &mut Bencher,
+) {
const COLUMNS_NUM: usize = 200;
let mut aggregates = String::new();
for i in 0..COLUMNS_NUM {
@@ -140,7 +141,6 @@ fn benchmark_with_param_values_many_columns(ctx: &SessionContext, b: &mut Benche
// SELECT max(attr0), ..., max(attrN) FROM t1.
let query = format!("SELECT {} FROM t1", aggregates);
let statement = ctx.state().sql_to_statement(&query, "Generic").unwrap();
- let rt = Runtime::new().unwrap();
let plan =
rt.block_on(async { ctx.state().statement_to_plan(statement).await.unwrap() });
b.iter(|| {
@@ -230,33 +230,35 @@ fn criterion_benchmark(c: &mut Criterion) {
}
let ctx = create_context();
+ let rt = Runtime::new().unwrap();
// Test simplest
// https://github.com/apache/datafusion/issues/5157
c.bench_function("logical_select_one_from_700", |b| {
- b.iter(|| logical_plan(&ctx, "SELECT c1 FROM t700"))
+ b.iter(|| logical_plan(&ctx, &rt, "SELECT c1 FROM t700"))
});
// Test simplest
// https://github.com/apache/datafusion/issues/5157
c.bench_function("physical_select_one_from_700", |b| {
- b.iter(|| physical_plan(&ctx, "SELECT c1 FROM t700"))
+ b.iter(|| physical_plan(&ctx, &rt, "SELECT c1 FROM t700"))
});
// Test simplest
c.bench_function("logical_select_all_from_1000", |b| {
- b.iter(|| logical_plan(&ctx, "SELECT * FROM t1000"))
+ b.iter(|| logical_plan(&ctx, &rt, "SELECT * FROM t1000"))
});
// Test simplest
c.bench_function("physical_select_all_from_1000", |b| {
- b.iter(|| physical_plan(&ctx, "SELECT * FROM t1000"))
+ b.iter(|| physical_plan(&ctx, &rt, "SELECT * FROM t1000"))
});
c.bench_function("logical_trivial_join_low_numbered_columns", |b| {
b.iter(|| {
logical_plan(
&ctx,
+ &rt,
"SELECT t1.a2, t2.b2 \
FROM t1, t2 WHERE a1 = b1",
)
@@ -267,6 +269,7 @@ fn criterion_benchmark(c: &mut Criterion) {
b.iter(|| {
logical_plan(
&ctx,
+ &rt,
"SELECT t1.a99, t2.b99 \
FROM t1, t2 WHERE a199 = b199",
)
@@ -277,6 +280,7 @@ fn criterion_benchmark(c: &mut Criterion) {
b.iter(|| {
logical_plan(
&ctx,
+ &rt,
"SELECT t1.a99, MIN(t2.b1), MAX(t2.b199), AVG(t2.b123), COUNT(t2.b73) \
FROM t1 JOIN t2 ON t1.a199 = t2.b199 GROUP BY t1.a99",
)
@@ -293,7 +297,7 @@ fn criterion_benchmark(c: &mut Criterion) {
}
let query = format!("SELECT {} FROM t1", aggregates);
b.iter(|| {
- physical_plan(&ctx, &query);
+ physical_plan(&ctx, &rt, &query);
});
});
@@ -302,6 +306,7 @@ fn criterion_benchmark(c: &mut Criterion) {
b.iter(|| {
physical_plan(
&ctx,
+ &rt,
"SELECT t1.a7, t2.b8 \
FROM t1, t2 WHERE a7 = b7 \
ORDER BY a7",
@@ -313,6 +318,7 @@ fn criterion_benchmark(c: &mut Criterion) {
b.iter(|| {
physical_plan(
&ctx,
+ &rt,
"SELECT t1.a7, t2.b8 \
FROM t1, t2 WHERE a7 < b7 \
ORDER BY a7",
@@ -324,6 +330,7 @@ fn criterion_benchmark(c: &mut Criterion) {
b.iter(|| {
physical_plan(
&ctx,
+ &rt,
"SELECT ta.a9, tb.a10, tc.a11, td.a12, te.a13, tf.a14 \
FROM t1 AS ta, t1 AS tb, t1 AS tc, t1 AS td, t1 AS te, t1 AS tf \
WHERE ta.a9 = tb.a10 AND tb.a10 = tc.a11 AND tc.a11 = td.a12 AND \
@@ -336,6 +343,7 @@ fn criterion_benchmark(c: &mut Criterion) {
b.iter(|| {
physical_plan(
&ctx,
+ &rt,
"SELECT t1.a7 \
FROM t1 WHERE a7 = (SELECT b8 FROM t2)",
);
@@ -346,6 +354,7 @@ fn criterion_benchmark(c: &mut Criterion) {
b.iter(|| {
physical_plan(
&ctx,
+ &rt,
"SELECT t1.a7 FROM t1 \
INTERSECT SELECT t2.b8 FROM t2",
);
@@ -356,6 +365,7 @@ fn criterion_benchmark(c: &mut Criterion) {
b.iter(|| {
logical_plan(
&ctx,
+ &rt,
"SELECT DISTINCT t1.a7 \
FROM t1, t2 WHERE t1.a7 = t2.b8",
);
@@ -370,7 +380,7 @@ fn criterion_benchmark(c: &mut Criterion) {
c.bench_function("physical_sorted_union_orderby", |b| {
// SELECT ... UNION ALL ...
let query = union_orderby_query(20);
- b.iter(|| physical_plan(&ctx, &query))
+ b.iter(|| physical_plan(&ctx, &rt, &query))
});
// --- TPC-H ---
@@ -393,7 +403,7 @@ fn criterion_benchmark(c: &mut Criterion) {
let sql =
std::fs::read_to_string(format!("{benchmarks_path}queries/{q}.sql")).unwrap();
c.bench_function(&format!("physical_plan_tpch_{}", q), |b| {
- b.iter(|| physical_plan(&tpch_ctx, &sql))
+ b.iter(|| physical_plan(&tpch_ctx, &rt, &sql))
});
}
@@ -407,7 +417,7 @@ fn criterion_benchmark(c: &mut Criterion) {
c.bench_function("physical_plan_tpch_all", |b| {
b.iter(|| {
for sql in &all_tpch_sql_queries {
- physical_plan(&tpch_ctx, sql)
+ physical_plan(&tpch_ctx, &rt, sql)
}
})
});
@@ -442,7 +452,7 @@ fn criterion_benchmark(c: &mut Criterion) {
c.bench_function("physical_plan_tpcds_all", |b| {
b.iter(|| {
for sql in &all_tpcds_sql_queries {
- physical_plan(&tpcds_ctx, sql)
+ physical_plan(&tpcds_ctx, &rt, sql)
}
})
});
@@ -468,7 +478,7 @@ fn criterion_benchmark(c: &mut Criterion) {
.map(|l| l.expect("Could not parse line"))
.collect_vec();
- let clickbench_ctx = register_clickbench_hits_table();
+ let clickbench_ctx = register_clickbench_hits_table(&rt);
// for (i, sql) in clickbench_queries.iter().enumerate() {
// c.bench_function(&format!("logical_plan_clickbench_q{}", i + 1), |b| {
@@ -478,7 +488,7 @@ fn criterion_benchmark(c: &mut Criterion) {
for (i, sql) in clickbench_queries.iter().enumerate() {
c.bench_function(&format!("physical_plan_clickbench_q{}", i + 1), |b| {
- b.iter(|| physical_plan(&clickbench_ctx, sql))
+ b.iter(|| physical_plan(&clickbench_ctx, &rt, sql))
});
}
@@ -493,13 +503,13 @@ fn criterion_benchmark(c: &mut Criterion) {
c.bench_function("physical_plan_clickbench_all", |b| {
b.iter(|| {
for sql in &clickbench_queries {
- physical_plan(&clickbench_ctx, sql)
+ physical_plan(&clickbench_ctx, &rt, sql)
}
})
});
c.bench_function("with_param_values_many_columns", |b| {
- benchmark_with_param_values_many_columns(&ctx, b);
+ benchmark_with_param_values_many_columns(&ctx, &rt, b);
});
}
diff --git a/datafusion/core/benches/struct_query_sql.rs b/datafusion/core/benches/struct_query_sql.rs
index 3ef7292c66271..f9cc43d1ea2c5 100644
--- a/datafusion/core/benches/struct_query_sql.rs
+++ b/datafusion/core/benches/struct_query_sql.rs
@@ -27,9 +27,7 @@ use futures::executor::block_on;
use std::sync::Arc;
use tokio::runtime::Runtime;
-async fn query(ctx: &SessionContext, sql: &str) {
- let rt = Runtime::new().unwrap();
-
+async fn query(ctx: &SessionContext, rt: &Runtime, sql: &str) {
// execute the query
let df = rt.block_on(ctx.sql(sql)).unwrap();
criterion::black_box(rt.block_on(df.collect()).unwrap());
@@ -68,10 +66,11 @@ fn create_context(array_len: usize, batch_size: usize) -> Result<SessionContext>
fn criterion_benchmark(c: &mut Criterion) {
let array_len = 524_288; // 2^19
let batch_size = 4096; // 2^12
+ let ctx = create_context(array_len, batch_size).unwrap();
+ let rt = Runtime::new().unwrap();
c.bench_function("struct", |b| {
- let ctx = create_context(array_len, batch_size).unwrap();
- b.iter(|| block_on(query(&ctx, "select struct(f32, f64) from t")))
+ b.iter(|| block_on(query(&ctx, &rt, "select struct(f32, f64) from t")))
});
}
diff --git a/datafusion/core/benches/topk_aggregate.rs b/datafusion/core/benches/topk_aggregate.rs
index 922cbd2b42292..cf3c7fa2e26fe 100644
--- a/datafusion/core/benches/topk_aggregate.rs
+++ b/datafusion/core/benches/topk_aggregate.rs
@@ -33,8 +33,9 @@ async fn create_context(
sample_cnt: i32,
asc: bool,
use_topk: bool,
+ use_view: bool,
+ ) -> Result<(Arc<dyn ExecutionPlan>, Arc<TaskContext>)> {
- let (schema, parts) = make_data(partition_cnt, sample_cnt, asc).unwrap();
+ let (schema, parts) = make_data(partition_cnt, sample_cnt, asc, use_view).unwrap();
let mem_table = Arc::new(MemTable::try_new(schema, parts).unwrap());
// Create the DataFrame
@@ -55,8 +56,7 @@ async fn create_context(
Ok((physical_plan, ctx.task_ctx()))
}
-fn run(plan: Arc<dyn ExecutionPlan>, ctx: Arc<TaskContext>, asc: bool) {
- let rt = Runtime::new().unwrap();
+fn run(rt: &Runtime, plan: Arc<dyn ExecutionPlan>, ctx: Arc<TaskContext>, asc: bool) {
criterion::black_box(
rt.block_on(async { aggregate(plan.clone(), ctx.clone(), asc).await }),
)
@@ -99,40 +99,37 @@ async fn aggregate(
}
fn criterion_benchmark(c: &mut Criterion) {
+ let rt = Runtime::new().unwrap();
let limit = 10;
let partitions = 10;
let samples = 1_000_000;
- let rt = Runtime::new().unwrap();
- let topk_real = rt.block_on(async {
- create_context(limit, partitions, samples, false, true)
- .await
- .unwrap()
- });
- let topk_asc = rt.block_on(async {
- create_context(limit, partitions, samples, true, true)
- .await
- .unwrap()
- });
- let real = rt.block_on(async {
- create_context(limit, partitions, samples, false, false)
- .await
- .unwrap()
- });
- let asc = rt.block_on(async {
- create_context(limit, partitions, samples, true, false)
- .await
- .unwrap()
- });
-
c.bench_function(
format!("aggregate {} time-series rows", partitions * samples).as_str(),
- |b| b.iter(|| run(real.0.clone(), real.1.clone(), false)),
+ |b| {
+ b.iter(|| {
+ let real = rt.block_on(async {
+ create_context(limit, partitions, samples, false, false, false)
+ .await
+ .unwrap()
+ });
+ run(&rt, real.0.clone(), real.1.clone(), false)
+ })
+ },
);
c.bench_function(
format!("aggregate {} worst-case rows", partitions * samples).as_str(),
- |b| b.iter(|| run(asc.0.clone(), asc.1.clone(), true)),
+ |b| {
+ b.iter(|| {
+ let asc = rt.block_on(async {
+ create_context(limit, partitions, samples, true, false, false)
+ .await
+ .unwrap()
+ });
+ run(&rt, asc.0.clone(), asc.1.clone(), true)
+ })
+ },
);
c.bench_function(
@@ -141,7 +138,16 @@ fn criterion_benchmark(c: &mut Criterion) {
partitions * samples
)
.as_str(),
- |b| b.iter(|| run(topk_real.0.clone(), topk_real.1.clone(), false)),
+ |b| {
+ b.iter(|| {
+ let topk_real = rt.block_on(async {
+ create_context(limit, partitions, samples, false, true, false)
+ .await
+ .unwrap()
+ });
+ run(&rt, topk_real.0.clone(), topk_real.1.clone(), false)
+ })
+ },
);
c.bench_function(
@@ -150,7 +156,54 @@ fn criterion_benchmark(c: &mut Criterion) {
partitions * samples
)
.as_str(),
- |b| b.iter(|| run(topk_asc.0.clone(), topk_asc.1.clone(), true)),
+ |b| {
+ b.iter(|| {
+ let topk_asc = rt.block_on(async {
+ create_context(limit, partitions, samples, true, true, false)
+ .await
+ .unwrap()
+ });
+ run(&rt, topk_asc.0.clone(), topk_asc.1.clone(), true)
+ })
+ },
+ );
+
+ // Utf8View schema, time-series rows
+ c.bench_function(
+ format!(
+ "top k={limit} aggregate {} time-series rows [Utf8View]",
+ partitions * samples
+ )
+ .as_str(),
+ |b| {
+ b.iter(|| {
+ let topk_real = rt.block_on(async {
+ create_context(limit, partitions, samples, false, true, true)
+ .await
+ .unwrap()
+ });
+ run(&rt, topk_real.0.clone(), topk_real.1.clone(), false)
+ })
+ },
+ );
+
+ // Utf8View schema, worst-case rows
+ c.bench_function(
+ format!(
+ "top k={limit} aggregate {} worst-case rows [Utf8View]",
+ partitions * samples
+ )
+ .as_str(),
+ |b| {
+ b.iter(|| {
+ let topk_asc = rt.block_on(async {
+ create_context(limit, partitions, samples, true, true, true)
+ .await
+ .unwrap()
+ });
+ run(&rt, topk_asc.0.clone(), topk_asc.1.clone(), true)
+ })
+ },
);
}
diff --git a/datafusion/core/benches/window_query_sql.rs b/datafusion/core/benches/window_query_sql.rs
index 42a1e51be361a..a55d17a7c5dcf 100644
--- a/datafusion/core/benches/window_query_sql.rs
+++ b/datafusion/core/benches/window_query_sql.rs
@@ -29,8 +29,7 @@ use parking_lot::Mutex;
use std::sync::Arc;
use tokio::runtime::Runtime;
-fn query(ctx: Arc<Mutex<SessionContext>>, sql: &str) {
- let rt = Runtime::new().unwrap();
+fn query(ctx: Arc<Mutex<SessionContext>>, rt: &Runtime, sql: &str) {
let df = rt.block_on(ctx.lock().sql(sql)).unwrap();
criterion::black_box(rt.block_on(df.collect()).unwrap());
}
@@ -51,11 +50,13 @@ fn criterion_benchmark(c: &mut Criterion) {
let array_len = 1024 * 1024;
let batch_size = 8 * 1024;
let ctx = create_context(partitions_len, array_len, batch_size).unwrap();
+ let rt = Runtime::new().unwrap();
c.bench_function("window empty over, aggregate functions", |b| {
b.iter(|| {
query(
ctx.clone(),
+ &rt,
"SELECT \
MAX(f64) OVER (), \
MIN(f32) OVER (), \
@@ -69,6 +70,7 @@ fn criterion_benchmark(c: &mut Criterion) {
b.iter(|| {
query(
ctx.clone(),
+ &rt,
"SELECT \
FIRST_VALUE(f64) OVER (), \
LAST_VALUE(f32) OVER (), \
@@ -82,6 +84,7 @@ fn criterion_benchmark(c: &mut Criterion) {
b.iter(|| {
query(
ctx.clone(),
+ &rt,
"SELECT \
MAX(f64) OVER (ORDER BY u64_narrow), \
MIN(f32) OVER (ORDER BY u64_narrow DESC), \
@@ -95,6 +98,7 @@ fn criterion_benchmark(c: &mut Criterion) {
b.iter(|| {
query(
ctx.clone(),
+ &rt,
"SELECT \
FIRST_VALUE(f64) OVER (ORDER BY u64_narrow), \
LAST_VALUE(f32) OVER (ORDER BY u64_narrow DESC), \
@@ -108,6 +112,7 @@ fn criterion_benchmark(c: &mut Criterion) {
b.iter(|| {
query(
ctx.clone(),
+ &rt,
"SELECT \
MAX(f64) OVER (PARTITION BY u64_wide), \
MIN(f32) OVER (PARTITION BY u64_wide), \
@@ -123,6 +128,7 @@ fn criterion_benchmark(c: &mut Criterion) {
b.iter(|| {
query(
ctx.clone(),
+ &rt,
"SELECT \
MAX(f64) OVER (PARTITION BY u64_narrow), \
MIN(f32) OVER (PARTITION BY u64_narrow), \
@@ -137,6 +143,7 @@ fn criterion_benchmark(c: &mut Criterion) {
b.iter(|| {
query(
ctx.clone(),
+ &rt,
"SELECT \
FIRST_VALUE(f64) OVER (PARTITION BY u64_wide), \
LAST_VALUE(f32) OVER (PARTITION BY u64_wide), \
@@ -150,6 +157,7 @@ fn criterion_benchmark(c: &mut Criterion) {
b.iter(|| {
query(
ctx.clone(),
+ &rt,
"SELECT \
FIRST_VALUE(f64) OVER (PARTITION BY u64_narrow), \
LAST_VALUE(f32) OVER (PARTITION BY u64_narrow), \
@@ -165,6 +173,7 @@ fn criterion_benchmark(c: &mut Criterion) {
b.iter(|| {
query(
ctx.clone(),
+ &rt,
"SELECT \
MAX(f64) OVER (PARTITION BY u64_wide ORDER by f64), \
MIN(f32) OVER (PARTITION BY u64_wide ORDER by f64), \
@@ -181,6 +190,7 @@ fn criterion_benchmark(c: &mut Criterion) {
b.iter(|| {
query(
ctx.clone(),
+ &rt,
"SELECT \
MAX(f64) OVER (PARTITION BY u64_narrow ORDER by f64), \
MIN(f32) OVER (PARTITION BY u64_narrow ORDER by f64), \
@@ -197,6 +207,7 @@ fn criterion_benchmark(c: &mut Criterion) {
b.iter(|| {
query(
ctx.clone(),
+ &rt,
"SELECT \
FIRST_VALUE(f64) OVER (PARTITION BY u64_wide ORDER by f64), \
LAST_VALUE(f32) OVER (PARTITION BY u64_wide ORDER by f64), \
@@ -213,6 +224,7 @@ fn criterion_benchmark(c: &mut Criterion) {
b.iter(|| {
query(
ctx.clone(),
+ &rt,
"SELECT \
FIRST_VALUE(f64) OVER (PARTITION BY u64_narrow ORDER by f64), \
LAST_VALUE(f32) OVER (PARTITION BY u64_narrow ORDER by f64), \
diff --git a/datafusion/core/src/bin/print_runtime_config_docs.rs b/datafusion/core/src/bin/print_runtime_config_docs.rs
new file mode 100644
index 0000000000000..f374a5acb78a0
--- /dev/null
+++ b/datafusion/core/src/bin/print_runtime_config_docs.rs
@@ -0,0 +1,23 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+use datafusion_execution::runtime_env::RuntimeEnvBuilder;
+
+fn main() {
+ let docs = RuntimeEnvBuilder::generate_config_markdown();
+ println!("{}", docs);
+}
diff --git a/datafusion/core/src/dataframe/mod.rs b/datafusion/core/src/dataframe/mod.rs
index 9c27c7c5d3076..9a70f8f43fb61 100644
--- a/datafusion/core/src/dataframe/mod.rs
+++ b/datafusion/core/src/dataframe/mod.rs
@@ -685,6 +685,46 @@ impl DataFrame {
})
}
+ /// Calculate the union of two [`DataFrame`]s using column names, preserving duplicate rows.
+ ///
+ /// The two [`DataFrame`]s are combined using column names rather than position,
+ /// filling missing columns with null.
+ ///
+ ///
+ /// # Example
+ /// ```
+ /// # use datafusion::prelude::*;
+ /// # use datafusion::error::Result;
+ /// # use datafusion_common::assert_batches_sorted_eq;
+ /// # #[tokio::main]
+ /// # async fn main() -> Result<()> {
+ /// let ctx = SessionContext::new();
+ /// let df = ctx.read_csv("tests/data/example.csv", CsvReadOptions::new()).await?;
+ /// let d2 = df.clone().select_columns(&["b", "c", "a"])?.with_column("d", lit("77"))?;
+ /// let df = df.union_by_name(d2)?;
+ /// let expected = vec![
+ /// "+---+---+---+----+",
+ /// "| a | b | c | d |",
+ /// "+---+---+---+----+",
+ /// "| 1 | 2 | 3 | |",
+ /// "| 1 | 2 | 3 | 77 |",
+ /// "+---+---+---+----+"
+ /// ];
+ /// # assert_batches_sorted_eq!(expected, &df.collect().await?);
+ /// # Ok(())
+ /// # }
+ /// ```
+ pub fn union_by_name(self, dataframe: DataFrame) -> Result<DataFrame> {
+ let plan = LogicalPlanBuilder::from(self.plan)
+ .union_by_name(dataframe.plan)?
+ .build()?;
+ Ok(DataFrame {
+ session_state: self.session_state,
+ plan,
+ projection_requires_validation: true,
+ })
+ }
+
/// Calculate the distinct union of two [`DataFrame`]s.
///
/// The two [`DataFrame`]s must have exactly the same schema. Any duplicate
@@ -724,6 +764,45 @@ impl DataFrame {
})
}
+ /// Calculate the union of two [`DataFrame`]s using column names with all duplicated rows removed.
+ ///
+ /// The two [`DataFrame`]s are combined using column names rather than position,
+ /// filling missing columns with null.
+ ///
+ ///
+ /// # Example
+ /// ```
+ /// # use datafusion::prelude::*;
+ /// # use datafusion::error::Result;
+ /// # use datafusion_common::assert_batches_sorted_eq;
+ /// # #[tokio::main]
+ /// # async fn main() -> Result<()> {
+ /// let ctx = SessionContext::new();
+ /// let df = ctx.read_csv("tests/data/example.csv", CsvReadOptions::new()).await?;
+ /// let d2 = df.clone().select_columns(&["b", "c", "a"])?;
+ /// let df = df.union_by_name_distinct(d2)?;
+ /// let expected = vec![
+ /// "+---+---+---+",
+ /// "| a | b | c |",
+ /// "+---+---+---+",
+ /// "| 1 | 2 | 3 |",
+ /// "+---+---+---+"
+ /// ];
+ /// # assert_batches_sorted_eq!(expected, &df.collect().await?);
+ /// # Ok(())
+ /// # }
+ /// ```
+ pub fn union_by_name_distinct(self, dataframe: DataFrame) -> Result<DataFrame> {
+ let plan = LogicalPlanBuilder::from(self.plan)
+ .union_by_name_distinct(dataframe.plan)?
+ .build()?;
+ Ok(DataFrame {
+ session_state: self.session_state,
+ plan,
+ projection_requires_validation: true,
+ })
+ }
+
/// Return a new `DataFrame` with all duplicated rows removed.
///
/// # Example
diff --git a/datafusion/core/src/datasource/file_format/arrow.rs b/datafusion/core/src/datasource/file_format/arrow.rs
index 6c7c9463cf3b7..7fc27453d1ad5 100644
--- a/datafusion/core/src/datasource/file_format/arrow.rs
+++ b/datafusion/core/src/datasource/file_format/arrow.rs
@@ -144,6 +144,7 @@ impl FileFormat for ArrowFormat {
for object in objects {
let r = store.as_ref().get(&object.location).await?;
let schema = match r.payload {
+ #[cfg(not(target_arch = "wasm32"))]
GetResultPayload::File(mut file, _) => {
let reader = FileReader::try_new(&mut file, None)?;
reader.schema()
@@ -442,7 +443,7 @@ mod tests {
let object_meta = ObjectMeta {
location,
last_modified: DateTime::default(),
- size: usize::MAX,
+ size: u64::MAX,
e_tag: None,
version: None,
};
@@ -485,7 +486,7 @@ mod tests {
let object_meta = ObjectMeta {
location,
last_modified: DateTime::default(),
- size: usize::MAX,
+ size: u64::MAX,
e_tag: None,
version: None,
};
diff --git a/datafusion/core/src/datasource/file_format/avro.rs b/datafusion/core/src/datasource/file_format/avro.rs
index a9516aad9e22d..3428d08a6ae52 100644
--- a/datafusion/core/src/datasource/file_format/avro.rs
+++ b/datafusion/core/src/datasource/file_format/avro.rs
@@ -382,6 +382,15 @@ mod tests {
let testdata = test_util::arrow_test_data();
let store_root = format!("{testdata}/avro");
let format = AvroFormat {};
- scan_format(state, &format, &store_root, file_name, projection, limit).await
+ scan_format(
+ state,
+ &format,
+ None,
+ &store_root,
+ file_name,
+ projection,
+ limit,
+ )
+ .await
}
}
diff --git a/datafusion/core/src/datasource/file_format/csv.rs b/datafusion/core/src/datasource/file_format/csv.rs
index 309458975ab6c..323bc28057d43 100644
--- a/datafusion/core/src/datasource/file_format/csv.rs
+++ b/datafusion/core/src/datasource/file_format/csv.rs
@@ -72,7 +72,7 @@ mod tests {
#[derive(Debug)]
struct VariableStream {
bytes_to_repeat: Bytes,
- max_iterations: usize,
+ max_iterations: u64,
iterations_detected: Arc<Mutex<usize>>,
}
@@ -103,14 +103,15 @@ mod tests {
async fn get(&self, location: &Path) -> object_store::Result<GetResult> {
let bytes = self.bytes_to_repeat.clone();
- let range = 0..bytes.len() * self.max_iterations;
+ let len = bytes.len() as u64;
+ let range = 0..len * self.max_iterations;
let arc = self.iterations_detected.clone();
let stream = futures::stream::repeat_with(move || {
let arc_inner = arc.clone();
*arc_inner.lock().unwrap() += 1;
Ok(bytes.clone())
})
- .take(self.max_iterations)
+ .take(self.max_iterations as usize)
.boxed();
Ok(GetResult {
@@ -138,7 +139,7 @@ mod tests {
async fn get_ranges(
&self,
_location: &Path,
- _ranges: &[Range<usize>],
+ _ranges: &[Range<u64>],
) -> object_store::Result<Vec<Bytes>> {
unimplemented!()
}
@@ -154,7 +155,7 @@ mod tests {
fn list(
&self,
_prefix: Option<&Path>,
- ) -> BoxStream<'_, object_store::Result<ObjectMeta>> {
+ ) -> BoxStream<'static, object_store::Result<ObjectMeta>> {
unimplemented!()
}
@@ -179,7 +180,7 @@ mod tests {
}
impl VariableStream {
- pub fn new(bytes_to_repeat: Bytes, max_iterations: usize) -> Self {
+ pub fn new(bytes_to_repeat: Bytes, max_iterations: u64) -> Self {
Self {
bytes_to_repeat,
max_iterations,
@@ -249,6 +250,7 @@ mod tests {
let exec = scan_format(
&state,
&format,
+ None,
root,
"aggregate_test_100_with_nulls.csv",
projection,
@@ -299,6 +301,7 @@ mod tests {
let exec = scan_format(
&state,
&format,
+ None,
root,
"aggregate_test_100_with_nulls.csv",
projection,
@@ -371,7 +374,7 @@ mod tests {
let object_meta = ObjectMeta {
location: Path::parse("/")?,
last_modified: DateTime::default(),
- size: usize::MAX,
+ size: u64::MAX,
e_tag: None,
version: None,
};
@@ -429,7 +432,7 @@ mod tests {
let object_meta = ObjectMeta {
location: Path::parse("/")?,
last_modified: DateTime::default(),
- size: usize::MAX,
+ size: u64::MAX,
e_tag: None,
version: None,
};
@@ -581,7 +584,7 @@ mod tests {
) -> Result<Arc<dyn ExecutionPlan>> {
let root = format!("{}/csv", arrow_test_data());
let format = CsvFormat::default().with_has_header(has_header);
- scan_format(state, &format, &root, file_name, projection, limit).await
+ scan_format(state, &format, None, &root, file_name, projection, limit).await
}
#[tokio::test]
diff --git a/datafusion/core/src/datasource/file_format/json.rs b/datafusion/core/src/datasource/file_format/json.rs
index d533dcf7646da..a70a0f51d3307 100644
--- a/datafusion/core/src/datasource/file_format/json.rs
+++ b/datafusion/core/src/datasource/file_format/json.rs
@@ -149,7 +149,7 @@ mod tests {
) -> Result<Arc<dyn ExecutionPlan>> {
let filename = "tests/data/2.json";
let format = JsonFormat::default();
- scan_format(state, &format, ".", filename, projection, limit).await
+ scan_format(state, &format, None, ".", filename, projection, limit).await
}
#[tokio::test]
diff --git a/datafusion/core/src/datasource/file_format/mod.rs b/datafusion/core/src/datasource/file_format/mod.rs
index e921f0158e540..3a098301f14e3 100644
--- a/datafusion/core/src/datasource/file_format/mod.rs
+++ b/datafusion/core/src/datasource/file_format/mod.rs
@@ -36,19 +36,20 @@ pub use datafusion_datasource::write;
#[cfg(test)]
pub(crate) mod test_util {
- use std::sync::Arc;
-
+ use arrow_schema::SchemaRef;
use datafusion_catalog::Session;
use datafusion_common::Result;
use datafusion_datasource::file_scan_config::FileScanConfigBuilder;
use datafusion_datasource::{file_format::FileFormat, PartitionedFile};
use datafusion_execution::object_store::ObjectStoreUrl;
+ use std::sync::Arc;
use crate::test::object_store::local_unpartitioned_file;
pub async fn scan_format(
state: &dyn Session,
format: &dyn FileFormat,
+ schema: Option<SchemaRef>,
store_root: &str,
file_name: &str,
projection: Option<Vec<usize>>,
@@ -57,9 +58,13 @@ pub(crate) mod test_util {
let store = Arc::new(object_store::local::LocalFileSystem::new()) as _;
let meta = local_unpartitioned_file(format!("{store_root}/{file_name}"));
- let file_schema = format
- .infer_schema(state, &store, std::slice::from_ref(&meta))
- .await?;
+ let file_schema = if let Some(file_schema) = schema {
+ file_schema
+ } else {
+ format
+ .infer_schema(state, &store, std::slice::from_ref(&meta))
+ .await?
+ };
let statistics = format
.infer_stats(state, &store, file_schema.clone(), &meta)
@@ -127,7 +132,7 @@ mod tests {
.write_parquet(out_dir_url, DataFrameWriteOptions::new(), None)
.await
.expect_err("should fail because input file does not match inferred schema");
- assert_eq!(e.strip_backtrace(), "Arrow error: Parser error: Error while parsing value d for column 0 at line 4");
+ assert_eq!(e.strip_backtrace(), "Arrow error: Parser error: Error while parsing value 'd' as type 'Int64' for column 0 at line 4. Row data: '[d,4]'");
Ok(())
}
}
diff --git a/datafusion/core/src/datasource/file_format/parquet.rs b/datafusion/core/src/datasource/file_format/parquet.rs
index 27a7e7ae3c061..7b8b99273f4ea 100644
--- a/datafusion/core/src/datasource/file_format/parquet.rs
+++ b/datafusion/core/src/datasource/file_format/parquet.rs
@@ -67,13 +67,13 @@ pub(crate) mod test_util {
.into_iter()
.zip(tmp_files.into_iter())
.map(|(batch, mut output)| {
- let builder = parquet::file::properties::WriterProperties::builder();
- let props = if multi_page {
- builder.set_data_page_row_count_limit(ROWS_PER_PAGE)
- } else {
- builder
+ let mut builder = parquet::file::properties::WriterProperties::builder();
+ if multi_page {
+ builder = builder.set_data_page_row_count_limit(ROWS_PER_PAGE)
}
- .build();
+ builder = builder.set_bloom_filter_enabled(true);
+
+ let props = builder.build();
let mut writer = parquet::arrow::ArrowWriter::try_new(
&mut output,
@@ -331,7 +331,7 @@ mod tests {
fn list(
&self,
_prefix: Option<&Path>,
- ) -> BoxStream<'_, object_store::Result<ObjectMeta>> {
+ ) -> BoxStream<'static, object_store::Result<ObjectMeta>> {
Box::pin(futures::stream::once(async {
Err(object_store::Error::NotImplemented)
}))
@@ -408,7 +408,7 @@ mod tests {
)));
// Use the file size as the hint so we can get the full metadata from the first fetch
- let size_hint = meta[0].size;
+ let size_hint = meta[0].size as usize;
fetch_parquet_metadata(store.upcast().as_ref(), &meta[0], Some(size_hint))
.await
@@ -443,7 +443,7 @@ mod tests {
)));
// Use the a size hint larger than the file size to make sure we don't panic
- let size_hint = meta[0].size + 100;
+ let size_hint = (meta[0].size + 100) as usize;
fetch_parquet_metadata(store.upcast().as_ref(), &meta[0], Some(size_hint))
.await
@@ -1075,7 +1075,10 @@ mod tests {
.map(|factory| factory.create(state, &Default::default()).unwrap())
.unwrap_or(Arc::new(ParquetFormat::new()));
- scan_format(state, &*format, &testdata, file_name, projection, limit).await
+ scan_format(
+ state, &*format, None, &testdata, file_name, projection, limit,
+ )
+ .await
}
/// Test that 0-byte files don't break while reading
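Side note on the WriterProperties change above: the test helper now always enables bloom filters and only limits data-page row counts when writing multi-page files. A minimal standalone sketch of that builder pattern (the `ROWS_PER_PAGE` constant and `multi_page` flag are stand-ins for the helper's values):

use parquet::file::properties::WriterProperties;

fn writer_props(multi_page: bool) -> WriterProperties {
    const ROWS_PER_PAGE: usize = 2; // illustrative value
    let mut builder = WriterProperties::builder();
    if multi_page {
        // small pages give the page index something to prune
        builder = builder.set_data_page_row_count_limit(ROWS_PER_PAGE);
    }
    // bloom filters are now always written so bloom-filter pruning can be tested
    builder = builder.set_bloom_filter_enabled(true);
    builder.build()
}

fn main() {
    let _props = writer_props(true);
}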
diff --git a/datafusion/core/src/datasource/listing/table.rs b/datafusion/core/src/datasource/listing/table.rs
index 61eeb419a4800..a9834da92e5a4 100644
--- a/datafusion/core/src/datasource/listing/table.rs
+++ b/datafusion/core/src/datasource/listing/table.rs
@@ -17,18 +17,16 @@
//! The table implementation.
-use std::collections::HashMap;
-use std::{any::Any, str::FromStr, sync::Arc};
-
use super::helpers::{expr_applicable_for_cols, pruned_partition_list};
use super::{ListingTableUrl, PartitionedFile};
+use std::collections::HashMap;
+use std::{any::Any, str::FromStr, sync::Arc};
use crate::datasource::{
create_ordering,
file_format::{
file_compression_type::FileCompressionType, FileFormat, FilePushdownSupport,
},
- get_statistics_with_limit,
physical_plan::FileSinkConfig,
};
use crate::execution::context::SessionState;
@@ -55,9 +53,11 @@ use datafusion_physical_expr::{
use async_trait::async_trait;
use datafusion_catalog::Session;
+use datafusion_common::stats::Precision;
+use datafusion_datasource::compute_all_files_statistics;
use datafusion_datasource::file_groups::FileGroup;
use datafusion_physical_expr_common::sort_expr::LexRequirement;
-use futures::{future, stream, StreamExt, TryStreamExt};
+use futures::{future, stream, Stream, StreamExt, TryStreamExt};
use itertools::Itertools;
use object_store::ObjectStore;
@@ -715,9 +715,13 @@ impl ListingOptions {
#[derive(Debug)]
pub struct ListingTable {
table_paths: Vec<ListingTableUrl>,
- /// File fields only
+ /// `file_schema` contains only the columns physically stored in the data files themselves.
+ /// - Represents the actual fields found in files like Parquet, CSV, etc.
+ /// - Used when reading the raw data from files
file_schema: SchemaRef,
- /// File fields + partition columns
+ /// `table_schema` combines `file_schema` + partition columns
+ /// - Partition columns are derived from directory paths (not stored in files)
+ /// - These are columns like "year=2022/month=01" in paths like `/data/year=2022/month=01/file.parquet`
table_schema: SchemaRef,
options: ListingOptions,
definition: Option<String>,
@@ -795,7 +799,7 @@ impl ListingTable {
/// If `None`, creates a new [`DefaultFileStatisticsCache`] scoped to this query.
pub fn with_cache(mut self, cache: Option<FileStatisticsCache>) -> Self {
self.collected_statistics =
- cache.unwrap_or(Arc::new(DefaultFileStatisticsCache::default()));
+ cache.unwrap_or_else(|| Arc::new(DefaultFileStatisticsCache::default()));
self
}
@@ -874,15 +878,13 @@ impl TableProvider for ListingTable {
filters.iter().cloned().partition(|filter| {
can_be_evaluted_for_partition_pruning(&table_partition_col_names, filter)
});
- // TODO (https://github.com/apache/datafusion/issues/11600) remove downcast_ref from here?
- let session_state = state.as_any().downcast_ref::<SessionState>().unwrap();
// We should not limit the number of partitioned files to scan if there are filters and limit
// at the same time. This is because the limit should be applied after the filters are applied.
let statistic_file_limit = if filters.is_empty() { limit } else { None };
let (mut partitioned_file_lists, statistics) = self
- .list_files_for_scan(session_state, &partition_filters, statistic_file_limit)
+ .list_files_for_scan(state, &partition_filters, statistic_file_limit)
.await?;
// if no files need to be read, return an `EmptyExec`
@@ -898,10 +900,11 @@ impl TableProvider for ListingTable {
.split_file_groups_by_statistics
.then(|| {
output_ordering.first().map(|output_ordering| {
- FileScanConfig::split_groups_by_statistics(
+ FileScanConfig::split_groups_by_statistics_with_target_partitions(
&self.table_schema,
&partitioned_file_lists,
output_ordering,
+ self.options.target_partitions,
)
})
})
@@ -941,7 +944,7 @@ impl TableProvider for ListingTable {
self.options
.format
.create_physical_plan(
- session_state,
+ state,
FileScanConfigBuilder::new(
object_store_url,
Arc::clone(&self.file_schema),
@@ -1021,10 +1024,8 @@ impl TableProvider for ListingTable {
// Get the object store for the table path.
let store = state.runtime_env().object_store(table_path)?;
- // TODO (https://github.com/apache/datafusion/issues/11600) remove downcast_ref from here?
- let session_state = state.as_any().downcast_ref::<SessionState>().unwrap();
let file_list_stream = pruned_partition_list(
- session_state,
+ state,
store.as_ref(),
table_path,
&[],
@@ -1072,7 +1073,7 @@ impl TableProvider for ListingTable {
self.options()
.format
- .create_writer_physical_plan(input, session_state, config, order_requirements)
+ .create_writer_physical_plan(input, state, config, order_requirements)
.await
}
@@ -1115,32 +1116,26 @@ impl ListingTable {
let files = file_list
.map(|part_file| async {
let part_file = part_file?;
- if self.options.collect_stat {
- let statistics =
- self.do_collect_statistics(ctx, &store, &part_file).await?;
- Ok((part_file, statistics))
+ let statistics = if self.options.collect_stat {
+ self.do_collect_statistics(ctx, &store, &part_file).await?
} else {
- Ok((
- part_file,
- Arc::new(Statistics::new_unknown(&self.file_schema)),
- ))
- }
+ Arc::new(Statistics::new_unknown(&self.file_schema))
+ };
+ Ok(part_file.with_statistics(statistics))
})
.boxed()
.buffer_unordered(ctx.config_options().execution.meta_fetch_concurrency);
- let (files, statistics) = get_statistics_with_limit(
- files,
+ let (file_group, inexact_stats) =
+ get_files_with_limit(files, limit, self.options.collect_stat).await?;
+
+ let file_groups = file_group.split_files(self.options.target_partitions);
+ compute_all_files_statistics(
+ file_groups,
self.schema(),
- limit,
self.options.collect_stat,
+ inexact_stats,
)
- .await?;
-
- Ok((
- files.split_files(self.options.target_partitions),
- statistics,
- ))
}
/// Collects statistics for a given partitioned file.
@@ -1182,6 +1177,82 @@ impl ListingTable {
}
}
+/// Processes a stream of partitioned files and returns a `FileGroup` containing the files.
+///
+/// This function collects files from the provided stream until either:
+/// 1. The stream is exhausted
+/// 2. The accumulated number of rows exceeds the provided `limit` (if specified)
+///
+/// # Arguments
+/// * `files` - A stream of `Result<PartitionedFile>` items to process
+/// * `limit` - An optional row count limit. If provided, the function will stop collecting files
+/// once the accumulated number of rows exceeds this limit
+/// * `collect_stats` - Whether to collect and accumulate statistics from the files
+///
+/// # Returns
+/// A `Result` containing a `FileGroup` with the collected files
+/// and a boolean indicating whether the statistics are inexact.
+///
+/// # Note
+/// The function will continue processing files if statistics are not available or if the
+/// limit is not provided. If `collect_stats` is false, statistics won't be accumulated
+/// but files will still be collected.
+async fn get_files_with_limit(
+ files: impl Stream<Item = Result<PartitionedFile>>,
+ limit: Option<usize>,
+ collect_stats: bool,
+) -> Result<(FileGroup, bool)> {
+ let mut file_group = FileGroup::default();
+ // Fusing the stream allows us to call next safely even once it is finished.
+ let mut all_files = Box::pin(files.fuse());
+ enum ProcessingState {
+ ReadingFiles,
+ ReachedLimit,
+ }
+
+ let mut state = ProcessingState::ReadingFiles;
+ let mut num_rows = Precision::Absent;
+
+ while let Some(file_result) = all_files.next().await {
+ // Early exit if we've already reached our limit
+ if matches!(state, ProcessingState::ReachedLimit) {
+ break;
+ }
+
+ let file = file_result?;
+
+ // Update file statistics regardless of state
+ if collect_stats {
+ if let Some(file_stats) = &file.statistics {
+ num_rows = if file_group.is_empty() {
+ // For the first file, just take its row count
+ file_stats.num_rows
+ } else {
+ // For subsequent files, accumulate the counts
+ num_rows.add(&file_stats.num_rows)
+ };
+ }
+ }
+
+ // Always add the file to our group
+ file_group.push(file);
+
+ // Check if we've hit the limit (if one was specified)
+ if let Some(limit) = limit {
+ if let Precision::Exact(row_count) = num_rows {
+ if row_count > limit {
+ state = ProcessingState::ReachedLimit;
+ }
+ }
+ }
+ }
+ // If we still have files in the stream, it means that the limit kicked
+ // in, and the statistic could have been different had we processed the
+ // files in a different order.
+ let inexact_stats = all_files.next().await.is_some();
+ Ok((file_group, inexact_stats))
+}
+
#[cfg(test)]
mod tests {
use super::*;
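To make the control flow of `get_files_with_limit` easier to follow, here is a simplified, self-contained sketch of the same limit logic using plain `Option<usize>` row counts in place of DataFusion's `Precision` and `PartitionedFile` types (names and values are illustrative only):

fn collect_until_limit(
    files: Vec<(String, Option<usize>)>, // (file name, exact row count if known)
    limit: Option<usize>,
) -> (Vec<String>, bool) {
    let mut collected = Vec::new();
    let mut num_rows: Option<usize> = None;
    let mut iter = files.into_iter();

    for (name, rows) in iter.by_ref() {
        // accumulate only while every file so far reported an exact count
        num_rows = match (num_rows, rows) {
            (None, first) => first,
            (Some(acc), Some(r)) => Some(acc + r),
            (Some(_), None) => None,
        };
        collected.push(name);
        // stop once the accumulated exact row count exceeds the limit
        if let (Some(limit), Some(acc)) = (limit, num_rows) {
            if acc > limit {
                break;
            }
        }
    }
    // leftover files mean the limit kicked in, so statistics are inexact
    let inexact_stats = iter.next().is_some();
    (collected, inexact_stats)
}

fn main() {
    let files = vec![
        ("a".to_string(), Some(5)),
        ("b".to_string(), Some(5)),
        ("c".to_string(), Some(5)),
    ];
    let (collected, inexact) = collect_until_limit(files, Some(7));
    assert_eq!(collected, vec!["a".to_string(), "b".to_string()]);
    assert!(inexact);
}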
diff --git a/datafusion/core/src/datasource/memory.rs b/datafusion/core/src/datasource/memory_test.rs
similarity index 58%
rename from datafusion/core/src/datasource/memory.rs
rename to datafusion/core/src/datasource/memory_test.rs
index 0288cd3e8bc7d..381000ab8ee1e 100644
--- a/datafusion/core/src/datasource/memory.rs
+++ b/datafusion/core/src/datasource/memory_test.rs
@@ -15,378 +15,25 @@
// specific language governing permissions and limitations
// under the License.
-//! [`MemTable`] for querying `Vec<RecordBatch>` by DataFusion.
-
-use std::any::Any;
-use std::collections::HashMap;
-use std::fmt::{self, Debug};
-use std::sync::Arc;
-
-use crate::datasource::{TableProvider, TableType};
-use crate::error::Result;
-use crate::logical_expr::Expr;
-use crate::physical_plan::repartition::RepartitionExec;
-use crate::physical_plan::{
- common, DisplayAs, DisplayFormatType, ExecutionPlan, ExecutionPlanProperties,
- Partitioning, SendableRecordBatchStream,
-};
-use crate::physical_planner::create_physical_sort_exprs;
-
-use arrow::datatypes::SchemaRef;
-use arrow::record_batch::RecordBatch;
-use datafusion_catalog::Session;
-use datafusion_common::{not_impl_err, plan_err, Constraints, DFSchema, SchemaExt};
-use datafusion_common_runtime::JoinSet;
-pub use datafusion_datasource::memory::MemorySourceConfig;
-use datafusion_datasource::sink::{DataSink, DataSinkExec};
-pub use datafusion_datasource::source::DataSourceExec;
-use datafusion_execution::TaskContext;
-use datafusion_expr::dml::InsertOp;
-use datafusion_expr::SortExpr;
-
-use async_trait::async_trait;
-use futures::StreamExt;
-use log::debug;
-use parking_lot::Mutex;
-use tokio::sync::RwLock;
-
-/// Type alias for partition data
-pub type PartitionData = Arc<RwLock<Vec<RecordBatch>>>;
-
-/// In-memory data source for presenting a `Vec<RecordBatch>` as a
-/// data source that can be queried by DataFusion. This allows data to
-/// be pre-loaded into memory and then repeatedly queried without
-/// incurring additional file I/O overhead.
-#[derive(Debug)]
-pub struct MemTable {
- schema: SchemaRef,
- pub(crate) batches: Vec<PartitionData>,
- constraints: Constraints,
- column_defaults: HashMap<String, Expr>,
- /// Optional pre-known sort order(s). Must be `SortExpr`s.
- /// inserting data into this table removes the order
- pub sort_order: Arc<Mutex<Vec<Vec<SortExpr>>>>,
-}
-
-impl MemTable {
- /// Create a new in-memory table from the provided schema and record batches
- pub fn try_new(schema: SchemaRef, partitions: Vec<Vec<RecordBatch>>) -> Result<Self> {
- for batches in partitions.iter().flatten() {
- let batches_schema = batches.schema();
- if !schema.contains(&batches_schema) {
- debug!(
- "mem table schema does not contain batches schema. \
- Target_schema: {schema:?}. Batches Schema: {batches_schema:?}"
- );
- return plan_err!("Mismatch between schema and batches");
- }
- }
-
- Ok(Self {
- schema,
- batches: partitions
- .into_iter()
- .map(|e| Arc::new(RwLock::new(e)))
- .collect::<Vec<_>>(),
- constraints: Constraints::empty(),
- column_defaults: HashMap::new(),
- sort_order: Arc::new(Mutex::new(vec![])),
- })
- }
-
- /// Assign constraints
- pub fn with_constraints(mut self, constraints: Constraints) -> Self {
- self.constraints = constraints;
- self
- }
-
- /// Assign column defaults
- pub fn with_column_defaults(
- mut self,
- column_defaults: HashMap<String, Expr>,
- ) -> Self {
- self.column_defaults = column_defaults;
- self
- }
-
- /// Specify an optional pre-known sort order(s). Must be `SortExpr`s.
- ///
- /// If the data is not sorted by this order, DataFusion may produce
- /// incorrect results.
- ///
- /// DataFusion may take advantage of this ordering to omit sorts
- /// or use more efficient algorithms.
- ///
- /// Note that multiple sort orders are supported, if some are known to be
- /// equivalent,
- pub fn with_sort_order(self, mut sort_order: Vec<Vec<SortExpr>>) -> Self {
- std::mem::swap(self.sort_order.lock().as_mut(), &mut sort_order);
- self
- }
-
- /// Create a mem table by reading from another data source
- pub async fn load(
- t: Arc<dyn TableProvider>,
- output_partitions: Option<usize>,
- state: &dyn Session,
- ) -> Result<Self> {
- let schema = t.schema();
- let constraints = t.constraints();
- let exec = t.scan(state, None, &[], None).await?;
- let partition_count = exec.output_partitioning().partition_count();
-
- let mut join_set = JoinSet::new();
-
- for part_idx in 0..partition_count {
- let task = state.task_ctx();
- let exec = Arc::clone(&exec);
- join_set.spawn(async move {
- let stream = exec.execute(part_idx, task)?;
- common::collect(stream).await
- });
- }
-
- let mut data: Vec<Vec<RecordBatch>> =
- Vec::with_capacity(exec.output_partitioning().partition_count());
-
- while let Some(result) = join_set.join_next().await {
- match result {
- Ok(res) => data.push(res?),
- Err(e) => {
- if e.is_panic() {
- std::panic::resume_unwind(e.into_panic());
- } else {
- unreachable!();
- }
- }
- }
- }
-
- let mut exec = DataSourceExec::new(Arc::new(MemorySourceConfig::try_new(
- &data,
- Arc::clone(&schema),
- None,
- )?));
- if let Some(cons) = constraints {
- exec = exec.with_constraints(cons.clone());
- }
-
- if let Some(num_partitions) = output_partitions {
- let exec = RepartitionExec::try_new(
- Arc::new(exec),
- Partitioning::RoundRobinBatch(num_partitions),
- )?;
-
- // execute and collect results
- let mut output_partitions = vec![];
- for i in 0..exec.properties().output_partitioning().partition_count() {
- // execute this *output* partition and collect all batches
- let task_ctx = state.task_ctx();
- let mut stream = exec.execute(i, task_ctx)?;
- let mut batches = vec![];
- while let Some(result) = stream.next().await {
- batches.push(result?);
- }
- output_partitions.push(batches);
- }
-
- return MemTable::try_new(Arc::clone(&schema), output_partitions);
- }
- MemTable::try_new(Arc::clone(&schema), data)
- }
-}
-
-#[async_trait]
-impl TableProvider for MemTable {
- fn as_any(&self) -> &dyn Any {
- self
- }
-
- fn schema(&self) -> SchemaRef {
- Arc::clone(&self.schema)
- }
-
- fn constraints(&self) -> Option<&Constraints> {
- Some(&self.constraints)
- }
-
- fn table_type(&self) -> TableType {
- TableType::Base
- }
-
- async fn scan(
- &self,
- state: &dyn Session,
- projection: Option<&Vec<usize>>,
- _filters: &[Expr],
- _limit: Option<usize>,
- ) -> Result<Arc<dyn ExecutionPlan>> {
- let mut partitions = vec![];
- for arc_inner_vec in self.batches.iter() {
- let inner_vec = arc_inner_vec.read().await;
- partitions.push(inner_vec.clone())
- }
-
- let mut source =
- MemorySourceConfig::try_new(&partitions, self.schema(), projection.cloned())?;
-
- let show_sizes = state.config_options().explain.show_sizes;
- source = source.with_show_sizes(show_sizes);
-
- // add sort information if present
- let sort_order = self.sort_order.lock();
- if !sort_order.is_empty() {
- let df_schema = DFSchema::try_from(self.schema.as_ref().clone())?;
-
- let file_sort_order = sort_order
- .iter()
- .map(|sort_exprs| {
- create_physical_sort_exprs(
- sort_exprs,
- &df_schema,
- state.execution_props(),
- )
- })
- .collect::<Result<Vec<_>>>()?;
- source = source.try_with_sort_information(file_sort_order)?;
- }
-
- Ok(DataSourceExec::from_data_source(source))
- }
-
- /// Returns an ExecutionPlan that inserts the execution results of a given [`ExecutionPlan`] into this [`MemTable`].
- ///
- /// The [`ExecutionPlan`] must have the same schema as this [`MemTable`].
- ///
- /// # Arguments
- ///
- /// * `state` - The [`SessionState`] containing the context for executing the plan.
- /// * `input` - The [`ExecutionPlan`] to execute and insert.
- ///
- /// # Returns
- ///
- /// * A plan that returns the number of rows written.
- ///
- /// [`SessionState`]: crate::execution::context::SessionState
- async fn insert_into(
- &self,
- _state: &dyn Session,
- input: Arc<dyn ExecutionPlan>,
- insert_op: InsertOp,
- ) -> Result<Arc<dyn ExecutionPlan>> {
- // If we are inserting into the table, any sort order may be messed up so reset it here
- *self.sort_order.lock() = vec![];
-
- // Create a physical plan from the logical plan.
- // Check that the schema of the plan matches the schema of this table.
- self.schema()
- .logically_equivalent_names_and_types(&input.schema())?;
-
- if insert_op != InsertOp::Append {
- return not_impl_err!("{insert_op} not implemented for MemoryTable yet");
- }
- let sink = MemSink::try_new(self.batches.clone(), Arc::clone(&self.schema))?;
- Ok(Arc::new(DataSinkExec::new(input, Arc::new(sink), None)))
- }
-
- fn get_column_default(&self, column: &str) -> Option<&Expr> {
- self.column_defaults.get(column)
- }
-}
-
-/// Implements for writing to a [`MemTable`]
-struct MemSink {
- /// Target locations for writing data
- batches: Vec<PartitionData>,
- schema: SchemaRef,
-}
-
-impl Debug for MemSink {
- fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
- f.debug_struct("MemSink")
- .field("num_partitions", &self.batches.len())
- .finish()
- }
-}
-
-impl DisplayAs for MemSink {
- fn fmt_as(&self, t: DisplayFormatType, f: &mut fmt::Formatter<'_>) -> fmt::Result {
- match t {
- DisplayFormatType::Default | DisplayFormatType::Verbose => {
- let partition_count = self.batches.len();
- write!(f, "MemoryTable (partitions={partition_count})")
- }
- DisplayFormatType::TreeRender => {
- // TODO: collect info
- write!(f, "")
- }
- }
- }
-}
-
-impl MemSink {
- /// Creates a new [`MemSink`].
- ///
- /// The caller is responsible for ensuring that there is at least one partition to insert into.
- fn try_new(batches: Vec<PartitionData>, schema: SchemaRef) -> Result<Self> {
- if batches.is_empty() {
- return plan_err!("Cannot insert into MemTable with zero partitions");
- }
- Ok(Self { batches, schema })
- }
-}
-
-#[async_trait]
-impl DataSink for MemSink {
- fn as_any(&self) -> &dyn Any {
- self
- }
-
- fn schema(&self) -> &SchemaRef {
- &self.schema
- }
-
- async fn write_all(
- &self,
- mut data: SendableRecordBatchStream,
- _context: &Arc<TaskContext>,
- ) -> Result<u64> {
- let num_partitions = self.batches.len();
-
- // buffer up the data round robin style into num_partitions
-
- let mut new_batches = vec![vec![]; num_partitions];
- let mut i = 0;
- let mut row_count = 0;
- while let Some(batch) = data.next().await.transpose()? {
- row_count += batch.num_rows();
- new_batches[i].push(batch);
- i = (i + 1) % num_partitions;
- }
-
- // write the outputs into the batches
- for (target, mut batches) in self.batches.iter().zip(new_batches.into_iter()) {
- // Append all the new batches in one go to minimize locking overhead
- target.write().await.append(&mut batches);
- }
-
- Ok(row_count as u64)
- }
-}
-
#[cfg(test)]
mod tests {
- use super::*;
+ use crate::datasource::MemTable;
use crate::datasource::{provider_as_source, DefaultTableSource};
use crate::physical_plan::collect;
use crate::prelude::SessionContext;
-
use arrow::array::{AsArray, Int32Array};
use arrow::datatypes::{DataType, Field, Schema, UInt64Type};
use arrow::error::ArrowError;
- use datafusion_common::DataFusionError;
+ use arrow::record_batch::RecordBatch;
+ use arrow_schema::SchemaRef;
+ use datafusion_catalog::TableProvider;
+ use datafusion_common::{DataFusionError, Result};
+ use datafusion_expr::dml::InsertOp;
use datafusion_expr::LogicalPlanBuilder;
+ use futures::StreamExt;
+ use std::collections::HashMap;
+ use std::sync::Arc;
#[tokio::test]
async fn test_with_projection() -> Result<()> {
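The `MemTable` implementation itself now lives in `datafusion-catalog` and these tests only exercise the re-export. For reference, a small hedged usage sketch of the re-exported type (table and column names are illustrative):

use std::sync::Arc;

use datafusion::arrow::array::Int32Array;
use datafusion::arrow::datatypes::{DataType, Field, Schema};
use datafusion::arrow::record_batch::RecordBatch;
use datafusion::datasource::MemTable;
use datafusion::error::Result;
use datafusion::prelude::*;

#[tokio::main]
async fn main() -> Result<()> {
    // one partition holding a single in-memory batch
    let schema = Arc::new(Schema::new(vec![Field::new("a", DataType::Int32, false)]));
    let batch = RecordBatch::try_new(
        Arc::clone(&schema),
        vec![Arc::new(Int32Array::from(vec![1, 2, 3]))],
    )?;
    let table = MemTable::try_new(schema, vec![vec![batch]])?;

    let ctx = SessionContext::new();
    let _ = ctx.register_table("t", Arc::new(table))?;
    ctx.sql("SELECT a FROM t").await?.show().await?;
    Ok(())
}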
diff --git a/datafusion/core/src/datasource/mod.rs b/datafusion/core/src/datasource/mod.rs
index 35a451cbc803a..25a89644cd2a4 100644
--- a/datafusion/core/src/datasource/mod.rs
+++ b/datafusion/core/src/datasource/mod.rs
@@ -24,10 +24,9 @@ pub mod empty;
pub mod file_format;
pub mod listing;
pub mod listing_table_factory;
-pub mod memory;
+mod memory_test;
pub mod physical_plan;
pub mod provider;
-mod statistics;
mod view_test;
// backwards compatibility
@@ -40,6 +39,7 @@ pub use crate::catalog::TableProvider;
pub use crate::logical_expr::TableType;
pub use datafusion_catalog::cte_worktable;
pub use datafusion_catalog::default_table_source;
+pub use datafusion_catalog::memory;
pub use datafusion_catalog::stream;
pub use datafusion_catalog::view;
pub use datafusion_datasource::schema_adapter;
@@ -47,7 +47,6 @@ pub use datafusion_datasource::sink;
pub use datafusion_datasource::source;
pub use datafusion_execution::object_store;
pub use datafusion_physical_expr::create_ordering;
-pub use statistics::get_statistics_with_limit;
#[cfg(all(test, feature = "parquet"))]
mod tests {
@@ -107,7 +106,7 @@ mod tests {
let meta = ObjectMeta {
location,
last_modified: metadata.modified().map(chrono::DateTime::from).unwrap(),
- size: metadata.len() as usize,
+ size: metadata.len(),
e_tag: None,
version: None,
};
diff --git a/datafusion/core/src/datasource/physical_plan/arrow_file.rs b/datafusion/core/src/datasource/physical_plan/arrow_file.rs
index 5dcf4df73f57a..f0a1f94d87e1f 100644
--- a/datafusion/core/src/datasource/physical_plan/arrow_file.rs
+++ b/datafusion/core/src/datasource/physical_plan/arrow_file.rs
@@ -273,6 +273,7 @@ impl FileOpener for ArrowOpener {
None => {
let r = object_store.get(file_meta.location()).await?;
match r.payload {
+ #[cfg(not(target_arch = "wasm32"))]
GetResultPayload::File(file, _) => {
let arrow_reader = arrow::ipc::reader::FileReader::try_new(
file, projection,
@@ -305,7 +306,7 @@ impl FileOpener for ArrowOpener {
)?;
// read footer according to footer_len
let get_option = GetOptions {
- range: Some(GetRange::Suffix(10 + footer_len)),
+ range: Some(GetRange::Suffix(10 + (footer_len as u64))),
..Default::default()
};
let get_result = object_store
@@ -332,9 +333,9 @@ impl FileOpener for ArrowOpener {
.iter()
.flatten()
.map(|block| {
- let block_len = block.bodyLength() as usize
- + block.metaDataLength() as usize;
- let block_offset = block.offset() as usize;
+ let block_len =
+ block.bodyLength() as u64 + block.metaDataLength() as u64;
+ let block_offset = block.offset() as u64;
block_offset..block_offset + block_len
})
.collect_vec();
@@ -354,9 +355,9 @@ impl FileOpener for ArrowOpener {
.iter()
.flatten()
.filter(|block| {
- let block_offset = block.offset() as usize;
- block_offset >= range.start as usize
- && block_offset < range.end as usize
+ let block_offset = block.offset() as u64;
+ block_offset >= range.start as u64
+ && block_offset < range.end as u64
})
.copied()
.collect_vec();
@@ -364,9 +365,9 @@ impl FileOpener for ArrowOpener {
let recordbatch_ranges = recordbatches
.iter()
.map(|block| {
- let block_len = block.bodyLength() as usize
- + block.metaDataLength() as usize;
- let block_offset = block.offset() as usize;
+ let block_len =
+ block.bodyLength() as u64 + block.metaDataLength() as u64;
+ let block_offset = block.offset() as u64;
block_offset..block_offset + block_len
})
.collect_vec();
diff --git a/datafusion/core/src/datasource/physical_plan/csv.rs b/datafusion/core/src/datasource/physical_plan/csv.rs
index 5914924797dce..3ef4030134520 100644
--- a/datafusion/core/src/datasource/physical_plan/csv.rs
+++ b/datafusion/core/src/datasource/physical_plan/csv.rs
@@ -658,7 +658,7 @@ mod tests {
)
.await
.expect_err("should fail because input file does not match inferred schema");
- assert_eq!(e.strip_backtrace(), "Arrow error: Parser error: Error while parsing value d for column 0 at line 4");
+ assert_eq!(e.strip_backtrace(), "Arrow error: Parser error: Error while parsing value 'd' as type 'Int64' for column 0 at line 4. Row data: '[d,4]'");
Ok(())
}
diff --git a/datafusion/core/src/datasource/physical_plan/json.rs b/datafusion/core/src/datasource/physical_plan/json.rs
index 910c4316d9734..736248fbd95df 100644
--- a/datafusion/core/src/datasource/physical_plan/json.rs
+++ b/datafusion/core/src/datasource/physical_plan/json.rs
@@ -495,7 +495,7 @@ mod tests {
.write_json(out_dir_url, DataFrameWriteOptions::new(), None)
.await
.expect_err("should fail because input file does not match inferred schema");
- assert_eq!(e.strip_backtrace(), "Arrow error: Parser error: Error while parsing value d for column 0 at line 4");
+ assert_eq!(e.strip_backtrace(), "Arrow error: Parser error: Error while parsing value 'd' as type 'Int64' for column 0 at line 4. Row data: '[d,4]'");
Ok(())
}
diff --git a/datafusion/core/src/datasource/physical_plan/parquet.rs b/datafusion/core/src/datasource/physical_plan/parquet.rs
index 9e1b2822e8540..e9bb8b0db3682 100644
--- a/datafusion/core/src/datasource/physical_plan/parquet.rs
+++ b/datafusion/core/src/datasource/physical_plan/parquet.rs
@@ -38,11 +38,12 @@ mod tests {
use crate::prelude::{ParquetReadOptions, SessionConfig, SessionContext};
use crate::test::object_store::local_unpartitioned_file;
use arrow::array::{
- ArrayRef, Date64Array, Int32Array, Int64Array, Int8Array, StringArray,
+ ArrayRef, AsArray, Date64Array, Int32Array, Int64Array, Int8Array, StringArray,
StructArray,
};
use arrow::datatypes::{DataType, Field, Fields, Schema, SchemaBuilder};
use arrow::record_batch::RecordBatch;
+ use arrow::util::pretty::pretty_format_batches;
use arrow_schema::SchemaRef;
use bytes::{BufMut, BytesMut};
use datafusion_common::config::TableParquetOptions;
@@ -61,8 +62,9 @@ mod tests {
use datafusion_execution::object_store::ObjectStoreUrl;
use datafusion_expr::{col, lit, when, Expr};
use datafusion_physical_expr::planner::logical2physical;
+ use datafusion_physical_plan::analyze::AnalyzeExec;
+ use datafusion_physical_plan::collect;
use datafusion_physical_plan::metrics::{ExecutionPlanMetricsSet, MetricsSet};
- use datafusion_physical_plan::{collect, displayable};
use datafusion_physical_plan::{ExecutionPlan, ExecutionPlanProperties};
use chrono::{TimeZone, Utc};
@@ -81,10 +83,10 @@ mod tests {
struct RoundTripResult {
/// Data that was read back from ParquetFiles
batches: Result<Vec<RecordBatch>>,
+ /// The EXPLAIN ANALYZE output
+ explain: Result<String>,
/// The physical plan that was created (that has statistics, etc)
parquet_exec: Arc<DataSourceExec>,
- /// The ParquetSource that is used in plan
- parquet_source: ParquetSource,
}
/// round-trip record batches by writing each individual RecordBatch to
@@ -137,71 +139,109 @@ mod tests {
self.round_trip(batches).await.batches
}
- /// run the test, returning the `RoundTripResult`
- async fn round_trip(self, batches: Vec) -> RoundTripResult {
- let Self {
- projection,
- schema,
- predicate,
- pushdown_predicate,
- page_index_predicate,
- } = self;
-
- let file_schema = match schema {
- Some(schema) => schema,
- None => Arc::new(
- Schema::try_merge(
- batches.iter().map(|b| b.schema().as_ref().clone()),
- )
- .unwrap(),
- ),
- };
- // If testing with page_index_predicate, write parquet
- // files with multiple pages
- let multi_page = page_index_predicate;
- let (meta, _files) = store_parquet(batches, multi_page).await.unwrap();
- let file_group = meta.into_iter().map(Into::into).collect();
-
+ fn build_file_source(&self, file_schema: SchemaRef) -> Arc<ParquetSource> {
// set up predicate (this is normally done by a layer higher up)
- let predicate = predicate.map(|p| logical2physical(&p, &file_schema));
+ let predicate = self
+ .predicate
+ .as_ref()
+ .map(|p| logical2physical(p, &file_schema));
let mut source = ParquetSource::default();
if let Some(predicate) = predicate {
source = source.with_predicate(Arc::clone(&file_schema), predicate);
}
- if pushdown_predicate {
+ if self.pushdown_predicate {
source = source
.with_pushdown_filters(true)
.with_reorder_filters(true);
}
- if page_index_predicate {
+ if self.page_index_predicate {
source = source.with_enable_page_index(true);
}
+ Arc::new(source)
+ }
+
+ fn build_parquet_exec(
+ &self,
+ file_schema: SchemaRef,
+ file_group: FileGroup,
+ source: Arc<ParquetSource>,
+ ) -> Arc<DataSourceExec> {
let base_config = FileScanConfigBuilder::new(
ObjectStoreUrl::local_filesystem(),
file_schema,
- Arc::new(source.clone()),
+ source,
)
.with_file_group(file_group)
- .with_projection(projection)
+ .with_projection(self.projection.clone())
.build();
+ DataSourceExec::from_data_source(base_config)
+ }
+
+ /// run the test, returning the `RoundTripResult`
+ async fn round_trip(&self, batches: Vec) -> RoundTripResult {
+ let file_schema = match &self.schema {
+ Some(schema) => schema,
+ None => &Arc::new(
+ Schema::try_merge(
+ batches.iter().map(|b| b.schema().as_ref().clone()),
+ )
+ .unwrap(),
+ ),
+ };
+ let file_schema = Arc::clone(file_schema);
+ // If testing with page_index_predicate, write parquet
+ // files with multiple pages
+ let multi_page = self.page_index_predicate;
+ let (meta, _files) = store_parquet(batches, multi_page).await.unwrap();
+ let file_group: FileGroup = meta.into_iter().map(Into::into).collect();
+
+ // build a ParquetExec to return the results
+ let parquet_source = self.build_file_source(file_schema.clone());
+ let parquet_exec = self.build_parquet_exec(
+ file_schema.clone(),
+ file_group.clone(),
+ Arc::clone(&parquet_source),
+ );
+
+ let analyze_exec = Arc::new(AnalyzeExec::new(
+ false,
+ false,
+ // use a new ParquetSource to avoid sharing execution metrics
+ self.build_parquet_exec(
+ file_schema.clone(),
+ file_group.clone(),
+ self.build_file_source(file_schema.clone()),
+ ),
+ Arc::new(Schema::new(vec![
+ Field::new("plan_type", DataType::Utf8, true),
+ Field::new("plan", DataType::Utf8, true),
+ ])),
+ ));
let session_ctx = SessionContext::new();
let task_ctx = session_ctx.task_ctx();
- let parquet_exec = DataSourceExec::from_data_source(base_config.clone());
+ let batches = collect(
+ Arc::clone(&parquet_exec) as Arc<dyn ExecutionPlan>,
+ task_ctx.clone(),
+ )
+ .await;
+
+ let explain = collect(analyze_exec, task_ctx.clone())
+ .await
+ .map(|batches| {
+ let batches = pretty_format_batches(&batches).unwrap();
+ format!("{batches}")
+ });
+
RoundTripResult {
- batches: collect(parquet_exec.clone(), task_ctx).await,
+ batches,
+ explain,
parquet_exec,
- parquet_source: base_config
- .file_source()
- .as_any()
- .downcast_ref::<ParquetSource>()
- .unwrap()
- .clone(),
}
}
}
@@ -1069,6 +1109,7 @@ mod tests {
let parquet_exec = scan_format(
&state,
&ParquetFormat::default(),
+ None,
&testdata,
filename,
Some(vec![0, 1, 2]),
@@ -1101,6 +1142,92 @@ mod tests {
Ok(())
}
+ #[tokio::test]
+ async fn parquet_exec_with_int96_from_spark() -> Result<()> {
+ // arrow-rs relies on the chrono library to convert between timestamps and strings, so
+ // instead compare as Int64. The underlying type should be a PrimitiveArray of Int64
+ // anyway, so this should be a zero-copy non-modifying cast at the SchemaAdapter.
+
+ let schema = Arc::new(Schema::new(vec![Field::new("a", DataType::Int64, true)]));
+ let testdata = datafusion_common::test_util::parquet_test_data();
+ let filename = "int96_from_spark.parquet";
+ let session_ctx = SessionContext::new();
+ let state = session_ctx.state();
+ let task_ctx = state.task_ctx();
+
+ let time_units_and_expected = vec![
+ (
+ None, // Same as "ns" time_unit
+ Arc::new(Int64Array::from(vec![
+ Some(1704141296123456000), // Reads as nanosecond fine (note 3 extra 0s)
+ Some(1704070800000000000), // Reads as nanosecond fine (note 3 extra 0s)
+ Some(-4852191831933722624), // Cannot be represented with nanos timestamp (year 9999)
+ Some(1735599600000000000), // Reads as nanosecond fine (note 3 extra 0s)
+ None,
+ Some(-4864435138808946688), // Cannot be represented with nanos timestamp (year 290000)
+ ])),
+ ),
+ (
+ Some("ns".to_string()),
+ Arc::new(Int64Array::from(vec![
+ Some(1704141296123456000),
+ Some(1704070800000000000),
+ Some(-4852191831933722624),
+ Some(1735599600000000000),
+ None,
+ Some(-4864435138808946688),
+ ])),
+ ),
+ (
+ Some("us".to_string()),
+ Arc::new(Int64Array::from(vec![
+ Some(1704141296123456),
+ Some(1704070800000000),
+ Some(253402225200000000),
+ Some(1735599600000000),
+ None,
+ Some(9089380393200000000),
+ ])),
+ ),
+ ];
+
+ for (time_unit, expected) in time_units_and_expected {
+ let parquet_exec = scan_format(
+ &state,
+ &ParquetFormat::default().with_coerce_int96(time_unit.clone()),
+ Some(schema.clone()),
+ &testdata,
+ filename,
+ Some(vec![0]),
+ None,
+ )
+ .await
+ .unwrap();
+ assert_eq!(parquet_exec.output_partitioning().partition_count(), 1);
+
+ let mut results = parquet_exec.execute(0, task_ctx.clone())?;
+ let batch = results.next().await.unwrap()?;
+
+ assert_eq!(6, batch.num_rows());
+ assert_eq!(1, batch.num_columns());
+
+ assert_eq!(batch.num_columns(), 1);
+ let column = batch.column(0);
+
+ assert_eq!(column.len(), expected.len());
+
+ column
+ .as_primitive::<Int64Type>()
+ .iter()
+ .zip(expected.iter())
+ .for_each(|(lhs, rhs)| {
+ assert_eq!(lhs, rhs);
+ });
+ }
+
+ Ok(())
+ }
+
#[tokio::test]
async fn parquet_exec_with_range() -> Result<()> {
fn file_range(meta: &ObjectMeta, start: i64, end: i64) -> PartitionedFile {
@@ -1375,26 +1502,6 @@ mod tests {
create_batch(vec![("c1", c1.clone())])
}
- /// Returns a int64 array with contents:
- /// "[-1, 1, null, 2, 3, null, null]"
- fn int64_batch() -> RecordBatch {
- let contents: ArrayRef = Arc::new(Int64Array::from(vec![
- Some(-1),
- Some(1),
- None,
- Some(2),
- Some(3),
- None,
- None,
- ]));
-
- create_batch(vec![
- ("a", contents.clone()),
- ("b", contents.clone()),
- ("c", contents.clone()),
- ])
- }
-
#[tokio::test]
async fn parquet_exec_metrics() {
// batch1: c1(string)
@@ -1454,110 +1561,17 @@ mod tests {
.round_trip(vec![batch1])
.await;
- // should have a pruning predicate
- let pruning_predicate = rt.parquet_source.pruning_predicate();
- assert!(pruning_predicate.is_some());
-
- // convert to explain plan form
- let display = displayable(rt.parquet_exec.as_ref())
- .indent(true)
- .to_string();
+ let explain = rt.explain.unwrap();
- assert_contains!(
- &display,
- "pruning_predicate=c1_null_count@2 != row_count@3 AND (c1_min@0 != bar OR bar != c1_max@1)"
- );
+ // check that there was a pruning predicate -> row groups got pruned
+ assert_contains!(&explain, "predicate=c1@0 != bar");
- assert_contains!(&display, r#"predicate=c1@0 != bar"#);
+ // there's a single row group, but we can check that it matched
+ // if no pruning was done this would be 0 instead of 1
+ assert_contains!(&explain, "row_groups_matched_statistics=1");
- assert_contains!(&display, "projection=[c1]");
- }
-
- #[tokio::test]
- async fn parquet_exec_display_deterministic() {
- // batches: a(int64), b(int64), c(int64)
- let batches = int64_batch();
-
- fn extract_required_guarantees(s: &str) -> Option<&str> {
- s.split("required_guarantees=").nth(1)
- }
-
- // Ensuring that the required_guarantees remain consistent across every display plan of the filter conditions
- for _ in 0..100 {
- // c = 1 AND b = 1 AND a = 1
- let filter0 = col("c")
- .eq(lit(1))
- .and(col("b").eq(lit(1)))
- .and(col("a").eq(lit(1)));
-
- let rt0 = RoundTrip::new()
- .with_predicate(filter0)
- .with_pushdown_predicate()
- .round_trip(vec![batches.clone()])
- .await;
-
- let pruning_predicate = rt0.parquet_source.pruning_predicate();
- assert!(pruning_predicate.is_some());
-
- let display0 = displayable(rt0.parquet_exec.as_ref())
- .indent(true)
- .to_string();
-
- let guarantees0: &str = extract_required_guarantees(&display0)
- .expect("Failed to extract required_guarantees");
- // Compare only the required_guarantees part (Because the file_groups part will not be the same)
- assert_eq!(
- guarantees0.trim(),
- "[a in (1), b in (1), c in (1)]",
- "required_guarantees don't match"
- );
- }
-
- // c = 1 AND a = 1 AND b = 1
- let filter1 = col("c")
- .eq(lit(1))
- .and(col("a").eq(lit(1)))
- .and(col("b").eq(lit(1)));
-
- let rt1 = RoundTrip::new()
- .with_predicate(filter1)
- .with_pushdown_predicate()
- .round_trip(vec![batches.clone()])
- .await;
-
- // b = 1 AND a = 1 AND c = 1
- let filter2 = col("b")
- .eq(lit(1))
- .and(col("a").eq(lit(1)))
- .and(col("c").eq(lit(1)));
-
- let rt2 = RoundTrip::new()
- .with_predicate(filter2)
- .with_pushdown_predicate()
- .round_trip(vec![batches])
- .await;
-
- // should have a pruning predicate
- let pruning_predicate = rt1.parquet_source.pruning_predicate();
- assert!(pruning_predicate.is_some());
- let pruning_predicate = rt2.parquet_source.predicate();
- assert!(pruning_predicate.is_some());
-
- // convert to explain plan form
- let display1 = displayable(rt1.parquet_exec.as_ref())
- .indent(true)
- .to_string();
- let display2 = displayable(rt2.parquet_exec.as_ref())
- .indent(true)
- .to_string();
-
- let guarantees1 = extract_required_guarantees(&display1)
- .expect("Failed to extract required_guarantees");
- let guarantees2 = extract_required_guarantees(&display2)
- .expect("Failed to extract required_guarantees");
-
- // Compare only the required_guarantees part (Because the predicate part will not be the same)
- assert_eq!(guarantees1, guarantees2, "required_guarantees don't match");
+ // check the projection
+ assert_contains!(&explain, "projection=[c1]");
}
#[tokio::test]
@@ -1581,16 +1595,19 @@ mod tests {
.await;
// Should not contain a pruning predicate (since nothing can be pruned)
- let pruning_predicate = rt.parquet_source.pruning_predicate();
- assert!(
- pruning_predicate.is_none(),
- "Still had pruning predicate: {pruning_predicate:?}"
- );
+ let explain = rt.explain.unwrap();
- // but does still has a pushdown down predicate
- let predicate = rt.parquet_source.predicate();
- let filter_phys = logical2physical(&filter, rt.parquet_exec.schema().as_ref());
- assert_eq!(predicate.unwrap().to_string(), filter_phys.to_string());
+ // When both matched and pruned are 0, it means that the pruning predicate
+ // was not used at all.
+ assert_contains!(&explain, "row_groups_matched_statistics=0");
+ assert_contains!(&explain, "row_groups_pruned_statistics=0");
+
+ // But pushdown predicate should be present
+ assert_contains!(
+ &explain,
+ "predicate=CASE WHEN c1@0 != bar THEN true ELSE false END"
+ );
+ assert_contains!(&explain, "pushdown_rows_pruned=5");
}
#[tokio::test]
@@ -1616,8 +1633,14 @@ mod tests {
.await;
// Should have a pruning predicate
- let pruning_predicate = rt.parquet_source.pruning_predicate();
- assert!(pruning_predicate.is_some());
+ let explain = rt.explain.unwrap();
+ assert_contains!(
+ &explain,
+ "predicate=c1@0 = foo AND CASE WHEN c1@0 != bar THEN true ELSE false END"
+ );
+
+ // And bloom filters should have been evaluated
+ assert_contains!(&explain, "row_groups_pruned_bloom_filter=1");
}
/// Returns the sum of all the metrics with the specified name
@@ -1850,13 +1873,13 @@ mod tests {
path: &str,
store: Arc<dyn ObjectStore>,
batch: RecordBatch,
- ) -> usize {
+ ) -> u64 {
let mut writer =
ArrowWriter::try_new(BytesMut::new().writer(), batch.schema(), None).unwrap();
writer.write(&batch).unwrap();
writer.flush().unwrap();
let bytes = writer.into_inner().unwrap().into_inner().freeze();
- let total_size = bytes.len();
+ let total_size = bytes.len() as u64;
let path = Path::from(path);
let payload = object_store::PutPayload::from_bytes(bytes);
store
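These parquet tests now assert on EXPLAIN ANALYZE output rather than downcasting to `ParquetSource`. A rough SQL-level sketch of the same idea (the file path is hypothetical; the tests above build `AnalyzeExec` over a `DataSourceExec` directly instead of going through SQL):

use datafusion::arrow::util::pretty::pretty_format_batches;
use datafusion::error::Result;
use datafusion::prelude::*;

#[tokio::main]
async fn main() -> Result<()> {
    let ctx = SessionContext::new();
    // hypothetical input file; the real tests write temporary parquet files
    ctx.register_parquet("t", "data.parquet", ParquetReadOptions::default())
        .await?;

    let batches = ctx
        .sql("EXPLAIN ANALYZE SELECT c1 FROM t WHERE c1 != 'bar'")
        .await?
        .collect()
        .await?;
    let explain = pretty_format_batches(&batches)?.to_string();

    // pruning metrics such as row_groups_matched_statistics appear in this output
    assert!(explain.contains("row_groups_matched_statistics"));
    Ok(())
}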
diff --git a/datafusion/core/src/datasource/statistics.rs b/datafusion/core/src/datasource/statistics.rs
deleted file mode 100644
index cf283ecee0bf7..0000000000000
--- a/datafusion/core/src/datasource/statistics.rs
+++ /dev/null
@@ -1,219 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-use std::mem;
-use std::sync::Arc;
-
-use futures::{Stream, StreamExt};
-
-use crate::arrow::datatypes::SchemaRef;
-use crate::error::Result;
-use crate::physical_plan::{ColumnStatistics, Statistics};
-use datafusion_common::stats::Precision;
-use datafusion_common::ScalarValue;
-use datafusion_datasource::file_groups::FileGroup;
-
-use super::listing::PartitionedFile;
-
-/// Get all files as well as the file level summary statistics (no statistic for partition columns).
-/// If the optional `limit` is provided, includes only sufficient files. Needed to read up to
-/// `limit` number of rows. `collect_stats` is passed down from the configuration parameter on
-/// `ListingTable`. If it is false we only construct bare statistics and skip a potentially expensive
-/// call to `multiunzip` for constructing file level summary statistics.
-pub async fn get_statistics_with_limit(
- all_files: impl Stream<Item = Result<(PartitionedFile, Arc<Statistics>)>>,
- file_schema: SchemaRef,
- limit: Option<usize>,
- collect_stats: bool,
-) -> Result<(FileGroup, Statistics)> {
- let mut result_files = FileGroup::default();
- // These statistics can be calculated as long as at least one file provides
- // useful information. If none of the files provides any information, then
- // they will end up having `Precision::Absent` values. Throughout calculations,
- // missing values will be imputed as:
- // - zero for summations, and
- // - neutral element for extreme points.
- let size = file_schema.fields().len();
- let mut col_stats_set = vec![ColumnStatistics::default(); size];
- let mut num_rows = Precision::<usize>::Absent;
- let mut total_byte_size = Precision::<usize>::Absent;
-
- // Fusing the stream allows us to call next safely even once it is finished.
- let mut all_files = Box::pin(all_files.fuse());
-
- if let Some(first_file) = all_files.next().await {
- let (mut file, file_stats) = first_file?;
- file.statistics = Some(file_stats.as_ref().clone());
- result_files.push(file);
-
- // First file, we set them directly from the file statistics.
- num_rows = file_stats.num_rows;
- total_byte_size = file_stats.total_byte_size;
- for (index, file_column) in
- file_stats.column_statistics.clone().into_iter().enumerate()
- {
- col_stats_set[index].null_count = file_column.null_count;
- col_stats_set[index].max_value = file_column.max_value;
- col_stats_set[index].min_value = file_column.min_value;
- col_stats_set[index].sum_value = file_column.sum_value;
- }
-
- // If the number of rows exceeds the limit, we can stop processing
- // files. This only applies when we know the number of rows. It also
- // currently ignores tables that have no statistics regarding the
- // number of rows.
- let conservative_num_rows = match num_rows {
- Precision::Exact(nr) => nr,
- _ => usize::MIN,
- };
- if conservative_num_rows <= limit.unwrap_or(usize::MAX) {
- while let Some(current) = all_files.next().await {
- let (mut file, file_stats) = current?;
- file.statistics = Some(file_stats.as_ref().clone());
- result_files.push(file);
- if !collect_stats {
- continue;
- }
-
- // We accumulate the number of rows, total byte size and null
- // counts across all the files in question. If any file does not
- // provide any information or provides an inexact value, we demote
- // the statistic precision to inexact.
- num_rows = add_row_stats(file_stats.num_rows, num_rows);
-
- total_byte_size =
- add_row_stats(file_stats.total_byte_size, total_byte_size);
-
- for (file_col_stats, col_stats) in file_stats
- .column_statistics
- .iter()
- .zip(col_stats_set.iter_mut())
- {
- let ColumnStatistics {
- null_count: file_nc,
- max_value: file_max,
- min_value: file_min,
- sum_value: file_sum,
- distinct_count: _,
- } = file_col_stats;
-
- col_stats.null_count = add_row_stats(*file_nc, col_stats.null_count);
- set_max_if_greater(file_max, &mut col_stats.max_value);
- set_min_if_lesser(file_min, &mut col_stats.min_value);
- col_stats.sum_value = file_sum.add(&col_stats.sum_value);
- }
-
- // If the number of rows exceeds the limit, we can stop processing
- // files. This only applies when we know the number of rows. It also
- // currently ignores tables that have no statistics regarding the
- // number of rows.
- if num_rows.get_value().unwrap_or(&usize::MIN)
- > &limit.unwrap_or(usize::MAX)
- {
- break;
- }
- }
- }
- };
-
- let mut statistics = Statistics {
- num_rows,
- total_byte_size,
- column_statistics: col_stats_set,
- };
- if all_files.next().await.is_some() {
- // If we still have files in the stream, it means that the limit kicked
- // in, and the statistic could have been different had we processed the
- // files in a different order.
- statistics = statistics.to_inexact()
- }
-
- Ok((result_files, statistics))
-}
-
-fn add_row_stats(
- file_num_rows: Precision<usize>,
- num_rows: Precision<usize>,
-) -> Precision<usize> {
- match (file_num_rows, &num_rows) {
- (Precision::Absent, _) => num_rows.to_inexact(),
- (lhs, Precision::Absent) => lhs.to_inexact(),
- (lhs, rhs) => lhs.add(rhs),
- }
-}
-
-/// If the given value is numerically greater than the original maximum value,
-/// return the new maximum value with appropriate exactness information.
-fn set_max_if_greater(
- max_nominee: &Precision<ScalarValue>,
- max_value: &mut Precision<ScalarValue>,
-) {
- match (&max_value, max_nominee) {
- (Precision::Exact(val1), Precision::Exact(val2)) if val1 < val2 => {
- *max_value = max_nominee.clone();
- }
- (Precision::Exact(val1), Precision::Inexact(val2))
- | (Precision::Inexact(val1), Precision::Inexact(val2))
- | (Precision::Inexact(val1), Precision::Exact(val2))
- if val1 < val2 =>
- {
- *max_value = max_nominee.clone().to_inexact();
- }
- (Precision::Exact(_), Precision::Absent) => {
- let exact_max = mem::take(max_value);
- *max_value = exact_max.to_inexact();
- }
- (Precision::Absent, Precision::Exact(_)) => {
- *max_value = max_nominee.clone().to_inexact();
- }
- (Precision::Absent, Precision::Inexact(_)) => {
- *max_value = max_nominee.clone();
- }
- _ => {}
- }
-}
-
-/// If the given value is numerically lesser than the original minimum value,
-/// return the new minimum value with appropriate exactness information.
-fn set_min_if_lesser(
- min_nominee: &Precision<ScalarValue>,
- min_value: &mut Precision<ScalarValue>,
-) {
- match (&min_value, min_nominee) {
- (Precision::Exact(val1), Precision::Exact(val2)) if val1 > val2 => {
- *min_value = min_nominee.clone();
- }
- (Precision::Exact(val1), Precision::Inexact(val2))
- | (Precision::Inexact(val1), Precision::Inexact(val2))
- | (Precision::Inexact(val1), Precision::Exact(val2))
- if val1 > val2 =>
- {
- *min_value = min_nominee.clone().to_inexact();
- }
- (Precision::Exact(_), Precision::Absent) => {
- let exact_min = mem::take(min_value);
- *min_value = exact_min.to_inexact();
- }
- (Precision::Absent, Precision::Exact(_)) => {
- *min_value = min_nominee.clone().to_inexact();
- }
- (Precision::Absent, Precision::Inexact(_)) => {
- *min_value = min_nominee.clone();
- }
- _ => {}
- }
-}
diff --git a/datafusion/core/src/execution/context/mod.rs b/datafusion/core/src/execution/context/mod.rs
index fc110a0699df2..0bb91536da3ca 100644
--- a/datafusion/core/src/execution/context/mod.rs
+++ b/datafusion/core/src/execution/context/mod.rs
@@ -35,7 +35,11 @@ use crate::{
},
datasource::{provider_as_source, MemTable, ViewTable},
error::{DataFusionError, Result},
- execution::{options::ArrowReadOptions, runtime_env::RuntimeEnv, FunctionRegistry},
+ execution::{
+ options::ArrowReadOptions,
+ runtime_env::{RuntimeEnv, RuntimeEnvBuilder},
+ FunctionRegistry,
+ },
logical_expr::AggregateUDF,
logical_expr::ScalarUDF,
logical_expr::{
@@ -1036,13 +1040,73 @@ impl SessionContext {
variable, value, ..
} = stmt;
- let mut state = self.state.write();
- state.config_mut().options_mut().set(&variable, &value)?;
- drop(state);
+ // Check if this is a runtime configuration
+ if variable.starts_with("datafusion.runtime.") {
+ self.set_runtime_variable(&variable, &value)?;
+ } else {
+ let mut state = self.state.write();
+ state.config_mut().options_mut().set(&variable, &value)?;
+ drop(state);
+ }
self.return_empty_dataframe()
}
+ fn set_runtime_variable(&self, variable: &str, value: &str) -> Result<()> {
+ let key = variable.strip_prefix("datafusion.runtime.").unwrap();
+
+ match key {
+ "memory_limit" => {
+ let memory_limit = Self::parse_memory_limit(value)?;
+
+ let mut state = self.state.write();
+ let mut builder =
+ RuntimeEnvBuilder::from_runtime_env(state.runtime_env());
+ builder = builder.with_memory_limit(memory_limit, 1.0);
+ *state = SessionStateBuilder::from(state.clone())
+ .with_runtime_env(Arc::new(builder.build()?))
+ .build();
+ }
+ _ => {
+ return Err(DataFusionError::Plan(format!(
+ "Unknown runtime configuration: {}",
+ variable
+ )))
+ }
+ }
+ Ok(())
+ }
+
+ /// Parse memory limit from string to number of bytes
+ /// Supports formats like '1.5G', '100M', '512K'
+ ///
+ /// # Examples
+ /// ```
+ /// use datafusion::execution::context::SessionContext;
+ ///
+ /// assert_eq!(SessionContext::parse_memory_limit("1M").unwrap(), 1024 * 1024);
+ /// assert_eq!(SessionContext::parse_memory_limit("1.5G").unwrap(), (1.5 * 1024.0 * 1024.0 * 1024.0) as usize);
+ /// ```
+ pub fn parse_memory_limit(limit: &str) -> Result<usize> {
+ let (number, unit) = limit.split_at(limit.len() - 1);
+ let number: f64 = number.parse().map_err(|_| {
+ DataFusionError::Plan(format!(
+ "Failed to parse number from memory limit '{}'",
+ limit
+ ))
+ })?;
+
+ match unit {
+ "K" => Ok((number * 1024.0) as usize),
+ "M" => Ok((number * 1024.0 * 1024.0) as usize),
+ "G" => Ok((number * 1024.0 * 1024.0 * 1024.0) as usize),
+ _ => Err(DataFusionError::Plan(format!(
+ "Unsupported unit '{}' in memory limit '{}'",
+ unit, limit
+ ))),
+ }
+ }
+
async fn create_custom_table(
&self,
cmd: &CreateExternalTable,
@@ -1833,7 +1897,6 @@ mod tests {
use crate::test;
use crate::test_util::{plan_and_collect, populate_csv_partitions};
use arrow::datatypes::{DataType, TimeUnit};
- use std::env;
use std::error::Error;
use std::path::PathBuf;
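For the runtime-setting path added above, a short usage sketch (values are illustrative): `SET datafusion.runtime.memory_limit` now rebuilds the session's `RuntimeEnv` with a bounded memory pool, and `parse_memory_limit` is public so it can also be called directly.

use datafusion::error::Result;
use datafusion::prelude::*;

#[tokio::main]
async fn main() -> Result<()> {
    let ctx = SessionContext::new();

    // routed through set_runtime_variable: rebuilds the RuntimeEnv with a 1 GB pool
    let _ = ctx.sql("SET datafusion.runtime.memory_limit = '1G'").await?;

    // unit parsing used by the runtime path
    assert_eq!(SessionContext::parse_memory_limit("512K")?, 512 * 1024);
    Ok(())
}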
diff --git a/datafusion/core/src/execution/session_state.rs b/datafusion/core/src/execution/session_state.rs
index 28f599304f8c8..597700bf8be3d 100644
--- a/datafusion/core/src/execution/session_state.rs
+++ b/datafusion/core/src/execution/session_state.rs
@@ -1348,28 +1348,30 @@ impl SessionStateBuilder {
} = self;
let config = config.unwrap_or_default();
- let runtime_env = runtime_env.unwrap_or(Arc::new(RuntimeEnv::default()));
+ let runtime_env = runtime_env.unwrap_or_else(|| Arc::new(RuntimeEnv::default()));
let mut state = SessionState {
- session_id: session_id.unwrap_or(Uuid::new_v4().to_string()),
+ session_id: session_id.unwrap_or_else(|| Uuid::new_v4().to_string()),
analyzer: analyzer.unwrap_or_default(),
expr_planners: expr_planners.unwrap_or_default(),
type_planner,
optimizer: optimizer.unwrap_or_default(),
physical_optimizers: physical_optimizers.unwrap_or_default(),
- query_planner: query_planner.unwrap_or(Arc::new(DefaultQueryPlanner {})),
- catalog_list: catalog_list
- .unwrap_or(Arc::new(MemoryCatalogProviderList::new())
- as Arc<dyn CatalogProviderList>),
+ query_planner: query_planner
+ .unwrap_or_else(|| Arc::new(DefaultQueryPlanner {})),
+ catalog_list: catalog_list.unwrap_or_else(|| {
+ Arc::new(MemoryCatalogProviderList::new()) as Arc<dyn CatalogProviderList>
+ }),
table_functions: table_functions.unwrap_or_default(),
scalar_functions: HashMap::new(),
aggregate_functions: HashMap::new(),
window_functions: HashMap::new(),
serializer_registry: serializer_registry
- .unwrap_or(Arc::new(EmptySerializerRegistry)),
+ .unwrap_or_else(|| Arc::new(EmptySerializerRegistry)),
file_formats: HashMap::new(),
- table_options: table_options
- .unwrap_or(TableOptions::default_from_session_config(config.options())),
+ table_options: table_options.unwrap_or_else(|| {
+ TableOptions::default_from_session_config(config.options())
+ }),
config,
execution_props: execution_props.unwrap_or_default(),
table_factories: table_factories.unwrap_or_default(),
diff --git a/datafusion/core/src/lib.rs b/datafusion/core/src/lib.rs
index cc510bc81f1a8..928efd533ca44 100644
--- a/datafusion/core/src/lib.rs
+++ b/datafusion/core/src/lib.rs
@@ -22,7 +22,18 @@
#![cfg_attr(docsrs, feature(doc_auto_cfg))]
// Make sure fast / cheap clones on Arc are explicit:
// https://github.com/apache/datafusion/issues/11143
-#![cfg_attr(not(test), deny(clippy::clone_on_ref_ptr))]
+//
+// Eliminate unnecessary function calls (some of which may not be cheap) due to `xxx_or`
+// for performance. Also avoid abusing `xxx_or_else` for readability:
+// https://github.com/apache/datafusion/issues/15802
+#![cfg_attr(
+ not(test),
+ deny(
+ clippy::clone_on_ref_ptr,
+ clippy::or_fun_call,
+ clippy::unnecessary_lazy_evaluations
+ )
+)]
#![warn(missing_docs, clippy::needless_borrow)]
//! [DataFusion] is an extensible query engine written in Rust that
@@ -872,6 +883,12 @@ doc_comment::doctest!(
user_guide_configs
);
+#[cfg(doctest)]
+doc_comment::doctest!(
+ "../../../docs/source/user-guide/runtime_configs.md",
+ user_guide_runtime_configs
+);
+
#[cfg(doctest)]
doc_comment::doctest!(
"../../../docs/source/user-guide/crate-configuration.md",
@@ -1021,8 +1038,8 @@ doc_comment::doctest!(
#[cfg(doctest)]
doc_comment::doctest!(
- "../../../docs/source/user-guide/sql/write_options.md",
- user_guide_sql_write_options
+ "../../../docs/source/user-guide/sql/format_options.md",
+ user_guide_sql_format_options
);
#[cfg(doctest)]
diff --git a/datafusion/core/src/physical_planner.rs b/datafusion/core/src/physical_planner.rs
index f1a99a7714ac4..be24206c676c6 100644
--- a/datafusion/core/src/physical_planner.rs
+++ b/datafusion/core/src/physical_planner.rs
@@ -81,7 +81,7 @@ use datafusion_expr::{
WindowFrameBound, WriteOp,
};
use datafusion_physical_expr::aggregate::{AggregateExprBuilder, AggregateFunctionExpr};
-use datafusion_physical_expr::expressions::Literal;
+use datafusion_physical_expr::expressions::{Column, Literal};
use datafusion_physical_expr::LexOrdering;
use datafusion_physical_optimizer::PhysicalOptimizerRule;
use datafusion_physical_plan::execution_plan::InvariantLevel;
@@ -1023,18 +1023,12 @@ impl DefaultPhysicalPlanner {
// Collect left & right field indices, the field indices are sorted in ascending order
let left_field_indices = cols
.iter()
- .filter_map(|c| match left_df_schema.index_of_column(c) {
- Ok(idx) => Some(idx),
- _ => None,
- })
+ .filter_map(|c| left_df_schema.index_of_column(c).ok())
.sorted()
.collect::<Vec<_>>();
let right_field_indices = cols
.iter()
- .filter_map(|c| match right_df_schema.index_of_column(c) {
- Ok(idx) => Some(idx),
- _ => None,
- })
+ .filter_map(|c| right_df_schema.index_of_column(c).ok())
.sorted()
.collect::<Vec<_>>();
@@ -2006,7 +2000,8 @@ impl DefaultPhysicalPlanner {
input: &Arc<LogicalPlan>,
expr: &[Expr],
) -> Result<Arc<dyn ExecutionPlan>> {
- let input_schema = input.as_ref().schema();
+ let input_logical_schema = input.as_ref().schema();
+ let input_physical_schema = input_exec.schema();
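+ // The logical schema resolves column indices for the logical expressions below,
+ // while the physical schema is used to reconcile column names that were adjusted
+ // during logical planning (see `maybe_fix_physical_column_name`).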
let physical_exprs = expr
.iter()
.map(|e| {
@@ -2025,7 +2020,7 @@ impl DefaultPhysicalPlanner {
// This depends on the invariant that logical schema field index MUST match
// with physical schema field index.
let physical_name = if let Expr::Column(col) = e {
- match input_schema.index_of_column(col) {
+ match input_logical_schema.index_of_column(col) {
Ok(idx) => {
// index physical field using logical field index
Ok(input_exec.schema().field(idx).name().to_string())
@@ -2038,10 +2033,14 @@ impl DefaultPhysicalPlanner {
physical_name(e)
};
- tuple_err((
- self.create_physical_expr(e, input_schema, session_state),
- physical_name,
- ))
+ let physical_expr =
+ self.create_physical_expr(e, input_logical_schema, session_state);
+
+ // Check for possible column name mismatches
+ let final_physical_expr =
+ maybe_fix_physical_column_name(physical_expr, &input_physical_schema);
+
+ tuple_err((final_physical_expr, physical_name))
})
.collect::<Result<Vec<_>>>()?;
@@ -2061,6 +2060,40 @@ fn tuple_err<T, R>(value: (Result<T>, Result<R>)) -> Result<(T, R)> {
}
}
+// Handle the case where the name of a physical column expression does not match the name of the corresponding physical input field.
+// Physical field names are taken from the physical schema, whereas physical column expressions are derived from logical column names.
+//
+// This is a special case that applies only to column expressions. Logical plans may slightly modify column names by appending a suffix (e.g., using ':')
+// to avoid duplicates, since DFSchemas do not allow duplicate names. For example: `count(Int64(1)):1`.
+fn maybe_fix_physical_column_name(
+ expr: Result>,
+ input_physical_schema: &SchemaRef,
+) -> Result<Arc<dyn PhysicalExpr>> {
+ if let Ok(e) = &expr {
+ if let Some(column) = e.as_any().downcast_ref::<Column>() {
+ let physical_field = input_physical_schema.field(column.index());
+ let expr_col_name = column.name();
+ let physical_name = physical_field.name();
+
+ if physical_name != expr_col_name {
+ // handle edge cases where the physical_name contains ':'.
+ let colon_count = physical_name.matches(':').count();
+ let mut splits = expr_col_name.match_indices(':');
+ let split_pos = splits.nth(colon_count);
+
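+ // `nth(colon_count)` skips the ':' characters that belong to the physical name
+ // itself, so `split_pos` lands on the first ':' of the appended dedup suffix.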
+ if let Some((idx, _)) = split_pos {
+ let base_name = &expr_col_name[..idx];
+ if base_name == physical_name {
+ let updated_column = Column::new(physical_name, column.index());
+ return Ok(Arc::new(updated_column));
+ }
+ }
+ }
+ }
+ }
+ expr
+}
+
struct OptimizationInvariantChecker<'a> {
rule: &'a Arc<dyn PhysicalOptimizerRule>,
}
@@ -2656,6 +2689,30 @@ mod tests {
}
}
+ #[tokio::test]
+ async fn test_maybe_fix_colon_in_physical_name() {
+ // The physical schema has a field name with a colon
+ let schema = Schema::new(vec![Field::new("metric:avg", DataType::Int32, false)]);
+ let schema_ref: SchemaRef = Arc::new(schema);
+
+ // What might happen after deduplication
+ let logical_col_name = "metric:avg:1";
+ let expr_with_suffix =
+ Arc::new(Column::new(logical_col_name, 0)) as Arc<dyn PhysicalExpr>;
+ let expr_result = Ok(expr_with_suffix);
+
+ // Call function under test
+ let fixed_expr =
+ maybe_fix_physical_column_name(expr_result, &schema_ref).unwrap();
+
+ // Downcast back to Column so we can check the name
+ let col = fixed_expr
+ .as_any()
+ .downcast_ref::<Column>()
+ .expect("Column");
+
+ assert_eq!(col.name(), "metric:avg");
+ }
struct ErrorExtensionPlanner {}
#[async_trait]
diff --git a/datafusion/core/src/test/object_store.rs b/datafusion/core/src/test/object_store.rs
index e1328770cabdd..8b19658bb1473 100644
--- a/datafusion/core/src/test/object_store.rs
+++ b/datafusion/core/src/test/object_store.rs
@@ -66,7 +66,7 @@ pub fn local_unpartitioned_file(path: impl AsRef<std::path::Path>) -> ObjectMeta
ObjectMeta {
location,
last_modified: metadata.modified().map(chrono::DateTime::from).unwrap(),
- size: metadata.len() as usize,
+ size: metadata.len(),
e_tag: None,
version: None,
}
@@ -166,7 +166,7 @@ impl ObjectStore for BlockingObjectStore {
fn list(
&self,
prefix: Option<&Path>,
- ) -> BoxStream<'_, object_store::Result<ObjectMeta>> {
+ ) -> BoxStream<'static, object_store::Result<ObjectMeta>> {
self.inner.list(prefix)
}
diff --git a/datafusion/core/src/test_util/parquet.rs b/datafusion/core/src/test_util/parquet.rs
index 084554eecbdb0..f5753af64d93f 100644
--- a/datafusion/core/src/test_util/parquet.rs
+++ b/datafusion/core/src/test_util/parquet.rs
@@ -102,7 +102,7 @@ impl TestParquetFile {
println!("Generated test dataset with {num_rows} rows");
- let size = std::fs::metadata(&path)?.len() as usize;
+ let size = std::fs::metadata(&path)?.len();
let mut canonical_path = path.canonicalize()?;
diff --git a/datafusion/core/tests/core_integration.rs b/datafusion/core/tests/core_integration.rs
index 9bcb9e41f86a9..250538b133703 100644
--- a/datafusion/core/tests/core_integration.rs
+++ b/datafusion/core/tests/core_integration.rs
@@ -51,6 +51,9 @@ mod serde;
/// Run all tests that are found in the `catalog` directory
mod catalog;
+/// Run all tests that are found in the `tracing` directory
+mod tracing;
+
#[cfg(test)]
#[ctor::ctor]
fn init() {
diff --git a/datafusion/core/tests/dataframe/dataframe_functions.rs b/datafusion/core/tests/dataframe/dataframe_functions.rs
index c763d4c8de2d6..40590d74ad910 100644
--- a/datafusion/core/tests/dataframe/dataframe_functions.rs
+++ b/datafusion/core/tests/dataframe/dataframe_functions.rs
@@ -384,7 +384,7 @@ async fn test_fn_approx_median() -> Result<()> {
#[tokio::test]
async fn test_fn_approx_percentile_cont() -> Result<()> {
- let expr = approx_percentile_cont(col("b"), lit(0.5), None);
+ let expr = approx_percentile_cont(col("b").sort(true, false), lit(0.5), None);
let df = create_test_table().await?;
let batches = df.aggregate(vec![], vec![expr]).unwrap().collect().await?;
@@ -392,11 +392,26 @@ async fn test_fn_approx_percentile_cont() -> Result<()> {
assert_snapshot!(
batches_to_string(&batches),
@r"
- +---------------------------------------------+
- | approx_percentile_cont(test.b,Float64(0.5)) |
- +---------------------------------------------+
- | 10 |
- +---------------------------------------------+
+ +---------------------------------------------------------------------------+
+ | approx_percentile_cont(Float64(0.5)) WITHIN GROUP [test.b ASC NULLS LAST] |
+ +---------------------------------------------------------------------------+
+ | 10 |
+ +---------------------------------------------------------------------------+
+ ");
+
+ let expr = approx_percentile_cont(col("b").sort(false, false), lit(0.1), None);
+
+ let df = create_test_table().await?;
+ let batches = df.aggregate(vec![], vec![expr]).unwrap().collect().await?;
+
+ assert_snapshot!(
+ batches_to_string(&batches),
+ @r"
+ +----------------------------------------------------------------------------+
+ | approx_percentile_cont(Float64(0.1)) WITHIN GROUP [test.b DESC NULLS LAST] |
+ +----------------------------------------------------------------------------+
+ | 100 |
+ +----------------------------------------------------------------------------+
");
// the arg2 parameter is a complex expr, but it can be evaluated to the literal value
@@ -405,23 +420,59 @@ async fn test_fn_approx_percentile_cont() -> Result<()> {
None::<&str>,
"arg_2".to_string(),
));
- let expr = approx_percentile_cont(col("b"), alias_expr, None);
+ let expr = approx_percentile_cont(col("b").sort(true, false), alias_expr, None);
let df = create_test_table().await?;
let batches = df.aggregate(vec![], vec![expr]).unwrap().collect().await?;
assert_snapshot!(
batches_to_string(&batches),
@r"
- +--------------------------------------+
- | approx_percentile_cont(test.b,arg_2) |
- +--------------------------------------+
- | 10 |
- +--------------------------------------+
+ +--------------------------------------------------------------------+
+ | approx_percentile_cont(arg_2) WITHIN GROUP [test.b ASC NULLS LAST] |
+ +--------------------------------------------------------------------+
+ | 10 |
+ +--------------------------------------------------------------------+
+ "
+ );
+
+ let alias_expr = Expr::Alias(Alias::new(
+ cast(lit(0.1), DataType::Float32),
+ None::<&str>,
+ "arg_2".to_string(),
+ ));
+ let expr = approx_percentile_cont(col("b").sort(false, false), alias_expr, None);
+ let df = create_test_table().await?;
+ let batches = df.aggregate(vec![], vec![expr]).unwrap().collect().await?;
+
+ assert_snapshot!(
+ batches_to_string(&batches),
+ @r"
+ +---------------------------------------------------------------------+
+ | approx_percentile_cont(arg_2) WITHIN GROUP [test.b DESC NULLS LAST] |
+ +---------------------------------------------------------------------+
+ | 100 |
+ +---------------------------------------------------------------------+
"
);
// with number of centroids set
- let expr = approx_percentile_cont(col("b"), lit(0.5), Some(lit(2)));
+ let expr = approx_percentile_cont(col("b").sort(true, false), lit(0.5), Some(lit(2)));
+
+ let df = create_test_table().await?;
+ let batches = df.aggregate(vec![], vec![expr]).unwrap().collect().await?;
+
+ assert_snapshot!(
+ batches_to_string(&batches),
+ @r"
+ +------------------------------------------------------------------------------------+
+ | approx_percentile_cont(Float64(0.5),Int32(2)) WITHIN GROUP [test.b ASC NULLS LAST] |
+ +------------------------------------------------------------------------------------+
+ | 30 |
+ +------------------------------------------------------------------------------------+
+ ");
+
+ let expr =
+ approx_percentile_cont(col("b").sort(false, false), lit(0.1), Some(lit(2)));
let df = create_test_table().await?;
let batches = df.aggregate(vec![], vec![expr]).unwrap().collect().await?;
@@ -429,11 +480,11 @@ async fn test_fn_approx_percentile_cont() -> Result<()> {
assert_snapshot!(
batches_to_string(&batches),
@r"
- +------------------------------------------------------+
- | approx_percentile_cont(test.b,Float64(0.5),Int32(2)) |
- +------------------------------------------------------+
- | 30 |
- +------------------------------------------------------+
+ +-------------------------------------------------------------------------------------+
+ | approx_percentile_cont(Float64(0.1),Int32(2)) WITHIN GROUP [test.b DESC NULLS LAST] |
+ +-------------------------------------------------------------------------------------+
+ | 69 |
+ +-------------------------------------------------------------------------------------+
");
Ok(())
diff --git a/datafusion/core/tests/dataframe/mod.rs b/datafusion/core/tests/dataframe/mod.rs
index b5923269ab8ba..1855a512048d6 100644
--- a/datafusion/core/tests/dataframe/mod.rs
+++ b/datafusion/core/tests/dataframe/mod.rs
@@ -5206,6 +5206,40 @@ fn union_fields() -> UnionFields {
.collect()
}
+#[tokio::test]
+async fn union_literal_is_null_and_not_null() -> Result<()> {
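+ // Union a batch whose string column is entirely NULL with one that has a value,
+ // and verify that every output batch reports the same schema.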
+ let str_array_1 = StringArray::from(vec![None::<String>]);
+ let str_array_2 = StringArray::from(vec![Some("a")]);
+
+ let batch_1 =
+ RecordBatch::try_from_iter(vec![("arr", Arc::new(str_array_1) as ArrayRef)])?;
+ let batch_2 =
+ RecordBatch::try_from_iter(vec![("arr", Arc::new(str_array_2) as ArrayRef)])?;
+
+ let ctx = SessionContext::new();
+ ctx.register_batch("union_batch_1", batch_1)?;
+ ctx.register_batch("union_batch_2", batch_2)?;
+
+ let df1 = ctx.table("union_batch_1").await?;
+ let df2 = ctx.table("union_batch_2").await?;
+
+ let batches = df1.union(df2)?.collect().await?;
+ let schema = batches[0].schema();
+
+ for batch in batches {
+ // Verify schema is the same for all batches
+ if !schema.contains(&batch.schema()) {
+ return Err(DataFusionError::Internal(format!(
+ "Schema mismatch. Previously had\n{:#?}\n\nGot:\n{:#?}",
+ &schema,
+ batch.schema()
+ )));
+ }
+ }
+
+ Ok(())
+}
+
#[tokio::test]
async fn sparse_union_is_null() {
// union of [{A=1}, {A=}, {B=3.2}, {B=}, {C="a"}, {C=}]
@@ -5477,6 +5511,64 @@ async fn boolean_dictionary_as_filter() {
);
}
+#[tokio::test]
+async fn test_union_by_name() -> Result<()> {
+ let df = create_test_table("test")
+ .await?
+ .select(vec![col("a"), col("b"), lit(1).alias("c")])?
+ .alias("table_alias")?;
+
+ let df2 = df.clone().select_columns(&["c", "b", "a"])?;
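+ // `union_by_name` aligns columns by name rather than by position, so the
+ // reordered ("c", "b", "a") projection still unions cleanly with ("a", "b", "c").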
+ let result = df.union_by_name(df2)?.sort_by(vec![col("a"), col("b")])?;
+
+ assert_snapshot!(
+ batches_to_sort_string(&result.collect().await?),
+ @r"
+ +-----------+-----+---+
+ | a | b | c |
+ +-----------+-----+---+
+ | 123AbcDef | 100 | 1 |
+ | 123AbcDef | 100 | 1 |
+ | CBAdef | 10 | 1 |
+ | CBAdef | 10 | 1 |
+ | abc123 | 10 | 1 |
+ | abc123 | 10 | 1 |
+ | abcDEF | 1 | 1 |
+ | abcDEF | 1 | 1 |
+ +-----------+-----+---+
+ "
+ );
+ Ok(())
+}
+
+#[tokio::test]
+async fn test_union_by_name_distinct() -> Result<()> {
+ let df = create_test_table("test")
+ .await?
+ .select(vec![col("a"), col("b"), lit(1).alias("c")])?
+ .alias("table_alias")?;
+
+ let df2 = df.clone().select_columns(&["c", "b", "a"])?;
+ let result = df
+ .union_by_name_distinct(df2)?
+ .sort_by(vec![col("a"), col("b")])?;
+
+ assert_snapshot!(
+ batches_to_sort_string(&result.collect().await?),
+ @r"
+ +-----------+-----+---+
+ | a | b | c |
+ +-----------+-----+---+
+ | 123AbcDef | 100 | 1 |
+ | CBAdef | 10 | 1 |
+ | abc123 | 10 | 1 |
+ | abcDEF | 1 | 1 |
+ +-----------+-----+---+
+ "
+ );
+ Ok(())
+}
+
#[tokio::test]
async fn test_alias() -> Result<()> {
let df = create_test_table("test")
diff --git a/datafusion/core/tests/execution/logical_plan.rs b/datafusion/core/tests/execution/logical_plan.rs
index b30636ddf6a81..fdee6fd5dbbce 100644
--- a/datafusion/core/tests/execution/logical_plan.rs
+++ b/datafusion/core/tests/execution/logical_plan.rs
@@ -19,15 +19,19 @@
//! create them and depend on them. Test executable semantics of logical plans.
use arrow::array::Int64Array;
-use arrow::datatypes::{DataType, Field};
+use arrow::datatypes::{DataType, Field, Schema};
+use datafusion::datasource::{provider_as_source, ViewTable};
use datafusion::execution::session_state::SessionStateBuilder;
-use datafusion_common::{Column, DFSchema, Result, ScalarValue, Spans};
+use datafusion_common::{Column, DFSchema, DFSchemaRef, Result, ScalarValue, Spans};
use datafusion_execution::TaskContext;
use datafusion_expr::expr::{AggregateFunction, AggregateFunctionParams};
use datafusion_expr::logical_plan::{LogicalPlan, Values};
-use datafusion_expr::{Aggregate, AggregateUDF, Expr};
+use datafusion_expr::{
+ Aggregate, AggregateUDF, EmptyRelation, Expr, LogicalPlanBuilder, UNNAMED_TABLE,
+};
use datafusion_functions_aggregate::count::Count;
use datafusion_physical_plan::collect;
+use insta::assert_snapshot;
use std::collections::HashMap;
use std::fmt::Debug;
use std::ops::Deref;
@@ -96,3 +100,37 @@ where
};
element
}
+
+#[test]
+fn inline_scan_projection_test() -> Result<()> {
+ let name = UNNAMED_TABLE;
+ let column = "a";
+
+ let schema = Schema::new(vec![
+ Field::new("a", DataType::Int32, false),
+ Field::new("b", DataType::Int32, false),
+ ]);
+ let projection = vec![schema.index_of(column)?];
+
+ let provider = ViewTable::new(
+ LogicalPlan::EmptyRelation(EmptyRelation {
+ produce_one_row: false,
+ schema: DFSchemaRef::new(DFSchema::try_from(schema)?),
+ }),
+ None,
+ );
+ let source = provider_as_source(Arc::new(provider));
+
+ let plan = LogicalPlanBuilder::scan(name, source, Some(projection))?.build()?;
+
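+ // The provider is a `ViewTable`, so the scan is inlined: the projection shows up
+ // as `Projection: a` under the `?table?` subquery alias instead of a `TableScan`.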
+ assert_snapshot!(
+ format!("{plan}"),
+ @r"
+ SubqueryAlias: ?table?
+ Projection: a
+ EmptyRelation
+ "
+ );
+
+ Ok(())
+}
diff --git a/datafusion/core/tests/expr_api/simplification.rs b/datafusion/core/tests/expr_api/simplification.rs
index 7bb21725ef401..34e0487f312fb 100644
--- a/datafusion/core/tests/expr_api/simplification.rs
+++ b/datafusion/core/tests/expr_api/simplification.rs
@@ -547,9 +547,9 @@ fn test_simplify_with_cycle_count(
};
let simplifier = ExprSimplifier::new(info);
let (simplified_expr, count) = simplifier
- .simplify_with_cycle_count(input_expr.clone())
+ .simplify_with_cycle_count_transformed(input_expr.clone())
.expect("successfully evaluated");
-
+ let simplified_expr = simplified_expr.data;
assert_eq!(
simplified_expr, expected_expr,
"Mismatch evaluating {input_expr}\n Expected:{expected_expr}\n Got:{simplified_expr}"
diff --git a/datafusion/core/tests/fuzz_cases/aggregate_fuzz.rs b/datafusion/core/tests/fuzz_cases/aggregate_fuzz.rs
index dcf477135a377..ff3b66986ced9 100644
--- a/datafusion/core/tests/fuzz_cases/aggregate_fuzz.rs
+++ b/datafusion/core/tests/fuzz_cases/aggregate_fuzz.rs
@@ -18,16 +18,17 @@
use std::sync::Arc;
use crate::fuzz_cases::aggregation_fuzzer::{
- AggregationFuzzerBuilder, ColumnDescr, DatasetGeneratorConfig, QueryBuilder,
+ AggregationFuzzerBuilder, DatasetGeneratorConfig, QueryBuilder,
};
-use arrow::array::{types::Int64Type, Array, ArrayRef, AsArray, Int64Array, RecordBatch};
-use arrow::compute::{concat_batches, SortOptions};
-use arrow::datatypes::{
- DataType, IntervalUnit, TimeUnit, DECIMAL128_MAX_PRECISION, DECIMAL128_MAX_SCALE,
- DECIMAL256_MAX_PRECISION, DECIMAL256_MAX_SCALE,
+use arrow::array::{
+ types::Int64Type, Array, ArrayRef, AsArray, Int32Array, Int64Array, RecordBatch,
+ StringArray,
};
+use arrow::compute::{concat_batches, SortOptions};
+use arrow::datatypes::DataType;
use arrow::util::pretty::pretty_format_batches;
+use arrow_schema::{Field, Schema, SchemaRef};
use datafusion::common::Result;
use datafusion::datasource::memory::MemorySourceConfig;
use datafusion::datasource::source::DataSourceExec;
@@ -42,14 +43,20 @@ use datafusion_common::tree_node::{TreeNode, TreeNodeRecursion, TreeNodeVisitor}
use datafusion_common::HashMap;
use datafusion_common_runtime::JoinSet;
use datafusion_functions_aggregate::sum::sum_udaf;
-use datafusion_physical_expr::expressions::col;
+use datafusion_physical_expr::expressions::{col, lit, Column};
use datafusion_physical_expr::PhysicalSortExpr;
use datafusion_physical_expr_common::sort_expr::LexOrdering;
use datafusion_physical_plan::InputOrderMode;
use test_utils::{add_empty_batches, StringBatchGenerator};
+use datafusion_execution::memory_pool::FairSpillPool;
+use datafusion_execution::runtime_env::RuntimeEnvBuilder;
+use datafusion_execution::TaskContext;
+use datafusion_physical_plan::metrics::MetricValue;
use rand::rngs::StdRng;
-use rand::{thread_rng, Rng, SeedableRng};
+use rand::{random, thread_rng, Rng, SeedableRng};
+
+use super::record_batch_generator::get_supported_types_columns;
// ========================================================================
// The new aggregation fuzz tests based on [`AggregationFuzzer`]
@@ -113,6 +120,32 @@ async fn test_first_val() {
.await;
}
+#[tokio::test(flavor = "multi_thread")]
+async fn test_last_val() {
+ let mut data_gen_config = baseline_config();
+
+ for i in 0..data_gen_config.columns.len() {
+ if data_gen_config.columns[i].get_max_num_distinct().is_none() {
+ data_gen_config.columns[i] = data_gen_config.columns[i]
+ .clone()
+ // Minimize the chance of identical values in the order by columns to make the test more stable
+ .with_max_num_distinct(usize::MAX);
+ }
+ }
+
+ let query_builder = QueryBuilder::new()
+ .with_table_name("fuzz_table")
+ .with_aggregate_function("last_value")
+ .with_aggregate_arguments(data_gen_config.all_columns())
+ .set_group_by_columns(data_gen_config.all_columns());
+
+ AggregationFuzzerBuilder::from(data_gen_config)
+ .add_query_builder(query_builder)
+ .build()
+ .run()
+ .await;
+}
+
#[tokio::test(flavor = "multi_thread")]
async fn test_max() {
let data_gen_config = baseline_config();
@@ -201,81 +234,7 @@ async fn test_median() {
/// 1. structured types
fn baseline_config() -> DatasetGeneratorConfig {
let mut rng = thread_rng();
- let columns = vec![
- ColumnDescr::new("i8", DataType::Int8),
- ColumnDescr::new("i16", DataType::Int16),
- ColumnDescr::new("i32", DataType::Int32),
- ColumnDescr::new("i64", DataType::Int64),
- ColumnDescr::new("u8", DataType::UInt8),
- ColumnDescr::new("u16", DataType::UInt16),
- ColumnDescr::new("u32", DataType::UInt32),
- ColumnDescr::new("u64", DataType::UInt64),
- ColumnDescr::new("date32", DataType::Date32),
- ColumnDescr::new("date64", DataType::Date64),
- ColumnDescr::new("time32_s", DataType::Time32(TimeUnit::Second)),
- ColumnDescr::new("time32_ms", DataType::Time32(TimeUnit::Millisecond)),
- ColumnDescr::new("time64_us", DataType::Time64(TimeUnit::Microsecond)),
- ColumnDescr::new("time64_ns", DataType::Time64(TimeUnit::Nanosecond)),
- // `None` is passed in here however when generating the array, it will generate
- // random timezones.
- ColumnDescr::new("timestamp_s", DataType::Timestamp(TimeUnit::Second, None)),
- ColumnDescr::new(
- "timestamp_ms",
- DataType::Timestamp(TimeUnit::Millisecond, None),
- ),
- ColumnDescr::new(
- "timestamp_us",
- DataType::Timestamp(TimeUnit::Microsecond, None),
- ),
- ColumnDescr::new(
- "timestamp_ns",
- DataType::Timestamp(TimeUnit::Nanosecond, None),
- ),
- ColumnDescr::new("float32", DataType::Float32),
- ColumnDescr::new("float64", DataType::Float64),
- ColumnDescr::new(
- "interval_year_month",
- DataType::Interval(IntervalUnit::YearMonth),
- ),
- ColumnDescr::new(
- "interval_day_time",
- DataType::Interval(IntervalUnit::DayTime),
- ),
- ColumnDescr::new(
- "interval_month_day_nano",
- DataType::Interval(IntervalUnit::MonthDayNano),
- ),
- // begin decimal columns
- ColumnDescr::new("decimal128", {
- // Generate valid precision and scale for Decimal128 randomly.
- let precision: u8 = rng.gen_range(1..=DECIMAL128_MAX_PRECISION);
- // It's safe to cast `precision` to i8 type directly.
- let scale: i8 = rng.gen_range(
- i8::MIN..=std::cmp::min(precision as i8, DECIMAL128_MAX_SCALE),
- );
- DataType::Decimal128(precision, scale)
- }),
- ColumnDescr::new("decimal256", {
- // Generate valid precision and scale for Decimal256 randomly.
- let precision: u8 = rng.gen_range(1..=DECIMAL256_MAX_PRECISION);
- // It's safe to cast `precision` to i8 type directly.
- let scale: i8 = rng.gen_range(
- i8::MIN..=std::cmp::min(precision as i8, DECIMAL256_MAX_SCALE),
- );
- DataType::Decimal256(precision, scale)
- }),
- // begin string columns
- ColumnDescr::new("utf8", DataType::Utf8),
- ColumnDescr::new("largeutf8", DataType::LargeUtf8),
- ColumnDescr::new("utf8view", DataType::Utf8View),
- // low cardinality columns
- ColumnDescr::new("u8_low", DataType::UInt8).with_max_num_distinct(10),
- ColumnDescr::new("utf8_low", DataType::Utf8).with_max_num_distinct(10),
- ColumnDescr::new("bool", DataType::Boolean),
- ColumnDescr::new("binary", DataType::Binary),
- ColumnDescr::new("large_binary", DataType::LargeBinary),
- ColumnDescr::new("binaryview", DataType::BinaryView),
- ];
+ let columns = get_supported_types_columns(rng.gen());
let min_num_rows = 512;
let max_num_rows = 1024;
@@ -663,3 +622,134 @@ fn extract_result_counts(results: Vec) -> HashMap