From d05275dd5c9638faa0ddf4f3b7d6f2f330b1a5aa Mon Sep 17 00:00:00 2001 From: "mingmwang@ebay.com" Date: Fri, 26 May 2023 18:00:27 +0800 Subject: [PATCH 01/13] init checkin --- benchmarks/expected-plans/q11.txt | 178 ++++---- benchmarks/expected-plans/q15.txt | 130 +++--- benchmarks/expected-plans/q17.txt | 98 ++--- benchmarks/expected-plans/q2.txt | 10 +- benchmarks/expected-plans/q20.txt | 160 +++---- benchmarks/expected-plans/q22.txt | 112 +++-- datafusion/core/tests/sql/joins.rs | 34 +- datafusion/core/tests/sql/subqueries.rs | 401 +++++++++++++++--- datafusion/core/tests/tpcds_planning.rs | 11 +- datafusion/expr/src/logical_plan/plan.rs | 73 ++++ datafusion/optimizer/src/analyzer/subquery.rs | 10 + .../src/decorrelate_predicate_subquery.rs | 335 ++++++--------- .../optimizer/src/scalar_subquery_to_join.rs | 397 +++++++---------- datafusion/optimizer/src/utils.rs | 261 ++++++++++-- .../optimizer/tests/integration-test.rs | 11 +- 15 files changed, 1318 insertions(+), 903 deletions(-) diff --git a/benchmarks/expected-plans/q11.txt b/benchmarks/expected-plans/q11.txt index 0a732897c38f1..fae9e0ea7f133 100644 --- a/benchmarks/expected-plans/q11.txt +++ b/benchmarks/expected-plans/q11.txt @@ -1,89 +1,89 @@ -+---------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| plan_type | plan | -+---------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| logical_plan | Sort: value DESC NULLS FIRST | -| | Projection: partsupp.ps_partkey, SUM(partsupp.ps_supplycost * partsupp.ps_availqty) AS value | -| | Filter: CAST(SUM(partsupp.ps_supplycost * partsupp.ps_availqty) AS Decimal128(38, 15)) > __scalar_sq_1.__value | -| | CrossJoin: | -| | Aggregate: groupBy=[[partsupp.ps_partkey]], 
aggr=[[SUM(partsupp.ps_supplycost * CAST(partsupp.ps_availqty AS Decimal128(10, 0)))]] | -| | Projection: partsupp.ps_partkey, partsupp.ps_availqty, partsupp.ps_supplycost | -| | Inner Join: supplier.s_nationkey = nation.n_nationkey | -| | Projection: partsupp.ps_partkey, partsupp.ps_availqty, partsupp.ps_supplycost, supplier.s_nationkey | -| | Inner Join: partsupp.ps_suppkey = supplier.s_suppkey | -| | TableScan: partsupp projection=[ps_partkey, ps_suppkey, ps_availqty, ps_supplycost] | -| | TableScan: supplier projection=[s_suppkey, s_nationkey] | -| | Projection: nation.n_nationkey | -| | Filter: nation.n_name = Utf8("GERMANY") | -| | TableScan: nation projection=[n_nationkey, n_name] | -| | SubqueryAlias: __scalar_sq_1 | -| | Projection: CAST(CAST(SUM(partsupp.ps_supplycost * partsupp.ps_availqty) AS Float64) * Float64(0.0001) AS Decimal128(38, 15)) AS __value | -| | Aggregate: groupBy=[[]], aggr=[[SUM(partsupp.ps_supplycost * CAST(partsupp.ps_availqty AS Decimal128(10, 0)))]] | -| | Projection: partsupp.ps_availqty, partsupp.ps_supplycost | -| | Inner Join: supplier.s_nationkey = nation.n_nationkey | -| | Projection: partsupp.ps_availqty, partsupp.ps_supplycost, supplier.s_nationkey | -| | Inner Join: partsupp.ps_suppkey = supplier.s_suppkey | -| | TableScan: partsupp projection=[ps_suppkey, ps_availqty, ps_supplycost] | -| | TableScan: supplier projection=[s_suppkey, s_nationkey] | -| | Projection: nation.n_nationkey | -| | Filter: nation.n_name = Utf8("GERMANY") | -| | TableScan: nation projection=[n_nationkey, n_name] | -| physical_plan | SortExec: expr=[value@1 DESC] | -| | ProjectionExec: expr=[ps_partkey@0 as ps_partkey, SUM(partsupp.ps_supplycost * partsupp.ps_availqty)@1 as value] | -| | CoalesceBatchesExec: target_batch_size=8192 | -| | FilterExec: CAST(SUM(partsupp.ps_supplycost * partsupp.ps_availqty)@1 AS Decimal128(38, 15)) > __value@2 | -| | CrossJoinExec | -| | CoalescePartitionsExec | -| | AggregateExec: mode=FinalPartitioned, gby=[ps_partkey@0 
as ps_partkey], aggr=[SUM(partsupp.ps_supplycost * partsupp.ps_availqty)] | -| | CoalesceBatchesExec: target_batch_size=8192 | -| | RepartitionExec: partitioning=Hash([Column { name: "ps_partkey", index: 0 }], 2), input_partitions=2 | -| | AggregateExec: mode=Partial, gby=[ps_partkey@0 as ps_partkey], aggr=[SUM(partsupp.ps_supplycost * partsupp.ps_availqty)] | -| | ProjectionExec: expr=[ps_partkey@0 as ps_partkey, ps_availqty@1 as ps_availqty, ps_supplycost@2 as ps_supplycost] | -| | CoalesceBatchesExec: target_batch_size=8192 | -| | HashJoinExec: mode=Partitioned, join_type=Inner, on=[(Column { name: "s_nationkey", index: 3 }, Column { name: "n_nationkey", index: 0 })] | -| | CoalesceBatchesExec: target_batch_size=8192 | -| | RepartitionExec: partitioning=Hash([Column { name: "s_nationkey", index: 3 }], 2), input_partitions=2 | -| | RepartitionExec: partitioning=RoundRobinBatch(2), input_partitions=2 | -| | ProjectionExec: expr=[ps_partkey@0 as ps_partkey, ps_availqty@2 as ps_availqty, ps_supplycost@3 as ps_supplycost, s_nationkey@5 as s_nationkey] | -| | CoalesceBatchesExec: target_batch_size=8192 | -| | HashJoinExec: mode=Partitioned, join_type=Inner, on=[(Column { name: "ps_suppkey", index: 1 }, Column { name: "s_suppkey", index: 0 })] | -| | CoalesceBatchesExec: target_batch_size=8192 | -| | RepartitionExec: partitioning=Hash([Column { name: "ps_suppkey", index: 1 }], 2), input_partitions=0 | -| | MemoryExec: partitions=0, partition_sizes=[] | -| | CoalesceBatchesExec: target_batch_size=8192 | -| | RepartitionExec: partitioning=Hash([Column { name: "s_suppkey", index: 0 }], 2), input_partitions=0 | -| | MemoryExec: partitions=0, partition_sizes=[] | -| | CoalesceBatchesExec: target_batch_size=8192 | -| | RepartitionExec: partitioning=Hash([Column { name: "n_nationkey", index: 0 }], 2), input_partitions=2 | -| | RepartitionExec: partitioning=RoundRobinBatch(2), input_partitions=0 | -| | ProjectionExec: expr=[n_nationkey@0 as n_nationkey] | -| | 
CoalesceBatchesExec: target_batch_size=8192 | -| | FilterExec: n_name@1 = GERMANY | -| | MemoryExec: partitions=0, partition_sizes=[] | -| | ProjectionExec: expr=[CAST(CAST(SUM(partsupp.ps_supplycost * partsupp.ps_availqty)@0 AS Float64) * 0.0001 AS Decimal128(38, 15)) as __value] | -| | AggregateExec: mode=Final, gby=[], aggr=[SUM(partsupp.ps_supplycost * partsupp.ps_availqty)] | -| | CoalescePartitionsExec | -| | AggregateExec: mode=Partial, gby=[], aggr=[SUM(partsupp.ps_supplycost * partsupp.ps_availqty)] | -| | ProjectionExec: expr=[ps_availqty@0 as ps_availqty, ps_supplycost@1 as ps_supplycost] | -| | CoalesceBatchesExec: target_batch_size=8192 | -| | HashJoinExec: mode=Partitioned, join_type=Inner, on=[(Column { name: "s_nationkey", index: 2 }, Column { name: "n_nationkey", index: 0 })] | -| | CoalesceBatchesExec: target_batch_size=8192 | -| | RepartitionExec: partitioning=Hash([Column { name: "s_nationkey", index: 2 }], 2), input_partitions=2 | -| | RepartitionExec: partitioning=RoundRobinBatch(2), input_partitions=2 | -| | ProjectionExec: expr=[ps_availqty@1 as ps_availqty, ps_supplycost@2 as ps_supplycost, s_nationkey@4 as s_nationkey] | -| | CoalesceBatchesExec: target_batch_size=8192 | -| | HashJoinExec: mode=Partitioned, join_type=Inner, on=[(Column { name: "ps_suppkey", index: 0 }, Column { name: "s_suppkey", index: 0 })] | -| | CoalesceBatchesExec: target_batch_size=8192 | -| | RepartitionExec: partitioning=Hash([Column { name: "ps_suppkey", index: 0 }], 2), input_partitions=0 | -| | MemoryExec: partitions=0, partition_sizes=[] | -| | CoalesceBatchesExec: target_batch_size=8192 | -| | RepartitionExec: partitioning=Hash([Column { name: "s_suppkey", index: 0 }], 2), input_partitions=0 | -| | MemoryExec: partitions=0, partition_sizes=[] | -| | CoalesceBatchesExec: target_batch_size=8192 | -| | RepartitionExec: partitioning=Hash([Column { name: "n_nationkey", index: 0 }], 2), input_partitions=2 | -| | RepartitionExec: partitioning=RoundRobinBatch(2), 
input_partitions=0 | -| | ProjectionExec: expr=[n_nationkey@0 as n_nationkey] | -| | CoalesceBatchesExec: target_batch_size=8192 | -| | FilterExec: n_name@1 = GERMANY | -| | MemoryExec: partitions=0, partition_sizes=[] | -| | | -+---------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ \ No newline at end of file ++---------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| plan_type | plan | ++---------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| logical_plan | Sort: value DESC NULLS FIRST | +| | Projection: partsupp.ps_partkey, SUM(partsupp.ps_supplycost * partsupp.ps_availqty) AS value | +| | Filter: CAST(SUM(partsupp.ps_supplycost * partsupp.ps_availqty) AS Decimal128(38, 15)) > __scalar_sq_1.SUM(partsupp.ps_supplycost * partsupp.ps_availqty) * Float64(0.0001) | +| | CrossJoin: | +| | Aggregate: groupBy=[[partsupp.ps_partkey]], aggr=[[SUM(partsupp.ps_supplycost * CAST(partsupp.ps_availqty AS Decimal128(10, 0)))]] | +| | Projection: partsupp.ps_partkey, partsupp.ps_availqty, partsupp.ps_supplycost | +| | Inner Join: supplier.s_nationkey = nation.n_nationkey | +| | Projection: partsupp.ps_partkey, partsupp.ps_availqty, partsupp.ps_supplycost, supplier.s_nationkey | +| | Inner Join: partsupp.ps_suppkey = supplier.s_suppkey | +| | TableScan: partsupp projection=[ps_partkey, ps_suppkey, ps_availqty, ps_supplycost] | +| | TableScan: supplier projection=[s_suppkey, s_nationkey] | +| | Projection: nation.n_nationkey | +| | Filter: nation.n_name = Utf8("GERMANY") | +| | TableScan: nation 
projection=[n_nationkey, n_name] | +| | SubqueryAlias: __scalar_sq_1 | +| | Projection: CAST(CAST(SUM(partsupp.ps_supplycost * partsupp.ps_availqty) AS Float64) * Float64(0.0001) AS Decimal128(38, 15)) | +| | Aggregate: groupBy=[[]], aggr=[[SUM(partsupp.ps_supplycost * CAST(partsupp.ps_availqty AS Decimal128(10, 0)))]] | +| | Projection: partsupp.ps_availqty, partsupp.ps_supplycost | +| | Inner Join: supplier.s_nationkey = nation.n_nationkey | +| | Projection: partsupp.ps_availqty, partsupp.ps_supplycost, supplier.s_nationkey | +| | Inner Join: partsupp.ps_suppkey = supplier.s_suppkey | +| | TableScan: partsupp projection=[ps_suppkey, ps_availqty, ps_supplycost] | +| | TableScan: supplier projection=[s_suppkey, s_nationkey] | +| | Projection: nation.n_nationkey | +| | Filter: nation.n_name = Utf8("GERMANY") | +| | TableScan: nation projection=[n_nationkey, n_name] | +| physical_plan | SortExec: expr=[value@1 DESC] | +| | ProjectionExec: expr=[ps_partkey@0 as ps_partkey, SUM(partsupp.ps_supplycost * partsupp.ps_availqty)@1 as value] | +| | CoalesceBatchesExec: target_batch_size=8192 | +| | FilterExec: CAST(SUM(partsupp.ps_supplycost * partsupp.ps_availqty)@1 AS Decimal128(38, 15)) > SUM(partsupp.ps_supplycost * partsupp.ps_availqty) * Float64(0.0001)@2 | +| | CrossJoinExec | +| | CoalescePartitionsExec | +| | AggregateExec: mode=FinalPartitioned, gby=[ps_partkey@0 as ps_partkey], aggr=[SUM(partsupp.ps_supplycost * partsupp.ps_availqty)] | +| | CoalesceBatchesExec: target_batch_size=8192 | +| | RepartitionExec: partitioning=Hash([Column { name: "ps_partkey", index: 0 }], 2), input_partitions=2 | +| | AggregateExec: mode=Partial, gby=[ps_partkey@0 as ps_partkey], aggr=[SUM(partsupp.ps_supplycost * partsupp.ps_availqty)] | +| | ProjectionExec: expr=[ps_partkey@0 as ps_partkey, ps_availqty@1 as ps_availqty, ps_supplycost@2 as ps_supplycost] | +| | CoalesceBatchesExec: target_batch_size=8192 | +| | HashJoinExec: mode=Partitioned, join_type=Inner, on=[(Column { name: 
"s_nationkey", index: 3 }, Column { name: "n_nationkey", index: 0 })] | +| | CoalesceBatchesExec: target_batch_size=8192 | +| | RepartitionExec: partitioning=Hash([Column { name: "s_nationkey", index: 3 }], 2), input_partitions=2 | +| | RepartitionExec: partitioning=RoundRobinBatch(2), input_partitions=2 | +| | ProjectionExec: expr=[ps_partkey@0 as ps_partkey, ps_availqty@2 as ps_availqty, ps_supplycost@3 as ps_supplycost, s_nationkey@5 as s_nationkey] | +| | CoalesceBatchesExec: target_batch_size=8192 | +| | HashJoinExec: mode=Partitioned, join_type=Inner, on=[(Column { name: "ps_suppkey", index: 1 }, Column { name: "s_suppkey", index: 0 })] | +| | CoalesceBatchesExec: target_batch_size=8192 | +| | RepartitionExec: partitioning=Hash([Column { name: "ps_suppkey", index: 1 }], 2), input_partitions=0 | +| | MemoryExec: partitions=0, partition_sizes=[] | +| | CoalesceBatchesExec: target_batch_size=8192 | +| | RepartitionExec: partitioning=Hash([Column { name: "s_suppkey", index: 0 }], 2), input_partitions=0 | +| | MemoryExec: partitions=0, partition_sizes=[] | +| | CoalesceBatchesExec: target_batch_size=8192 | +| | RepartitionExec: partitioning=Hash([Column { name: "n_nationkey", index: 0 }], 2), input_partitions=2 | +| | RepartitionExec: partitioning=RoundRobinBatch(2), input_partitions=0 | +| | ProjectionExec: expr=[n_nationkey@0 as n_nationkey] | +| | CoalesceBatchesExec: target_batch_size=8192 | +| | FilterExec: n_name@1 = GERMANY | +| | MemoryExec: partitions=0, partition_sizes=[] | +| | ProjectionExec: expr=[CAST(CAST(SUM(partsupp.ps_supplycost * partsupp.ps_availqty)@0 AS Float64) * 0.0001 AS Decimal128(38, 15)) as SUM(partsupp.ps_supplycost * partsupp.ps_availqty) * Float64(0.0001)] | +| | AggregateExec: mode=Final, gby=[], aggr=[SUM(partsupp.ps_supplycost * partsupp.ps_availqty)] | +| | CoalescePartitionsExec | +| | AggregateExec: mode=Partial, gby=[], aggr=[SUM(partsupp.ps_supplycost * partsupp.ps_availqty)] | +| | ProjectionExec: expr=[ps_availqty@0 as 
ps_availqty, ps_supplycost@1 as ps_supplycost] | +| | CoalesceBatchesExec: target_batch_size=8192 | +| | HashJoinExec: mode=Partitioned, join_type=Inner, on=[(Column { name: "s_nationkey", index: 2 }, Column { name: "n_nationkey", index: 0 })] | +| | CoalesceBatchesExec: target_batch_size=8192 | +| | RepartitionExec: partitioning=Hash([Column { name: "s_nationkey", index: 2 }], 2), input_partitions=2 | +| | RepartitionExec: partitioning=RoundRobinBatch(2), input_partitions=2 | +| | ProjectionExec: expr=[ps_availqty@1 as ps_availqty, ps_supplycost@2 as ps_supplycost, s_nationkey@4 as s_nationkey] | +| | CoalesceBatchesExec: target_batch_size=8192 | +| | HashJoinExec: mode=Partitioned, join_type=Inner, on=[(Column { name: "ps_suppkey", index: 0 }, Column { name: "s_suppkey", index: 0 })] | +| | CoalesceBatchesExec: target_batch_size=8192 | +| | RepartitionExec: partitioning=Hash([Column { name: "ps_suppkey", index: 0 }], 2), input_partitions=0 | +| | MemoryExec: partitions=0, partition_sizes=[] | +| | CoalesceBatchesExec: target_batch_size=8192 | +| | RepartitionExec: partitioning=Hash([Column { name: "s_suppkey", index: 0 }], 2), input_partitions=0 | +| | MemoryExec: partitions=0, partition_sizes=[] | +| | CoalesceBatchesExec: target_batch_size=8192 | +| | RepartitionExec: partitioning=Hash([Column { name: "n_nationkey", index: 0 }], 2), input_partitions=2 | +| | RepartitionExec: partitioning=RoundRobinBatch(2), input_partitions=0 | +| | ProjectionExec: expr=[n_nationkey@0 as n_nationkey] | +| | CoalesceBatchesExec: target_batch_size=8192 | +| | FilterExec: n_name@1 = GERMANY | +| | MemoryExec: partitions=0, partition_sizes=[] | +| | | ++---------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ diff --git a/benchmarks/expected-plans/q15.txt b/benchmarks/expected-plans/q15.txt index 
208f4c6690fb8..ea62c08aeb8d5 100644 --- a/benchmarks/expected-plans/q15.txt +++ b/benchmarks/expected-plans/q15.txt @@ -1,66 +1,64 @@ -+---------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| plan_type | plan | -+---------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| logical_plan | Sort: supplier.s_suppkey ASC NULLS LAST | -| | Projection: supplier.s_suppkey, supplier.s_name, supplier.s_address, supplier.s_phone, revenue0.total_revenue | -| | Inner Join: revenue0.total_revenue = __scalar_sq_1.__value | -| | Projection: supplier.s_suppkey, supplier.s_name, supplier.s_address, supplier.s_phone, revenue0.total_revenue | -| | Inner Join: supplier.s_suppkey = revenue0.supplier_no | -| | TableScan: supplier projection=[s_suppkey, s_name, s_address, s_phone] | -| | SubqueryAlias: revenue0 | -| | Projection: lineitem.l_suppkey AS supplier_no, SUM(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount) AS total_revenue | -| | Aggregate: groupBy=[[lineitem.l_suppkey]], aggr=[[SUM(lineitem.l_extendedprice * (Decimal128(Some(1),20,0) - lineitem.l_discount)) AS SUM(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)]] | -| | Projection: lineitem.l_suppkey, lineitem.l_extendedprice, lineitem.l_discount | -| | Filter: lineitem.l_shipdate >= Date32("9496") AND lineitem.l_shipdate < Date32("9587") | -| | TableScan: lineitem projection=[l_suppkey, l_extendedprice, l_discount, l_shipdate] | -| | SubqueryAlias: __scalar_sq_1 | -| | Projection: MAX(revenue0.total_revenue) AS __value | -| | Aggregate: groupBy=[[]], aggr=[[MAX(revenue0.total_revenue)]] | -| | SubqueryAlias: revenue0 | -| | Projection: 
SUM(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount) AS total_revenue | -| | Aggregate: groupBy=[[lineitem.l_suppkey]], aggr=[[SUM(lineitem.l_extendedprice * (Decimal128(Some(1),20,0) - lineitem.l_discount)) AS SUM(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)]] | -| | Projection: lineitem.l_suppkey, lineitem.l_extendedprice, lineitem.l_discount | -| | Filter: lineitem.l_shipdate >= Date32("9496") AND lineitem.l_shipdate < Date32("9587") | -| | TableScan: lineitem projection=[l_suppkey, l_extendedprice, l_discount, l_shipdate] | -| physical_plan | SortPreservingMergeExec: [s_suppkey@0 ASC NULLS LAST] | -| | SortExec: expr=[s_suppkey@0 ASC NULLS LAST] | -| | ProjectionExec: expr=[s_suppkey@0 as s_suppkey, s_name@1 as s_name, s_address@2 as s_address, s_phone@3 as s_phone, total_revenue@4 as total_revenue] | -| | CoalesceBatchesExec: target_batch_size=8192 | -| | HashJoinExec: mode=Partitioned, join_type=Inner, on=[(Column { name: "total_revenue", index: 4 }, Column { name: "__value", index: 0 })] | -| | CoalesceBatchesExec: target_batch_size=8192 | -| | RepartitionExec: partitioning=Hash([Column { name: "total_revenue", index: 4 }], 2), input_partitions=2 | -| | RepartitionExec: partitioning=RoundRobinBatch(2), input_partitions=2 | -| | ProjectionExec: expr=[s_suppkey@0 as s_suppkey, s_name@1 as s_name, s_address@2 as s_address, s_phone@3 as s_phone, total_revenue@5 as total_revenue] | -| | CoalesceBatchesExec: target_batch_size=8192 | -| | HashJoinExec: mode=Partitioned, join_type=Inner, on=[(Column { name: "s_suppkey", index: 0 }, Column { name: "supplier_no", index: 0 })] | -| | CoalesceBatchesExec: target_batch_size=8192 | -| | RepartitionExec: partitioning=Hash([Column { name: "s_suppkey", index: 0 }], 2), input_partitions=0 | -| | MemoryExec: partitions=0, partition_sizes=[] | -| | ProjectionExec: expr=[l_suppkey@0 as supplier_no, SUM(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)@1 as total_revenue] | -| | AggregateExec: 
mode=FinalPartitioned, gby=[l_suppkey@0 as l_suppkey], aggr=[SUM(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)] | -| | CoalesceBatchesExec: target_batch_size=8192 | -| | RepartitionExec: partitioning=Hash([Column { name: "l_suppkey", index: 0 }], 2), input_partitions=2 | -| | AggregateExec: mode=Partial, gby=[l_suppkey@0 as l_suppkey], aggr=[SUM(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)] | -| | RepartitionExec: partitioning=RoundRobinBatch(2), input_partitions=0 | -| | ProjectionExec: expr=[l_suppkey@0 as l_suppkey, l_extendedprice@1 as l_extendedprice, l_discount@2 as l_discount] | -| | CoalesceBatchesExec: target_batch_size=8192 | -| | FilterExec: l_shipdate@3 >= 9496 AND l_shipdate@3 < 9587 | -| | MemoryExec: partitions=0, partition_sizes=[] | -| | CoalesceBatchesExec: target_batch_size=8192 | -| | RepartitionExec: partitioning=Hash([Column { name: "__value", index: 0 }], 2), input_partitions=1 | -| | ProjectionExec: expr=[MAX(revenue0.total_revenue)@0 as __value] | -| | AggregateExec: mode=Final, gby=[], aggr=[MAX(revenue0.total_revenue)] | -| | CoalescePartitionsExec | -| | AggregateExec: mode=Partial, gby=[], aggr=[MAX(revenue0.total_revenue)] | -| | ProjectionExec: expr=[SUM(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)@1 as total_revenue] | -| | AggregateExec: mode=FinalPartitioned, gby=[l_suppkey@0 as l_suppkey], aggr=[SUM(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)] | -| | CoalesceBatchesExec: target_batch_size=8192 | -| | RepartitionExec: partitioning=Hash([Column { name: "l_suppkey", index: 0 }], 2), input_partitions=2 | -| | AggregateExec: mode=Partial, gby=[l_suppkey@0 as l_suppkey], aggr=[SUM(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)] | -| | RepartitionExec: partitioning=RoundRobinBatch(2), input_partitions=0 | -| | ProjectionExec: expr=[l_suppkey@0 as l_suppkey, l_extendedprice@1 as l_extendedprice, l_discount@2 as l_discount] | -| | CoalesceBatchesExec: 
target_batch_size=8192 | -| | FilterExec: l_shipdate@3 >= 9496 AND l_shipdate@3 < 9587 | -| | MemoryExec: partitions=0, partition_sizes=[] | -| | | -+---------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ \ No newline at end of file ++---------------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| plan_type | plan | ++---------------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| logical_plan | Sort: supplier.s_suppkey ASC NULLS LAST | +| | Projection: supplier.s_suppkey, supplier.s_name, supplier.s_address, supplier.s_phone, revenue0.total_revenue | +| | Inner Join: revenue0.total_revenue = __scalar_sq_1.MAX(revenue0.total_revenue) | +| | Projection: supplier.s_suppkey, supplier.s_name, supplier.s_address, supplier.s_phone, revenue0.total_revenue | +| | Inner Join: supplier.s_suppkey = revenue0.supplier_no | +| | TableScan: supplier projection=[s_suppkey, s_name, s_address, s_phone] | +| | SubqueryAlias: revenue0 | +| | Projection: lineitem.l_suppkey AS supplier_no, SUM(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount) AS total_revenue | +| | Aggregate: groupBy=[[lineitem.l_suppkey]], aggr=[[SUM(lineitem.l_extendedprice * (Decimal128(Some(1),20,0) - lineitem.l_discount)) AS SUM(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)]] | +| | Projection: lineitem.l_suppkey, lineitem.l_extendedprice, lineitem.l_discount | +| | Filter: lineitem.l_shipdate >= Date32("9496") AND lineitem.l_shipdate < Date32("9587") | +| | TableScan: lineitem 
projection=[l_suppkey, l_extendedprice, l_discount, l_shipdate] | +| | SubqueryAlias: __scalar_sq_1 | +| | Aggregate: groupBy=[[]], aggr=[[MAX(revenue0.total_revenue)]] | +| | SubqueryAlias: revenue0 | +| | Projection: SUM(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount) AS total_revenue | +| | Aggregate: groupBy=[[lineitem.l_suppkey]], aggr=[[SUM(lineitem.l_extendedprice * (Decimal128(Some(1),20,0) - lineitem.l_discount)) AS SUM(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)]] | +| | Projection: lineitem.l_suppkey, lineitem.l_extendedprice, lineitem.l_discount | +| | Filter: lineitem.l_shipdate >= Date32("9496") AND lineitem.l_shipdate < Date32("9587") | +| | TableScan: lineitem projection=[l_suppkey, l_extendedprice, l_discount, l_shipdate] | +| physical_plan | SortPreservingMergeExec: [s_suppkey@0 ASC NULLS LAST] | +| | SortExec: expr=[s_suppkey@0 ASC NULLS LAST] | +| | ProjectionExec: expr=[s_suppkey@0 as s_suppkey, s_name@1 as s_name, s_address@2 as s_address, s_phone@3 as s_phone, total_revenue@4 as total_revenue] | +| | CoalesceBatchesExec: target_batch_size=8192 | +| | HashJoinExec: mode=Partitioned, join_type=Inner, on=[(Column { name: "total_revenue", index: 4 }, Column { name: "MAX(revenue0.total_revenue)", index: 0 })] | +| | CoalesceBatchesExec: target_batch_size=8192 | +| | RepartitionExec: partitioning=Hash([Column { name: "total_revenue", index: 4 }], 2), input_partitions=2 | +| | RepartitionExec: partitioning=RoundRobinBatch(2), input_partitions=2 | +| | ProjectionExec: expr=[s_suppkey@0 as s_suppkey, s_name@1 as s_name, s_address@2 as s_address, s_phone@3 as s_phone, total_revenue@5 as total_revenue] | +| | CoalesceBatchesExec: target_batch_size=8192 | +| | HashJoinExec: mode=Partitioned, join_type=Inner, on=[(Column { name: "s_suppkey", index: 0 }, Column { name: "supplier_no", index: 0 })] | +| | CoalesceBatchesExec: target_batch_size=8192 | +| | RepartitionExec: partitioning=Hash([Column { name: "s_suppkey", index: 0 }], 
2), input_partitions=0 | +| | MemoryExec: partitions=0, partition_sizes=[] | +| | ProjectionExec: expr=[l_suppkey@0 as supplier_no, SUM(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)@1 as total_revenue] | +| | AggregateExec: mode=FinalPartitioned, gby=[l_suppkey@0 as l_suppkey], aggr=[SUM(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)] | +| | CoalesceBatchesExec: target_batch_size=8192 | +| | RepartitionExec: partitioning=Hash([Column { name: "l_suppkey", index: 0 }], 2), input_partitions=2 | +| | AggregateExec: mode=Partial, gby=[l_suppkey@0 as l_suppkey], aggr=[SUM(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)] | +| | RepartitionExec: partitioning=RoundRobinBatch(2), input_partitions=0 | +| | ProjectionExec: expr=[l_suppkey@0 as l_suppkey, l_extendedprice@1 as l_extendedprice, l_discount@2 as l_discount] | +| | CoalesceBatchesExec: target_batch_size=8192 | +| | FilterExec: l_shipdate@3 >= 9496 AND l_shipdate@3 < 9587 | +| | MemoryExec: partitions=0, partition_sizes=[] | +| | CoalesceBatchesExec: target_batch_size=8192 | +| | RepartitionExec: partitioning=Hash([Column { name: "MAX(revenue0.total_revenue)", index: 0 }], 2), input_partitions=1 | +| | AggregateExec: mode=Final, gby=[], aggr=[MAX(revenue0.total_revenue)] | +| | CoalescePartitionsExec | +| | AggregateExec: mode=Partial, gby=[], aggr=[MAX(revenue0.total_revenue)] | +| | ProjectionExec: expr=[SUM(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)@1 as total_revenue] | +| | AggregateExec: mode=FinalPartitioned, gby=[l_suppkey@0 as l_suppkey], aggr=[SUM(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)] | +| | CoalesceBatchesExec: target_batch_size=8192 | +| | RepartitionExec: partitioning=Hash([Column { name: "l_suppkey", index: 0 }], 2), input_partitions=2 | +| | AggregateExec: mode=Partial, gby=[l_suppkey@0 as l_suppkey], aggr=[SUM(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)] | +| | RepartitionExec: partitioning=RoundRobinBatch(2), 
input_partitions=0 | +| | ProjectionExec: expr=[l_suppkey@0 as l_suppkey, l_extendedprice@1 as l_extendedprice, l_discount@2 as l_discount] | +| | CoalesceBatchesExec: target_batch_size=8192 | +| | FilterExec: l_shipdate@3 >= 9496 AND l_shipdate@3 < 9587 | +| | MemoryExec: partitions=0, partition_sizes=[] | +| | | ++---------------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ diff --git a/benchmarks/expected-plans/q17.txt b/benchmarks/expected-plans/q17.txt index 9924555f6d7c0..4215c9e036409 100644 --- a/benchmarks/expected-plans/q17.txt +++ b/benchmarks/expected-plans/q17.txt @@ -1,49 +1,49 @@ -+---------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| plan_type | plan | -+---------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| logical_plan | Projection: CAST(SUM(lineitem.l_extendedprice) AS Float64) / Float64(7) AS avg_yearly | -| | Aggregate: groupBy=[[]], aggr=[[SUM(lineitem.l_extendedprice)]] | -| | Projection: lineitem.l_extendedprice | -| | Inner Join: part.p_partkey = __scalar_sq_1.l_partkey Filter: CAST(lineitem.l_quantity AS Decimal128(30, 15)) < __scalar_sq_1.__value | -| | Projection: lineitem.l_quantity, lineitem.l_extendedprice, 
part.p_partkey | -| | Inner Join: lineitem.l_partkey = part.p_partkey | -| | TableScan: lineitem projection=[l_partkey, l_quantity, l_extendedprice] | -| | Projection: part.p_partkey | -| | Filter: part.p_brand = Utf8("Brand#23") AND part.p_container = Utf8("MED BOX") | -| | TableScan: part projection=[p_partkey, p_brand, p_container] | -| | SubqueryAlias: __scalar_sq_1 | -| | Projection: lineitem.l_partkey, CAST(Float64(0.2) * CAST(AVG(lineitem.l_quantity) AS Float64) AS Decimal128(30, 15)) AS __value | -| | Aggregate: groupBy=[[lineitem.l_partkey]], aggr=[[AVG(lineitem.l_quantity)]] | -| | TableScan: lineitem projection=[l_partkey, l_quantity] | -| physical_plan | ProjectionExec: expr=[CAST(SUM(lineitem.l_extendedprice)@0 AS Float64) / 7 as avg_yearly] | -| | AggregateExec: mode=Final, gby=[], aggr=[SUM(lineitem.l_extendedprice)] | -| | CoalescePartitionsExec | -| | AggregateExec: mode=Partial, gby=[], aggr=[SUM(lineitem.l_extendedprice)] | -| | ProjectionExec: expr=[l_extendedprice@1 as l_extendedprice] | -| | CoalesceBatchesExec: target_batch_size=8192 | -| | HashJoinExec: mode=Partitioned, join_type=Inner, on=[(Column { name: "p_partkey", index: 2 }, Column { name: "l_partkey", index: 0 })], filter=BinaryExpr { left: CastExpr { expr: Column { name: "l_quantity", index: 0 }, cast_type: Decimal128(30, 15), cast_options: CastOptions { safe: false } }, op: Lt, right: Column { name: "__value", index: 1 } } | -| | CoalesceBatchesExec: target_batch_size=8192 | -| | RepartitionExec: partitioning=Hash([Column { name: "p_partkey", index: 2 }], 2), input_partitions=2 | -| | RepartitionExec: partitioning=RoundRobinBatch(2), input_partitions=2 | -| | ProjectionExec: expr=[l_quantity@1 as l_quantity, l_extendedprice@2 as l_extendedprice, p_partkey@3 as p_partkey] | -| | CoalesceBatchesExec: target_batch_size=8192 | -| | HashJoinExec: mode=Partitioned, join_type=Inner, on=[(Column { name: "l_partkey", index: 0 }, Column { name: "p_partkey", index: 0 })] | -| | 
CoalesceBatchesExec: target_batch_size=8192 | -| | RepartitionExec: partitioning=Hash([Column { name: "l_partkey", index: 0 }], 2), input_partitions=0 | -| | MemoryExec: partitions=0, partition_sizes=[] | -| | CoalesceBatchesExec: target_batch_size=8192 | -| | RepartitionExec: partitioning=Hash([Column { name: "p_partkey", index: 0 }], 2), input_partitions=2 | -| | RepartitionExec: partitioning=RoundRobinBatch(2), input_partitions=0 | -| | ProjectionExec: expr=[p_partkey@0 as p_partkey] | -| | CoalesceBatchesExec: target_batch_size=8192 | -| | FilterExec: p_brand@1 = Brand#23 AND p_container@2 = MED BOX | -| | MemoryExec: partitions=0, partition_sizes=[] | -| | ProjectionExec: expr=[l_partkey@0 as l_partkey, CAST(0.2 * CAST(AVG(lineitem.l_quantity)@1 AS Float64) AS Decimal128(30, 15)) as __value] | -| | AggregateExec: mode=FinalPartitioned, gby=[l_partkey@0 as l_partkey], aggr=[AVG(lineitem.l_quantity)] | -| | CoalesceBatchesExec: target_batch_size=8192 | -| | RepartitionExec: partitioning=Hash([Column { name: "l_partkey", index: 0 }], 2), input_partitions=2 | -| | RepartitionExec: partitioning=RoundRobinBatch(2), input_partitions=0 | -| | AggregateExec: mode=Partial, gby=[l_partkey@0 as l_partkey], aggr=[AVG(lineitem.l_quantity)] | -| | MemoryExec: partitions=0, partition_sizes=[] | -| | | -+---------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ \ No newline at end of file 
++---------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| plan_type | plan | ++---------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| logical_plan | Projection: CAST(SUM(lineitem.l_extendedprice) AS Float64) / Float64(7) AS avg_yearly | +| | Aggregate: groupBy=[[]], aggr=[[SUM(lineitem.l_extendedprice)]] | +| | Projection: lineitem.l_extendedprice | +| | Inner Join: part.p_partkey = __scalar_sq_1.l_partkey Filter: CAST(lineitem.l_quantity AS Decimal128(30, 15)) < __scalar_sq_1.Float64(0.2) * AVG(lineitem.l_quantity) | +| | Projection: lineitem.l_quantity, lineitem.l_extendedprice, part.p_partkey | +| | Inner Join: lineitem.l_partkey = part.p_partkey | +| | TableScan: lineitem projection=[l_partkey, l_quantity, l_extendedprice] | +| | Projection: part.p_partkey | +| | Filter: part.p_brand = Utf8("Brand#23") AND part.p_container = Utf8("MED BOX") | +| | TableScan: part projection=[p_partkey, p_brand, p_container] | +| | SubqueryAlias: __scalar_sq_1 | +| | Projection: CAST(Float64(0.2) * CAST(AVG(lineitem.l_quantity) AS Float64) AS Decimal128(30, 15)), lineitem.l_partkey | +| | Aggregate: groupBy=[[lineitem.l_partkey]], aggr=[[AVG(lineitem.l_quantity)]] | +| | TableScan: lineitem projection=[l_partkey, l_quantity] | +| physical_plan | ProjectionExec: 
expr=[CAST(SUM(lineitem.l_extendedprice)@0 AS Float64) / 7 as avg_yearly] | +| | AggregateExec: mode=Final, gby=[], aggr=[SUM(lineitem.l_extendedprice)] | +| | CoalescePartitionsExec | +| | AggregateExec: mode=Partial, gby=[], aggr=[SUM(lineitem.l_extendedprice)] | +| | ProjectionExec: expr=[l_extendedprice@1 as l_extendedprice] | +| | CoalesceBatchesExec: target_batch_size=8192 | +| | HashJoinExec: mode=Partitioned, join_type=Inner, on=[(Column { name: "p_partkey", index: 2 }, Column { name: "l_partkey", index: 1 })], filter=BinaryExpr { left: CastExpr { expr: Column { name: "l_quantity", index: 0 }, cast_type: Decimal128(30, 15), cast_options: CastOptions { safe: false } }, op: Lt, right: Column { name: "Float64(0.2) * AVG(lineitem.l_quantity)", index: 1 } } | +| | CoalesceBatchesExec: target_batch_size=8192 | +| | RepartitionExec: partitioning=Hash([Column { name: "p_partkey", index: 2 }], 2), input_partitions=2 | +| | RepartitionExec: partitioning=RoundRobinBatch(2), input_partitions=2 | +| | ProjectionExec: expr=[l_quantity@1 as l_quantity, l_extendedprice@2 as l_extendedprice, p_partkey@3 as p_partkey] | +| | CoalesceBatchesExec: target_batch_size=8192 | +| | HashJoinExec: mode=Partitioned, join_type=Inner, on=[(Column { name: "l_partkey", index: 0 }, Column { name: "p_partkey", index: 0 })] | +| | CoalesceBatchesExec: target_batch_size=8192 | +| | RepartitionExec: partitioning=Hash([Column { name: "l_partkey", index: 0 }], 2), input_partitions=0 | +| | MemoryExec: partitions=0, partition_sizes=[] | +| | CoalesceBatchesExec: target_batch_size=8192 | +| | RepartitionExec: partitioning=Hash([Column { name: "p_partkey", index: 0 }], 2), input_partitions=2 | +| | RepartitionExec: partitioning=RoundRobinBatch(2), input_partitions=0 | +| | ProjectionExec: expr=[p_partkey@0 as p_partkey] | +| | CoalesceBatchesExec: target_batch_size=8192 | +| | FilterExec: p_brand@1 = Brand#23 AND p_container@2 = MED BOX | +| | MemoryExec: partitions=0, partition_sizes=[] | +| | 
ProjectionExec: expr=[CAST(0.2 * CAST(AVG(lineitem.l_quantity)@1 AS Float64) AS Decimal128(30, 15)) as Float64(0.2) * AVG(lineitem.l_quantity), l_partkey@0 as l_partkey] | +| | AggregateExec: mode=FinalPartitioned, gby=[l_partkey@0 as l_partkey], aggr=[AVG(lineitem.l_quantity)] | +| | CoalesceBatchesExec: target_batch_size=8192 | +| | RepartitionExec: partitioning=Hash([Column { name: "l_partkey", index: 0 }], 2), input_partitions=2 | +| | RepartitionExec: partitioning=RoundRobinBatch(2), input_partitions=0 | +| | AggregateExec: mode=Partial, gby=[l_partkey@0 as l_partkey], aggr=[AVG(lineitem.l_quantity)] | +| | MemoryExec: partitions=0, partition_sizes=[] | +| | | ++---------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ \ No newline at end of file diff --git a/benchmarks/expected-plans/q2.txt b/benchmarks/expected-plans/q2.txt index c503bd2e0b713..bb8d8930a30bb 100644 --- a/benchmarks/expected-plans/q2.txt +++ b/benchmarks/expected-plans/q2.txt @@ -3,7 +3,7 @@ +---------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ | logical_plan | Sort: supplier.s_acctbal DESC NULLS FIRST, nation.n_name ASC NULLS LAST, supplier.s_name ASC NULLS LAST, part.p_partkey ASC NULLS LAST | | | Projection: supplier.s_acctbal, supplier.s_name, nation.n_name, part.p_partkey, part.p_mfgr, supplier.s_address, supplier.s_phone, supplier.s_comment | -| | Inner Join: part.p_partkey = 
__scalar_sq_1.ps_partkey, partsupp.ps_supplycost = __scalar_sq_1.__value | +| | Inner Join: part.p_partkey = __scalar_sq_1.ps_partkey, partsupp.ps_supplycost = __scalar_sq_1.MIN(partsupp.ps_supplycost) | | | Projection: part.p_partkey, part.p_mfgr, supplier.s_name, supplier.s_address, supplier.s_phone, supplier.s_acctbal, supplier.s_comment, partsupp.ps_supplycost, nation.n_name | | | Inner Join: nation.n_regionkey = region.r_regionkey | | | Projection: part.p_partkey, part.p_mfgr, supplier.s_name, supplier.s_address, supplier.s_phone, supplier.s_acctbal, supplier.s_comment, partsupp.ps_supplycost, nation.n_name, nation.n_regionkey | @@ -22,7 +22,7 @@ | | Filter: region.r_name = Utf8("EUROPE") | | | TableScan: region projection=[r_regionkey, r_name] | | | SubqueryAlias: __scalar_sq_1 | -| | Projection: partsupp.ps_partkey, MIN(partsupp.ps_supplycost) AS __value | +| | Projection: MIN(partsupp.ps_supplycost), partsupp.ps_partkey | | | Aggregate: groupBy=[[partsupp.ps_partkey]], aggr=[[MIN(partsupp.ps_supplycost)]] | | | Projection: partsupp.ps_partkey, partsupp.ps_supplycost | | | Inner Join: nation.n_regionkey = region.r_regionkey | @@ -40,7 +40,7 @@ | | SortExec: expr=[s_acctbal@0 DESC,n_name@2 ASC NULLS LAST,s_name@1 ASC NULLS LAST,p_partkey@3 ASC NULLS LAST] | | | ProjectionExec: expr=[s_acctbal@5 as s_acctbal, s_name@2 as s_name, n_name@8 as n_name, p_partkey@0 as p_partkey, p_mfgr@1 as p_mfgr, s_address@3 as s_address, s_phone@4 as s_phone, s_comment@6 as s_comment] | | | CoalesceBatchesExec: target_batch_size=8192 | -| | HashJoinExec: mode=Partitioned, join_type=Inner, on=[(Column { name: "p_partkey", index: 0 }, Column { name: "ps_partkey", index: 0 }), (Column { name: "ps_supplycost", index: 7 }, Column { name: "__value", index: 1 })] | +| | HashJoinExec: mode=Partitioned, join_type=Inner, on=[(Column { name: "p_partkey", index: 0 }, Column { name: "ps_partkey", index: 1 }), (Column { name: "ps_supplycost", index: 7 }, Column { name: 
"MIN(partsupp.ps_supplycost)", index: 0 })] | | | CoalesceBatchesExec: target_batch_size=8192 | | | RepartitionExec: partitioning=Hash([Column { name: "p_partkey", index: 0 }, Column { name: "ps_supplycost", index: 7 }], 2), input_partitions=2 | | | ProjectionExec: expr=[p_partkey@0 as p_partkey, p_mfgr@1 as p_mfgr, s_name@2 as s_name, s_address@3 as s_address, s_phone@4 as s_phone, s_acctbal@5 as s_acctbal, s_comment@6 as s_comment, ps_supplycost@7 as ps_supplycost, n_name@8 as n_name] | @@ -85,8 +85,8 @@ | | FilterExec: r_name@1 = EUROPE | | | MemoryExec: partitions=0, partition_sizes=[] | | | CoalesceBatchesExec: target_batch_size=8192 | -| | RepartitionExec: partitioning=Hash([Column { name: "ps_partkey", index: 0 }, Column { name: "__value", index: 1 }], 2), input_partitions=2 | -| | ProjectionExec: expr=[ps_partkey@0 as ps_partkey, MIN(partsupp.ps_supplycost)@1 as __value] | +| | RepartitionExec: partitioning=Hash([Column { name: "ps_partkey", index: 1 }, Column { name: "MIN(partsupp.ps_supplycost)", index: 0 }], 2), input_partitions=2 | +| | ProjectionExec: expr=[MIN(partsupp.ps_supplycost)@1 as MIN(partsupp.ps_supplycost), ps_partkey@0 as ps_partkey] | | | AggregateExec: mode=FinalPartitioned, gby=[ps_partkey@0 as ps_partkey], aggr=[MIN(partsupp.ps_supplycost)] | | | CoalesceBatchesExec: target_batch_size=8192 | | | RepartitionExec: partitioning=Hash([Column { name: "ps_partkey", index: 0 }], 2), input_partitions=2 | diff --git a/benchmarks/expected-plans/q20.txt b/benchmarks/expected-plans/q20.txt index 41f2dac58300d..683eba24946c2 100644 --- a/benchmarks/expected-plans/q20.txt +++ b/benchmarks/expected-plans/q20.txt @@ -1,80 +1,80 @@ 
-+---------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| plan_type | plan | -+---------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| logical_plan | Sort: supplier.s_name ASC NULLS LAST | -| | Projection: supplier.s_name, supplier.s_address | -| | LeftSemi Join: supplier.s_suppkey = __correlated_sq_1.ps_suppkey | -| | Projection: supplier.s_suppkey, supplier.s_name, supplier.s_address | -| | Inner Join: supplier.s_nationkey = nation.n_nationkey | -| | TableScan: supplier projection=[s_suppkey, s_name, s_address, s_nationkey] | -| | Projection: nation.n_nationkey | -| | Filter: nation.n_name = Utf8("CANADA") | -| | TableScan: nation projection=[n_nationkey, n_name] | -| | SubqueryAlias: __correlated_sq_1 | -| | Projection: partsupp.ps_suppkey | -| | Inner Join: partsupp.ps_partkey = __scalar_sq_1.l_partkey, partsupp.ps_suppkey = __scalar_sq_1.l_suppkey Filter: CAST(partsupp.ps_availqty AS Float64) > __scalar_sq_1.__value | -| | LeftSemi Join: partsupp.ps_partkey = __correlated_sq_2.p_partkey | -| | TableScan: partsupp projection=[ps_partkey, ps_suppkey, ps_availqty] | -| | SubqueryAlias: __correlated_sq_2 | -| | Projection: part.p_partkey | -| | Filter: part.p_name 
LIKE Utf8("forest%") | -| | TableScan: part projection=[p_partkey, p_name] | -| | SubqueryAlias: __scalar_sq_1 | -| | Projection: lineitem.l_partkey, lineitem.l_suppkey, Float64(0.5) * CAST(SUM(lineitem.l_quantity) AS Float64) AS __value | -| | Aggregate: groupBy=[[lineitem.l_partkey, lineitem.l_suppkey]], aggr=[[SUM(lineitem.l_quantity)]] | -| | Projection: lineitem.l_partkey, lineitem.l_suppkey, lineitem.l_quantity | -| | Filter: lineitem.l_shipdate >= Date32("8766") AND lineitem.l_shipdate < Date32("9131") | -| | TableScan: lineitem projection=[l_partkey, l_suppkey, l_quantity, l_shipdate] | -| physical_plan | SortPreservingMergeExec: [s_name@0 ASC NULLS LAST] | -| | SortExec: expr=[s_name@0 ASC NULLS LAST] | -| | ProjectionExec: expr=[s_name@1 as s_name, s_address@2 as s_address] | -| | CoalesceBatchesExec: target_batch_size=8192 | -| | HashJoinExec: mode=Partitioned, join_type=LeftSemi, on=[(Column { name: "s_suppkey", index: 0 }, Column { name: "ps_suppkey", index: 0 })] | -| | CoalesceBatchesExec: target_batch_size=8192 | -| | RepartitionExec: partitioning=Hash([Column { name: "s_suppkey", index: 0 }], 2), input_partitions=2 | -| | RepartitionExec: partitioning=RoundRobinBatch(2), input_partitions=2 | -| | ProjectionExec: expr=[s_suppkey@0 as s_suppkey, s_name@1 as s_name, s_address@2 as s_address] | -| | CoalesceBatchesExec: target_batch_size=8192 | -| | HashJoinExec: mode=Partitioned, join_type=Inner, on=[(Column { name: "s_nationkey", index: 3 }, Column { name: "n_nationkey", index: 0 })] | -| | CoalesceBatchesExec: target_batch_size=8192 | -| | RepartitionExec: partitioning=Hash([Column { name: "s_nationkey", index: 3 }], 2), input_partitions=0 | -| | MemoryExec: partitions=0, partition_sizes=[] | -| | CoalesceBatchesExec: target_batch_size=8192 | -| | RepartitionExec: partitioning=Hash([Column { name: "n_nationkey", index: 0 }], 2), input_partitions=2 | -| | RepartitionExec: partitioning=RoundRobinBatch(2), input_partitions=0 | -| | ProjectionExec: 
expr=[n_nationkey@0 as n_nationkey] | -| | CoalesceBatchesExec: target_batch_size=8192 | -| | FilterExec: n_name@1 = CANADA | -| | MemoryExec: partitions=0, partition_sizes=[] | -| | CoalesceBatchesExec: target_batch_size=8192 | -| | RepartitionExec: partitioning=Hash([Column { name: "ps_suppkey", index: 0 }], 2), input_partitions=2 | -| | ProjectionExec: expr=[ps_suppkey@1 as ps_suppkey] | -| | CoalesceBatchesExec: target_batch_size=8192 | -| | HashJoinExec: mode=Partitioned, join_type=Inner, on=[(Column { name: "ps_partkey", index: 0 }, Column { name: "l_partkey", index: 0 }), (Column { name: "ps_suppkey", index: 1 }, Column { name: "l_suppkey", index: 1 })], filter=BinaryExpr { left: CastExpr { expr: Column { name: "ps_availqty", index: 0 }, cast_type: Float64, cast_options: CastOptions { safe: false } }, op: Gt, right: Column { name: "__value", index: 1 } } | -| | CoalesceBatchesExec: target_batch_size=8192 | -| | RepartitionExec: partitioning=Hash([Column { name: "ps_partkey", index: 0 }, Column { name: "ps_suppkey", index: 1 }], 2), input_partitions=2 | -| | RepartitionExec: partitioning=RoundRobinBatch(2), input_partitions=2 | -| | CoalesceBatchesExec: target_batch_size=8192 | -| | HashJoinExec: mode=Partitioned, join_type=LeftSemi, on=[(Column { name: "ps_partkey", index: 0 }, Column { name: "p_partkey", index: 0 })] | -| | CoalesceBatchesExec: target_batch_size=8192 | -| | RepartitionExec: partitioning=Hash([Column { name: "ps_partkey", index: 0 }], 2), input_partitions=0 | -| | MemoryExec: partitions=0, partition_sizes=[] | -| | CoalesceBatchesExec: target_batch_size=8192 | -| | RepartitionExec: partitioning=Hash([Column { name: "p_partkey", index: 0 }], 2), input_partitions=2 | -| | RepartitionExec: partitioning=RoundRobinBatch(2), input_partitions=0 | -| | ProjectionExec: expr=[p_partkey@0 as p_partkey] | -| | CoalesceBatchesExec: target_batch_size=8192 | -| | FilterExec: p_name@1 LIKE forest% | -| | MemoryExec: partitions=0, partition_sizes=[] | -| | 
ProjectionExec: expr=[l_partkey@0 as l_partkey, l_suppkey@1 as l_suppkey, 0.5 * CAST(SUM(lineitem.l_quantity)@2 AS Float64) as __value] | -| | AggregateExec: mode=FinalPartitioned, gby=[l_partkey@0 as l_partkey, l_suppkey@1 as l_suppkey], aggr=[SUM(lineitem.l_quantity)] | -| | CoalesceBatchesExec: target_batch_size=8192 | -| | RepartitionExec: partitioning=Hash([Column { name: "l_partkey", index: 0 }, Column { name: "l_suppkey", index: 1 }], 2), input_partitions=2 | -| | AggregateExec: mode=Partial, gby=[l_partkey@0 as l_partkey, l_suppkey@1 as l_suppkey], aggr=[SUM(lineitem.l_quantity)] | -| | RepartitionExec: partitioning=RoundRobinBatch(2), input_partitions=0 | -| | ProjectionExec: expr=[l_partkey@0 as l_partkey, l_suppkey@1 as l_suppkey, l_quantity@2 as l_quantity] | -| | CoalesceBatchesExec: target_batch_size=8192 | -| | FilterExec: l_shipdate@3 >= 8766 AND l_shipdate@3 < 9131 | -| | MemoryExec: partitions=0, partition_sizes=[] | -| | | -+---------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ \ No newline at end of file ++---------------+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| plan_type | plan | 
++---------------+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| logical_plan | Sort: supplier.s_name ASC NULLS LAST | +| | Projection: supplier.s_name, supplier.s_address | +| | LeftSemi Join: supplier.s_suppkey = __correlated_sq_1.ps_suppkey | +| | Projection: supplier.s_suppkey, supplier.s_name, supplier.s_address | +| | Inner Join: supplier.s_nationkey = nation.n_nationkey | +| | TableScan: supplier projection=[s_suppkey, s_name, s_address, s_nationkey] | +| | Projection: nation.n_nationkey | +| | Filter: nation.n_name = Utf8("CANADA") | +| | TableScan: nation projection=[n_nationkey, n_name] | +| | SubqueryAlias: __correlated_sq_1 | +| | Projection: partsupp.ps_suppkey | +| | Inner Join: partsupp.ps_partkey = __scalar_sq_1.l_partkey, partsupp.ps_suppkey = __scalar_sq_1.l_suppkey Filter: CAST(partsupp.ps_availqty AS Float64) > __scalar_sq_1.Float64(0.5) * SUM(lineitem.l_quantity) | +| | LeftSemi Join: partsupp.ps_partkey = __correlated_sq_2.p_partkey | +| | TableScan: partsupp projection=[ps_partkey, ps_suppkey, ps_availqty] | +| | SubqueryAlias: __correlated_sq_2 | +| | Projection: part.p_partkey | +| | Filter: part.p_name LIKE Utf8("forest%") | +| | TableScan: part projection=[p_partkey, p_name] | +| | SubqueryAlias: __scalar_sq_1 | +| | Projection: Float64(0.5) * CAST(SUM(lineitem.l_quantity) AS Float64), lineitem.l_partkey, lineitem.l_suppkey | +| | Aggregate: groupBy=[[lineitem.l_partkey, lineitem.l_suppkey]], aggr=[[SUM(lineitem.l_quantity)]] | +| | Projection: lineitem.l_partkey, lineitem.l_suppkey, lineitem.l_quantity | +| | 
Filter: lineitem.l_shipdate >= Date32("8766") AND lineitem.l_shipdate < Date32("9131") | +| | TableScan: lineitem projection=[l_partkey, l_suppkey, l_quantity, l_shipdate] | +| physical_plan | SortPreservingMergeExec: [s_name@0 ASC NULLS LAST] | +| | SortExec: expr=[s_name@0 ASC NULLS LAST] | +| | ProjectionExec: expr=[s_name@1 as s_name, s_address@2 as s_address] | +| | CoalesceBatchesExec: target_batch_size=8192 | +| | HashJoinExec: mode=Partitioned, join_type=LeftSemi, on=[(Column { name: "s_suppkey", index: 0 }, Column { name: "ps_suppkey", index: 0 })] | +| | CoalesceBatchesExec: target_batch_size=8192 | +| | RepartitionExec: partitioning=Hash([Column { name: "s_suppkey", index: 0 }], 2), input_partitions=2 | +| | RepartitionExec: partitioning=RoundRobinBatch(2), input_partitions=2 | +| | ProjectionExec: expr=[s_suppkey@0 as s_suppkey, s_name@1 as s_name, s_address@2 as s_address] | +| | CoalesceBatchesExec: target_batch_size=8192 | +| | HashJoinExec: mode=Partitioned, join_type=Inner, on=[(Column { name: "s_nationkey", index: 3 }, Column { name: "n_nationkey", index: 0 })] | +| | CoalesceBatchesExec: target_batch_size=8192 | +| | RepartitionExec: partitioning=Hash([Column { name: "s_nationkey", index: 3 }], 2), input_partitions=0 | +| | MemoryExec: partitions=0, partition_sizes=[] | +| | CoalesceBatchesExec: target_batch_size=8192 | +| | RepartitionExec: partitioning=Hash([Column { name: "n_nationkey", index: 0 }], 2), input_partitions=2 | +| | RepartitionExec: partitioning=RoundRobinBatch(2), input_partitions=0 | +| | ProjectionExec: expr=[n_nationkey@0 as n_nationkey] | +| | CoalesceBatchesExec: target_batch_size=8192 | +| | FilterExec: n_name@1 = CANADA | +| | MemoryExec: partitions=0, partition_sizes=[] | +| | CoalesceBatchesExec: target_batch_size=8192 | +| | RepartitionExec: partitioning=Hash([Column { name: "ps_suppkey", index: 0 }], 2), input_partitions=2 | +| | ProjectionExec: expr=[ps_suppkey@1 as ps_suppkey] | +| | CoalesceBatchesExec: 
target_batch_size=8192 | +| | HashJoinExec: mode=Partitioned, join_type=Inner, on=[(Column { name: "ps_partkey", index: 0 }, Column { name: "l_partkey", index: 1 }), (Column { name: "ps_suppkey", index: 1 }, Column { name: "l_suppkey", index: 2 })], filter=BinaryExpr { left: CastExpr { expr: Column { name: "ps_availqty", index: 0 }, cast_type: Float64, cast_options: CastOptions { safe: false } }, op: Gt, right: Column { name: "Float64(0.5) * SUM(lineitem.l_quantity)", index: 1 } } | +| | CoalesceBatchesExec: target_batch_size=8192 | +| | RepartitionExec: partitioning=Hash([Column { name: "ps_partkey", index: 0 }, Column { name: "ps_suppkey", index: 1 }], 2), input_partitions=2 | +| | RepartitionExec: partitioning=RoundRobinBatch(2), input_partitions=2 | +| | CoalesceBatchesExec: target_batch_size=8192 | +| | HashJoinExec: mode=Partitioned, join_type=LeftSemi, on=[(Column { name: "ps_partkey", index: 0 }, Column { name: "p_partkey", index: 0 })] | +| | CoalesceBatchesExec: target_batch_size=8192 | +| | RepartitionExec: partitioning=Hash([Column { name: "ps_partkey", index: 0 }], 2), input_partitions=0 | +| | MemoryExec: partitions=0, partition_sizes=[] | +| | CoalesceBatchesExec: target_batch_size=8192 | +| | RepartitionExec: partitioning=Hash([Column { name: "p_partkey", index: 0 }], 2), input_partitions=2 | +| | RepartitionExec: partitioning=RoundRobinBatch(2), input_partitions=0 | +| | ProjectionExec: expr=[p_partkey@0 as p_partkey] | +| | CoalesceBatchesExec: target_batch_size=8192 | +| | FilterExec: p_name@1 LIKE forest% | +| | MemoryExec: partitions=0, partition_sizes=[] | +| | ProjectionExec: expr=[0.5 * CAST(SUM(lineitem.l_quantity)@2 AS Float64) as Float64(0.5) * SUM(lineitem.l_quantity), l_partkey@0 as l_partkey, l_suppkey@1 as l_suppkey] | +| | AggregateExec: mode=FinalPartitioned, gby=[l_partkey@0 as l_partkey, l_suppkey@1 as l_suppkey], aggr=[SUM(lineitem.l_quantity)] | +| | CoalesceBatchesExec: target_batch_size=8192 | +| | RepartitionExec: 
partitioning=Hash([Column { name: "l_partkey", index: 0 }, Column { name: "l_suppkey", index: 1 }], 2), input_partitions=2 | +| | AggregateExec: mode=Partial, gby=[l_partkey@0 as l_partkey, l_suppkey@1 as l_suppkey], aggr=[SUM(lineitem.l_quantity)] | +| | RepartitionExec: partitioning=RoundRobinBatch(2), input_partitions=0 | +| | ProjectionExec: expr=[l_partkey@0 as l_partkey, l_suppkey@1 as l_suppkey, l_quantity@2 as l_quantity] | +| | CoalesceBatchesExec: target_batch_size=8192 | +| | FilterExec: l_shipdate@3 >= 8766 AND l_shipdate@3 < 9131 | +| | MemoryExec: partitions=0, partition_sizes=[] | +| | | ++---------------+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ \ No newline at end of file diff --git a/benchmarks/expected-plans/q22.txt b/benchmarks/expected-plans/q22.txt index a84830aceafe7..16aebfe90abf3 100644 --- a/benchmarks/expected-plans/q22.txt +++ b/benchmarks/expected-plans/q22.txt @@ -1,57 +1,55 @@ -+---------------+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| plan_type | plan | 
-+---------------+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| logical_plan | Sort: custsale.cntrycode ASC NULLS LAST | -| | Projection: custsale.cntrycode, COUNT(UInt8(1)) AS numcust, SUM(custsale.c_acctbal) AS totacctbal | -| | Aggregate: groupBy=[[custsale.cntrycode]], aggr=[[COUNT(UInt8(1)), SUM(custsale.c_acctbal)]] | -| | SubqueryAlias: custsale | -| | Projection: substr(customer.c_phone, Int64(1), Int64(2)) AS cntrycode, customer.c_acctbal | -| | Filter: CAST(customer.c_acctbal AS Decimal128(19, 6)) > __scalar_sq_1.__value | -| | CrossJoin: | -| | Projection: customer.c_phone, customer.c_acctbal | -| | LeftAnti Join: customer.c_custkey = __correlated_sq_1.o_custkey | -| | Filter: substr(customer.c_phone, Int64(1), Int64(2)) IN ([Utf8("13"), Utf8("31"), Utf8("23"), Utf8("29"), Utf8("30"), Utf8("18"), Utf8("17")]) | -| | TableScan: customer projection=[c_custkey, c_phone, c_acctbal] | -| | SubqueryAlias: __correlated_sq_1 | -| | TableScan: orders projection=[o_custkey] | -| | SubqueryAlias: __scalar_sq_1 | -| | Projection: AVG(customer.c_acctbal) AS __value | -| | Aggregate: groupBy=[[]], aggr=[[AVG(customer.c_acctbal)]] | -| | Projection: customer.c_acctbal | -| | Filter: customer.c_acctbal > Decimal128(Some(0),15,2) AND substr(customer.c_phone, Int64(1), Int64(2)) IN ([Utf8("13"), Utf8("31"), Utf8("23"), Utf8("29"), Utf8("30"), Utf8("18"), Utf8("17")]) | -| | TableScan: customer projection=[c_phone, c_acctbal] | -| physical_plan | SortPreservingMergeExec: [cntrycode@0 ASC NULLS LAST] | -| | SortExec: expr=[cntrycode@0 ASC NULLS LAST] | -| | ProjectionExec: expr=[cntrycode@0 as cntrycode, COUNT(UInt8(1))@1 as numcust, SUM(custsale.c_acctbal)@2 as totacctbal] | -| | 
AggregateExec: mode=FinalPartitioned, gby=[cntrycode@0 as cntrycode], aggr=[COUNT(UInt8(1)), SUM(custsale.c_acctbal)] | -| | CoalesceBatchesExec: target_batch_size=8192 | -| | RepartitionExec: partitioning=Hash([Column { name: "cntrycode", index: 0 }], 2), input_partitions=1 | -| | AggregateExec: mode=Partial, gby=[cntrycode@0 as cntrycode], aggr=[COUNT(UInt8(1)), SUM(custsale.c_acctbal)] | -| | ProjectionExec: expr=[substr(c_phone@0, 1, 2) as cntrycode, c_acctbal@1 as c_acctbal] | -| | CoalesceBatchesExec: target_batch_size=8192 | -| | FilterExec: CAST(c_acctbal@1 AS Decimal128(19, 6)) > __value@2 | -| | CrossJoinExec | -| | CoalescePartitionsExec | -| | ProjectionExec: expr=[c_phone@1 as c_phone, c_acctbal@2 as c_acctbal] | -| | CoalesceBatchesExec: target_batch_size=8192 | -| | HashJoinExec: mode=Partitioned, join_type=LeftAnti, on=[(Column { name: "c_custkey", index: 0 }, Column { name: "o_custkey", index: 0 })] | -| | CoalesceBatchesExec: target_batch_size=8192 | -| | RepartitionExec: partitioning=Hash([Column { name: "c_custkey", index: 0 }], 2), input_partitions=2 | -| | RepartitionExec: partitioning=RoundRobinBatch(2), input_partitions=0 | -| | CoalesceBatchesExec: target_batch_size=8192 | -| | FilterExec: Use substr(c_phone@1, 1, 2) IN (SET) ([Literal { value: Utf8("13") }, Literal { value: Utf8("31") }, Literal { value: Utf8("23") }, Literal { value: Utf8("29") }, Literal { value: Utf8("30") }, Literal { value: Utf8("18") }, Literal { value: Utf8("17") }]) | -| | MemoryExec: partitions=0, partition_sizes=[] | -| | CoalesceBatchesExec: target_batch_size=8192 | -| | RepartitionExec: partitioning=Hash([Column { name: "o_custkey", index: 0 }], 2), input_partitions=0 | -| | MemoryExec: partitions=0, partition_sizes=[] | -| | ProjectionExec: expr=[AVG(customer.c_acctbal)@0 as __value] | -| | AggregateExec: mode=Final, gby=[], aggr=[AVG(customer.c_acctbal)] | -| | CoalescePartitionsExec | -| | AggregateExec: mode=Partial, gby=[], aggr=[AVG(customer.c_acctbal)] | 
-| | RepartitionExec: partitioning=RoundRobinBatch(2), input_partitions=0 | -| | ProjectionExec: expr=[c_acctbal@1 as c_acctbal] | -| | CoalesceBatchesExec: target_batch_size=8192 | -| | FilterExec: c_acctbal@1 > Some(0),15,2 AND Use substr(c_phone@0, 1, 2) IN (SET) ([Literal { value: Utf8("13") }, Literal { value: Utf8("31") }, Literal { value: Utf8("23") }, Literal { value: Utf8("29") }, Literal { value: Utf8("30") }, Literal { value: Utf8("18") }, Literal { value: Utf8("17") }]) | -| | MemoryExec: partitions=0, partition_sizes=[] | -| | | -+---------------+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ \ No newline at end of file ++---------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| plan_type | plan | ++---------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| logical_plan | Sort: custsale.cntrycode ASC NULLS LAST | +| | Projection: custsale.cntrycode, COUNT(UInt8(1)) AS numcust, SUM(custsale.c_acctbal) AS totacctbal | +| | Aggregate: groupBy=[[custsale.cntrycode]], aggr=[[COUNT(UInt8(1)), SUM(custsale.c_acctbal)]] | +| | SubqueryAlias: custsale | +| | Projection: 
substr(customer.c_phone, Int64(1), Int64(2)) AS cntrycode, customer.c_acctbal | +| | Filter: CAST(customer.c_acctbal AS Decimal128(19, 6)) > __scalar_sq_1.AVG(customer.c_acctbal) | +| | CrossJoin: | +| | Projection: customer.c_phone, customer.c_acctbal | +| | LeftAnti Join: customer.c_custkey = __correlated_sq_1.o_custkey | +| | Filter: substr(customer.c_phone, Int64(1), Int64(2)) IN ([Utf8("13"), Utf8("31"), Utf8("23"), Utf8("29"), Utf8("30"), Utf8("18"), Utf8("17")]) | +| | TableScan: customer projection=[c_custkey, c_phone, c_acctbal] | +| | SubqueryAlias: __correlated_sq_1 | +| | TableScan: orders projection=[o_custkey] | +| | SubqueryAlias: __scalar_sq_1 | +| | Aggregate: groupBy=[[]], aggr=[[AVG(customer.c_acctbal)]] | +| | Projection: customer.c_acctbal | +| | Filter: customer.c_acctbal > Decimal128(Some(0),15,2) AND substr(customer.c_phone, Int64(1), Int64(2)) IN ([Utf8("13"), Utf8("31"), Utf8("23"), Utf8("29"), Utf8("30"), Utf8("18"), Utf8("17")]) | +| | TableScan: customer projection=[c_phone, c_acctbal] | +| physical_plan | SortPreservingMergeExec: [cntrycode@0 ASC NULLS LAST] | +| | SortExec: expr=[cntrycode@0 ASC NULLS LAST] | +| | ProjectionExec: expr=[cntrycode@0 as cntrycode, COUNT(UInt8(1))@1 as numcust, SUM(custsale.c_acctbal)@2 as totacctbal] | +| | AggregateExec: mode=FinalPartitioned, gby=[cntrycode@0 as cntrycode], aggr=[COUNT(UInt8(1)), SUM(custsale.c_acctbal)] | +| | CoalesceBatchesExec: target_batch_size=8192 | +| | RepartitionExec: partitioning=Hash([Column { name: "cntrycode", index: 0 }], 2), input_partitions=1 | +| | AggregateExec: mode=Partial, gby=[cntrycode@0 as cntrycode], aggr=[COUNT(UInt8(1)), SUM(custsale.c_acctbal)] | +| | ProjectionExec: expr=[substr(c_phone@0, 1, 2) as cntrycode, c_acctbal@1 as c_acctbal] | +| | CoalesceBatchesExec: target_batch_size=8192 | +| | FilterExec: CAST(c_acctbal@1 AS Decimal128(19, 6)) > AVG(customer.c_acctbal)@2 | +| | CrossJoinExec | +| | CoalescePartitionsExec | +| | ProjectionExec: 
expr=[c_phone@1 as c_phone, c_acctbal@2 as c_acctbal] | +| | CoalesceBatchesExec: target_batch_size=8192 | +| | HashJoinExec: mode=Partitioned, join_type=LeftAnti, on=[(Column { name: "c_custkey", index: 0 }, Column { name: "o_custkey", index: 0 })] | +| | CoalesceBatchesExec: target_batch_size=8192 | +| | RepartitionExec: partitioning=Hash([Column { name: "c_custkey", index: 0 }], 2), input_partitions=2 | +| | RepartitionExec: partitioning=RoundRobinBatch(2), input_partitions=0 | +| | CoalesceBatchesExec: target_batch_size=8192 | +| | FilterExec: Use substr(c_phone@1, 1, 2) IN (SET) ([Literal { value: Utf8("13") }, Literal { value: Utf8("31") }, Literal { value: Utf8("23") }, Literal { value: Utf8("29") }, Literal { value: Utf8("30") }, Literal { value: Utf8("18") }, Literal { value: Utf8("17") }]) | +| | MemoryExec: partitions=0, partition_sizes=[] | +| | CoalesceBatchesExec: target_batch_size=8192 | +| | RepartitionExec: partitioning=Hash([Column { name: "o_custkey", index: 0 }], 2), input_partitions=0 | +| | MemoryExec: partitions=0, partition_sizes=[] | +| | AggregateExec: mode=Final, gby=[], aggr=[AVG(customer.c_acctbal)] | +| | CoalescePartitionsExec | +| | AggregateExec: mode=Partial, gby=[], aggr=[AVG(customer.c_acctbal)] | +| | RepartitionExec: partitioning=RoundRobinBatch(2), input_partitions=0 | +| | ProjectionExec: expr=[c_acctbal@1 as c_acctbal] | +| | CoalesceBatchesExec: target_batch_size=8192 | +| | FilterExec: c_acctbal@1 > Some(0),15,2 AND Use substr(c_phone@0, 1, 2) IN (SET) ([Literal { value: Utf8("13") }, Literal { value: Utf8("31") }, Literal { value: Utf8("23") }, Literal { value: Utf8("29") }, Literal { value: Utf8("30") }, Literal { value: Utf8("18") }, Literal { value: Utf8("17") }]) | +| | MemoryExec: partitions=0, partition_sizes=[] | +| | | 
++---------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ \ No newline at end of file diff --git a/datafusion/core/tests/sql/joins.rs b/datafusion/core/tests/sql/joins.rs index 57343ea95c6df..273dc91b76714 100644 --- a/datafusion/core/tests/sql/joins.rs +++ b/datafusion/core/tests/sql/joins.rs @@ -2028,7 +2028,7 @@ async fn subquery_to_join_with_both_side_expr() -> Result<()> { " LeftSemi Join: CAST(t1.t1_id AS Int64) + Int64(12) = __correlated_sq_1.CAST(t2_id AS Int64) + Int64(1) [t1_id:UInt32;N, t1_name:Utf8;N, t1_int:UInt32;N]", " TableScan: t1 projection=[t1_id, t1_name, t1_int] [t1_id:UInt32;N, t1_name:Utf8;N, t1_int:UInt32;N]", " SubqueryAlias: __correlated_sq_1 [CAST(t2_id AS Int64) + Int64(1):Int64;N]", - " Projection: CAST(t2.t2_id AS Int64) + Int64(1) AS CAST(t2_id AS Int64) + Int64(1) [CAST(t2_id AS Int64) + Int64(1):Int64;N]", + " Projection: CAST(t2.t2_id AS Int64) + Int64(1) AS t2.t2_id + Int64(1) AS CAST(t2_id AS Int64) + Int64(1) [CAST(t2_id AS Int64) + Int64(1):Int64;N]", " TableScan: t2 projection=[t2_id] [t2_id:UInt32;N]", ]; @@ -2072,7 +2072,7 @@ async fn subquery_to_join_with_muti_filter() -> Result<()> { " LeftSemi Join: CAST(t1.t1_id AS Int64) + Int64(12) = __correlated_sq_1.CAST(t2_id AS Int64) + Int64(1) Filter: t1.t1_int <= __correlated_sq_1.t2_int [t1_id:UInt32;N, t1_name:Utf8;N, t1_int:UInt32;N]", " TableScan: t1 projection=[t1_id, t1_name, t1_int] [t1_id:UInt32;N, t1_name:Utf8;N, t1_int:UInt32;N]", " SubqueryAlias: __correlated_sq_1 [CAST(t2_id AS Int64) + Int64(1):Int64;N, t2_int:UInt32;N]", - " Projection: CAST(t2.t2_id AS Int64) + Int64(1) AS CAST(t2_id AS Int64) + Int64(1), t2.t2_int [CAST(t2_id AS Int64) + Int64(1):Int64;N, 
t2_int:UInt32;N]", + " Projection: CAST(t2.t2_id AS Int64) + Int64(1) AS t2.t2_id + Int64(1) AS CAST(t2_id AS Int64) + Int64(1), t2.t2_int [CAST(t2_id AS Int64) + Int64(1):Int64;N, t2_int:UInt32;N]", " Filter: t2.t2_int > UInt32(0) [t2_id:UInt32;N, t2_int:UInt32;N]", " TableScan: t2 projection=[t2_id, t2_int] [t2_id:UInt32;N, t2_int:UInt32;N]", ]; @@ -2116,7 +2116,7 @@ async fn three_projection_exprs_subquery_to_join() -> Result<()> { " LeftSemi Join: CAST(t1.t1_id AS Int64) + Int64(12) = __correlated_sq_1.CAST(t2_id AS Int64) + Int64(1) Filter: t1.t1_int <= __correlated_sq_1.t2_int AND t1.t1_name != __correlated_sq_1.t2_name [t1_id:UInt32;N, t1_name:Utf8;N, t1_int:UInt32;N]", " TableScan: t1 projection=[t1_id, t1_name, t1_int] [t1_id:UInt32;N, t1_name:Utf8;N, t1_int:UInt32;N]", " SubqueryAlias: __correlated_sq_1 [CAST(t2_id AS Int64) + Int64(1):Int64;N, t2_int:UInt32;N, t2_name:Utf8;N]", - " Projection: CAST(t2.t2_id AS Int64) + Int64(1) AS CAST(t2_id AS Int64) + Int64(1), t2.t2_int, t2.t2_name [CAST(t2_id AS Int64) + Int64(1):Int64;N, t2_int:UInt32;N, t2_name:Utf8;N]", + " Projection: CAST(t2.t2_id AS Int64) + Int64(1) AS t2.t2_id + Int64(1) AS CAST(t2_id AS Int64) + Int64(1), t2.t2_int, t2.t2_name [CAST(t2_id AS Int64) + Int64(1):Int64;N, t2_int:UInt32;N, t2_name:Utf8;N]", " Filter: t2.t2_int > UInt32(0) [t2_id:UInt32;N, t2_name:Utf8;N, t2_int:UInt32;N]", " TableScan: t2 projection=[t2_id, t2_name, t2_int] [t2_id:UInt32;N, t2_name:Utf8;N, t2_int:UInt32;N]", ]; @@ -2160,7 +2160,7 @@ async fn in_subquery_to_join_with_correlated_outer_filter() -> Result<()> { " Filter: t1.t1_int > UInt32(0) [t1_id:UInt32;N, t1_name:Utf8;N, t1_int:UInt32;N]", " TableScan: t1 projection=[t1_id, t1_name, t1_int] [t1_id:UInt32;N, t1_name:Utf8;N, t1_int:UInt32;N]", " SubqueryAlias: __correlated_sq_1 [CAST(t2_id AS Int64) + Int64(1):Int64;N]", - " Projection: CAST(t2.t2_id AS Int64) + Int64(1) AS CAST(t2_id AS Int64) + Int64(1) [CAST(t2_id AS Int64) + Int64(1):Int64;N]", + " Projection: 
CAST(t2.t2_id AS Int64) + Int64(1) AS t2.t2_id + Int64(1) AS CAST(t2_id AS Int64) + Int64(1) [CAST(t2_id AS Int64) + Int64(1):Int64;N]", " TableScan: t2 projection=[t2_id] [t2_id:UInt32;N]", ]; @@ -2189,7 +2189,7 @@ async fn not_in_subquery_to_join_with_correlated_outer_filter() -> Result<()> { " LeftAnti Join: CAST(t1.t1_id AS Int64) + Int64(12) = __correlated_sq_1.CAST(t2_id AS Int64) + Int64(1) Filter: t1.t1_int > UInt32(0) [t1_id:UInt32;N, t1_name:Utf8;N, t1_int:UInt32;N]", " TableScan: t1 projection=[t1_id, t1_name, t1_int] [t1_id:UInt32;N, t1_name:Utf8;N, t1_int:UInt32;N]", " SubqueryAlias: __correlated_sq_1 [CAST(t2_id AS Int64) + Int64(1):Int64;N]", - " Projection: CAST(t2.t2_id AS Int64) + Int64(1) AS CAST(t2_id AS Int64) + Int64(1) [CAST(t2_id AS Int64) + Int64(1):Int64;N]", + " Projection: CAST(t2.t2_id AS Int64) + Int64(1) AS t2.t2_id + Int64(1) AS CAST(t2_id AS Int64) + Int64(1) [CAST(t2_id AS Int64) + Int64(1):Int64;N]", " TableScan: t2 projection=[t2_id] [t2_id:UInt32;N]", ]; @@ -2220,7 +2220,7 @@ async fn in_subquery_to_join_with_outer_filter() -> Result<()> { " Filter: t1.t1_id > UInt32(0) [t1_id:UInt32;N, t1_name:Utf8;N, t1_int:UInt32;N]", " TableScan: t1 projection=[t1_id, t1_name, t1_int] [t1_id:UInt32;N, t1_name:Utf8;N, t1_int:UInt32;N]", " SubqueryAlias: __correlated_sq_1 [CAST(t2_id AS Int64) + Int64(1):Int64;N, t2_int:UInt32;N, t2_name:Utf8;N]", - " Projection: CAST(t2.t2_id AS Int64) + Int64(1) AS CAST(t2_id AS Int64) + Int64(1), t2.t2_int, t2.t2_name [CAST(t2_id AS Int64) + Int64(1):Int64;N, t2_int:UInt32;N, t2_name:Utf8;N]", + " Projection: CAST(t2.t2_id AS Int64) + Int64(1) AS t2.t2_id + Int64(1) AS CAST(t2_id AS Int64) + Int64(1), t2.t2_int, t2.t2_name [CAST(t2_id AS Int64) + Int64(1):Int64;N, t2_int:UInt32;N, t2_name:Utf8;N]", " TableScan: t2 projection=[t2_id, t2_name, t2_int] [t2_id:UInt32;N, t2_name:Utf8;N, t2_int:UInt32;N]", ]; @@ -2267,10 +2267,10 @@ async fn two_in_subquery_to_join_with_outer_filter() -> Result<()> { " Filter: 
t1.t1_id > UInt32(0) [t1_id:UInt32;N, t1_name:Utf8;N, t1_int:UInt32;N]", " TableScan: t1 projection=[t1_id, t1_name, t1_int] [t1_id:UInt32;N, t1_name:Utf8;N, t1_int:UInt32;N]", " SubqueryAlias: __correlated_sq_1 [CAST(t2_id AS Int64) + Int64(1):Int64;N]", - " Projection: CAST(t2.t2_id AS Int64) + Int64(1) AS CAST(t2_id AS Int64) + Int64(1) [CAST(t2_id AS Int64) + Int64(1):Int64;N]", + " Projection: CAST(t2.t2_id AS Int64) + Int64(1) AS t2.t2_id + Int64(1) AS CAST(t2_id AS Int64) + Int64(1) [CAST(t2_id AS Int64) + Int64(1):Int64;N]", " TableScan: t2 projection=[t2_id] [t2_id:UInt32;N]", " SubqueryAlias: __correlated_sq_2 [CAST(t2_int AS Int64) + Int64(1):Int64;N]", - " Projection: CAST(t2.t2_int AS Int64) + Int64(1) AS CAST(t2_int AS Int64) + Int64(1) [CAST(t2_int AS Int64) + Int64(1):Int64;N]", + " Projection: CAST(t2.t2_int AS Int64) + Int64(1) AS t2.t2_int + Int64(1) AS CAST(t2_int AS Int64) + Int64(1) [CAST(t2_int AS Int64) + Int64(1):Int64;N]", " TableScan: t2 projection=[t2_int] [t2_int:UInt32;N]", ]; @@ -2579,8 +2579,10 @@ async fn exists_distinct_subquery_to_join() -> Result<()> { " LeftAnti Join: Filter: CAST(t1.t1_id AS Int64) + Int64(1) > CAST(__correlated_sq_1.t2_id AS Int64) * Int64(2) [t1_id:UInt32;N, t1_name:Utf8;N, t1_int:UInt32;N]", " TableScan: t1 projection=[t1_id, t1_name, t1_int] [t1_id:UInt32;N, t1_name:Utf8;N, t1_int:UInt32;N]", " SubqueryAlias: __correlated_sq_1 [t2_id:UInt32;N]", - " Aggregate: groupBy=[[t2.t2_id]], aggr=[[]] [t2_id:UInt32;N]", - " TableScan: t2 projection=[t2_id] [t2_id:UInt32;N]", + " Projection: t2.t2_id [t2_id:UInt32;N]", + " Aggregate: groupBy=[[t2.t2_int, t2.t2_id]], aggr=[[]] [t2_int:UInt32;N, t2_id:UInt32;N]", + " Projection: t2.t2_int, t2.t2_id [t2_int:UInt32;N, t2_id:UInt32;N]", + " TableScan: t2 projection=[t2_id, t2_int] [t2_id:UInt32;N, t2_int:UInt32;N]", ]; let formatted = plan.display_indent_schema().to_string(); let actual: Vec<&str> = formatted.trim().lines().collect(); @@ -2620,8 +2622,10 @@ async fn 
exists_distinct_subquery_to_join_with_expr() -> Result<()> { " LeftAnti Join: Filter: CAST(t1.t1_id AS Int64) + Int64(1) > CAST(__correlated_sq_1.t2_id AS Int64) * Int64(2) [t1_id:UInt32;N, t1_name:Utf8;N, t1_int:UInt32;N]", " TableScan: t1 projection=[t1_id, t1_name, t1_int] [t1_id:UInt32;N, t1_name:Utf8;N, t1_int:UInt32;N]", " SubqueryAlias: __correlated_sq_1 [t2_id:UInt32;N]", - " Aggregate: groupBy=[[t2.t2_id]], aggr=[[]] [t2_id:UInt32;N]", - " TableScan: t2 projection=[t2_id] [t2_id:UInt32;N]", + " Projection: t2.t2_id [t2_id:UInt32;N]", + " Aggregate: groupBy=[[t2.t2_id + t2.t2_int, t2.t2_int, t2.t2_id]], aggr=[[]] [t2.t2_id + t2.t2_int:UInt32;N, t2_int:UInt32;N, t2_id:UInt32;N]", + " Projection: t2.t2_id + t2.t2_int, t2.t2_int, t2.t2_id [t2.t2_id + t2.t2_int:UInt32;N, t2_int:UInt32;N, t2_id:UInt32;N]", + " TableScan: t2 projection=[t2_id, t2_int] [t2_id:UInt32;N, t2_int:UInt32;N]", ]; let formatted = plan.display_indent_schema().to_string(); let actual: Vec<&str> = formatted.trim().lines().collect(); @@ -2661,8 +2665,10 @@ async fn exists_distinct_subquery_to_join_with_literal() -> Result<()> { " LeftAnti Join: Filter: CAST(t1.t1_id AS Int64) + Int64(1) > CAST(__correlated_sq_1.t2_id AS Int64) * Int64(2) [t1_id:UInt32;N, t1_name:Utf8;N, t1_int:UInt32;N]", " TableScan: t1 projection=[t1_id, t1_name, t1_int] [t1_id:UInt32;N, t1_name:Utf8;N, t1_int:UInt32;N]", " SubqueryAlias: __correlated_sq_1 [t2_id:UInt32;N]", - " Aggregate: groupBy=[[t2.t2_id]], aggr=[[]] [t2_id:UInt32;N]", - " TableScan: t2 projection=[t2_id] [t2_id:UInt32;N]", + " Projection: t2.t2_id [t2_id:UInt32;N]", + " Aggregate: groupBy=[[Int64(1), t2.t2_int, t2.t2_id]], aggr=[[]] [Int64(1):Int64, t2_int:UInt32;N, t2_id:UInt32;N]", + " Projection: Int64(1), t2.t2_int, t2.t2_id [Int64(1):Int64, t2_int:UInt32;N, t2_id:UInt32;N]", + " TableScan: t2 projection=[t2_id, t2_int] [t2_id:UInt32;N, t2_int:UInt32;N]", ]; let formatted = plan.display_indent_schema().to_string(); let actual: Vec<&str> = 
formatted.trim().lines().collect(); diff --git a/datafusion/core/tests/sql/subqueries.rs b/datafusion/core/tests/sql/subqueries.rs index 640628e0b5006..5b55b2e031ca4 100644 --- a/datafusion/core/tests/sql/subqueries.rs +++ b/datafusion/core/tests/sql/subqueries.rs @@ -52,16 +52,16 @@ where c_acctbal < ( let actual = format!("{}", plan.display_indent()); let expected = "Sort: customer.c_custkey ASC NULLS LAST\ \n Projection: customer.c_custkey\ - \n Inner Join: customer.c_custkey = __scalar_sq_1.o_custkey Filter: CAST(customer.c_acctbal AS Decimal128(25, 2)) < __scalar_sq_1.__value\ + \n Inner Join: customer.c_custkey = __scalar_sq_1.o_custkey Filter: CAST(customer.c_acctbal AS Decimal128(25, 2)) < __scalar_sq_1.SUM(orders.o_totalprice)\ \n TableScan: customer projection=[c_custkey, c_acctbal]\ \n SubqueryAlias: __scalar_sq_1\ - \n Projection: orders.o_custkey, SUM(orders.o_totalprice) AS __value\ + \n Projection: SUM(orders.o_totalprice), orders.o_custkey\ \n Aggregate: groupBy=[[orders.o_custkey]], aggr=[[SUM(orders.o_totalprice)]]\ \n Projection: orders.o_custkey, orders.o_totalprice\ - \n Inner Join: orders.o_orderkey = __scalar_sq_2.l_orderkey Filter: CAST(orders.o_totalprice AS Decimal128(25, 2)) < __scalar_sq_2.__value\ + \n Inner Join: orders.o_orderkey = __scalar_sq_2.l_orderkey Filter: CAST(orders.o_totalprice AS Decimal128(25, 2)) < __scalar_sq_2.price\ \n TableScan: orders projection=[o_orderkey, o_custkey, o_totalprice]\ \n SubqueryAlias: __scalar_sq_2\ - \n Projection: lineitem.l_orderkey, SUM(lineitem.l_extendedprice) AS price AS __value\ + \n Projection: SUM(lineitem.l_extendedprice) AS price, lineitem.l_orderkey\ \n Aggregate: groupBy=[[lineitem.l_orderkey]], aggr=[[SUM(lineitem.l_extendedprice)]]\ \n TableScan: lineitem projection=[l_orderkey, l_extendedprice]"; assert_eq!(actual, expected); @@ -337,13 +337,12 @@ async fn non_aggregated_correlated_scalar_subquery_with_single_row() -> Result<( let plan = dataframe.into_optimized_plan()?; let 
expected = vec![ - "Projection: t1.t1_id, () AS t2_int [t1_id:UInt32;N, t2_int:Int64]", - " Subquery: [a:Int64]", - " Projection: a [a:Int64]", - " Filter: a = CAST(outer_ref(t1.t1_int) AS Int64) [a:Int64]", - " Projection: Int64(1) AS a [a:Int64]", - " EmptyRelation []", - " TableScan: t1 projection=[t1_id] [t1_id:UInt32;N]", + "Projection: t1.t1_id, __scalar_sq_5.a AS t2_int [t1_id:UInt32;N, t2_int:Int64;N]", + " Left Join: CAST(t1.t1_int AS Int64) = __scalar_sq_5.a [t1_id:UInt32;N, t1_int:UInt32;N, a:Int64;N]", + " TableScan: t1 projection=[t1_id, t1_int] [t1_id:UInt32;N, t1_int:UInt32;N]", + " SubqueryAlias: __scalar_sq_5 [a:Int64]", + " Projection: Int64(1) AS a [a:Int64]", + " EmptyRelation []", ]; let formatted = plan.display_indent_schema().to_string(); let actual: Vec<&str> = formatted.trim().lines().collect(); @@ -352,6 +351,21 @@ async fn non_aggregated_correlated_scalar_subquery_with_single_row() -> Result<( "\n\nexpected:\n\n{expected:#?}\nactual:\n\n{actual:#?}\n\n" ); + // TODO infer nullability in the schema has bug + // // assert data + // let results = execute_to_batches(&ctx, sql).await; + // let expected = vec![ + // "+-------+--------+", + // "| t1_id | t2_int |", + // "+-------+--------+", + // "| 22 | |", + // "| 33 | |", + // "| 11 | 1 |", + // "| 44 | |", + // "+-------+--------+", + // ]; + // assert_batches_eq!(expected, &results); + Ok(()) } @@ -382,11 +396,11 @@ async fn aggregated_correlated_scalar_subquery() -> Result<()> { let plan = dataframe.into_optimized_plan()?; let expected = vec![ - "Projection: t1.t1_id, __scalar_sq_1.__value AS t2_sum [t1_id:UInt32;N, t2_sum:UInt64;N]", - " Left Join: t1.t1_id = __scalar_sq_1.t2_id [t1_id:UInt32;N, t2_id:UInt32;N, __value:UInt64;N]", + "Projection: t1.t1_id, __scalar_sq_1.SUM(t2.t2_int) AS t2_sum [t1_id:UInt32;N, t2_sum:UInt64;N]", + " Left Join: t1.t1_id = __scalar_sq_1.t2_id [t1_id:UInt32;N, SUM(t2.t2_int):UInt64;N, t2_id:UInt32;N]", " TableScan: t1 projection=[t1_id] [t1_id:UInt32;N]", - 
" SubqueryAlias: __scalar_sq_1 [t2_id:UInt32;N, __value:UInt64;N]", - " Projection: t2.t2_id, SUM(t2.t2_int) AS __value [t2_id:UInt32;N, __value:UInt64;N]", + " SubqueryAlias: __scalar_sq_1 [SUM(t2.t2_int):UInt64;N, t2_id:UInt32;N]", + " Projection: SUM(t2.t2_int), t2.t2_id [SUM(t2.t2_int):UInt64;N, t2_id:UInt32;N]", " Aggregate: groupBy=[[t2.t2_id]], aggr=[[SUM(t2.t2_int)]] [t2_id:UInt32;N, SUM(t2.t2_int):UInt64;N]", " TableScan: t2 projection=[t2_id, t2_int] [t2_id:UInt32;N, t2_int:UInt32;N]", ]; @@ -397,6 +411,105 @@ async fn aggregated_correlated_scalar_subquery() -> Result<()> { "\n\nexpected:\n\n{expected:#?}\nactual:\n\n{actual:#?}\n\n" ); + // assert data + let results = execute_to_batches(&ctx, sql).await; + let expected = vec![ + "+-------+--------+", + "| t1_id | t2_sum |", + "+-------+--------+", + "| 11 | 3 |", + "| 22 | 1 |", + "| 44 | 3 |", + "| 33 | |", + "+-------+--------+", + ]; + assert_batches_eq!(expected, &results); + + Ok(()) +} + +#[tokio::test] +async fn aggregated_correlated_scalar_subquery_with_having() -> Result<()> { + let ctx = create_join_context("t1_id", "t2_id", true)?; + + let sql = "SELECT t1_id, (SELECT sum(t2_int) FROM t2 WHERE t2.t2_id = t1.t1_id having sum(t2_int) < 3) as t2_sum from t1"; + let msg = format!("Creating logical plan for '{sql}'"); + let dataframe = ctx.sql(sql).await.expect(&msg); + let plan = dataframe.into_optimized_plan()?; + + let expected = vec![ + "Projection: t1.t1_id, __scalar_sq_1.SUM(t2.t2_int) AS t2_sum [t1_id:UInt32;N, t2_sum:UInt64;N]", + " Left Join: t1.t1_id = __scalar_sq_1.t2_id [t1_id:UInt32;N, SUM(t2.t2_int):UInt64;N, t2_id:UInt32;N]", + " TableScan: t1 projection=[t1_id] [t1_id:UInt32;N]", + " SubqueryAlias: __scalar_sq_1 [SUM(t2.t2_int):UInt64;N, t2_id:UInt32;N]", + " Projection: SUM(t2.t2_int), t2.t2_id [SUM(t2.t2_int):UInt64;N, t2_id:UInt32;N]", + " Filter: SUM(t2.t2_int) < UInt64(3) [t2_id:UInt32;N, SUM(t2.t2_int):UInt64;N]", + " Aggregate: groupBy=[[t2.t2_id]], aggr=[[SUM(t2.t2_int)]] 
[t2_id:UInt32;N, SUM(t2.t2_int):UInt64;N]", + " TableScan: t2 projection=[t2_id, t2_int] [t2_id:UInt32;N, t2_int:UInt32;N]", + ]; + let formatted = plan.display_indent_schema().to_string(); + let actual: Vec<&str> = formatted.trim().lines().collect(); + assert_eq!( + expected, actual, + "\n\nexpected:\n\n{expected:#?}\nactual:\n\n{actual:#?}\n\n" + ); + + // assert data + let results = execute_to_batches(&ctx, sql).await; + let expected = vec![ + "+-------+--------+", + "| t1_id | t2_sum |", + "+-------+--------+", + "| 22 | 1 |", + "| 11 | |", + "| 33 | |", + "| 44 | |", + "+-------+--------+", + ]; + assert_batches_eq!(expected, &results); + + Ok(()) +} + +#[tokio::test] +async fn aggregated_correlated_scalar_subquery_with_cast() -> Result<()> { + let ctx = create_join_context("t1_id", "t2_id", true)?; + + let sql = "SELECT t1_id, (SELECT sum(t2_int * 1.0) + 1 FROM t2 WHERE t2.t2_id = t1.t1_id) as t2_sum from t1"; + let msg = format!("Creating logical plan for '{sql}'"); + let dataframe = ctx.sql(sql).await.expect(&msg); + let plan = dataframe.into_optimized_plan()?; + + let expected = vec![ + "Projection: t1.t1_id, __scalar_sq_1.SUM(t2.t2_int * Float64(1)) + Int64(1) AS t2_sum [t1_id:UInt32;N, t2_sum:Float64;N]", + " Left Join: t1.t1_id = __scalar_sq_1.t2_id [t1_id:UInt32;N, SUM(t2.t2_int * Float64(1)) + Int64(1):Float64;N, t2_id:UInt32;N]", + " TableScan: t1 projection=[t1_id] [t1_id:UInt32;N]", + " SubqueryAlias: __scalar_sq_1 [SUM(t2.t2_int * Float64(1)) + Int64(1):Float64;N, t2_id:UInt32;N]", + " Projection: SUM(t2.t2_int * Float64(1)) + Float64(1) AS SUM(t2.t2_int * Float64(1)) + Int64(1), t2.t2_id [SUM(t2.t2_int * Float64(1)) + Int64(1):Float64;N, t2_id:UInt32;N]", + " Aggregate: groupBy=[[t2.t2_id]], aggr=[[SUM(CAST(t2.t2_int AS Float64)) AS SUM(t2.t2_int * Float64(1))]] [t2_id:UInt32;N, SUM(t2.t2_int * Float64(1)):Float64;N]", + " TableScan: t2 projection=[t2_id, t2_int] [t2_id:UInt32;N, t2_int:UInt32;N]", + ]; + let formatted = 
plan.display_indent_schema().to_string(); + let actual: Vec<&str> = formatted.trim().lines().collect(); + assert_eq!( + expected, actual, + "\n\nexpected:\n\n{expected:#?}\nactual:\n\n{actual:#?}\n\n" + ); + + // assert data + let results = execute_to_batches(&ctx, sql).await; + let expected = vec![ + "+-------+--------+", + "| t1_id | t2_sum |", + "+-------+--------+", + "| 11 | 4.0 |", + "| 22 | 2.0 |", + "| 44 | 4.0 |", + "| 33 | |", + "+-------+--------+", + ]; + assert_batches_eq!(expected, &results); + Ok(()) } @@ -429,12 +542,12 @@ async fn aggregated_correlated_scalar_subquery_with_extra_group_by_constant() -> let plan = dataframe.into_optimized_plan()?; let expected = vec![ - "Projection: t1.t1_id, __scalar_sq_1.__value AS t2_sum [t1_id:UInt32;N, t2_sum:UInt64;N]", - " Left Join: t1.t1_id = __scalar_sq_1.t2_id [t1_id:UInt32;N, t2_id:UInt32;N, __value:UInt64;N]", + "Projection: t1.t1_id, __scalar_sq_1.SUM(t2.t2_int) AS t2_sum [t1_id:UInt32;N, t2_sum:UInt64;N]", + " Left Join: t1.t1_id = __scalar_sq_1.t2_id [t1_id:UInt32;N, SUM(t2.t2_int):UInt64;N, t2_id:UInt32;N]", " TableScan: t1 projection=[t1_id] [t1_id:UInt32;N]", - " SubqueryAlias: __scalar_sq_1 [t2_id:UInt32;N, __value:UInt64;N]", - " Projection: t2.t2_id, SUM(t2.t2_int) AS __value [t2_id:UInt32;N, __value:UInt64;N]", - " Aggregate: groupBy=[[t2.t2_id]], aggr=[[SUM(t2.t2_int)]] [t2_id:UInt32;N, SUM(t2.t2_int):UInt64;N]", + " SubqueryAlias: __scalar_sq_1 [SUM(t2.t2_int):UInt64;N, t2_id:UInt32;N]", + " Projection: SUM(t2.t2_int), t2.t2_id [SUM(t2.t2_int):UInt64;N, t2_id:UInt32;N]", + " Aggregate: groupBy=[[t2.t2_id, Utf8(\"a\")]], aggr=[[SUM(t2.t2_int)]] [t2_id:UInt32;N, Utf8(\"a\"):Utf8, SUM(t2.t2_int):UInt64;N]", " TableScan: t2 projection=[t2_id, t2_int] [t2_id:UInt32;N, t2_int:UInt32;N]", ]; let formatted = plan.display_indent_schema().to_string(); @@ -444,6 +557,20 @@ async fn aggregated_correlated_scalar_subquery_with_extra_group_by_constant() -> 
"\n\nexpected:\n\n{expected:#?}\nactual:\n\n{actual:#?}\n\n" ); + // assert data + let results = execute_to_batches(&ctx, sql).await; + let expected = vec![ + "+-------+--------+", + "| t1_id | t2_sum |", + "+-------+--------+", + "| 11 | 3 |", + "| 22 | 1 |", + "| 44 | 3 |", + "| 33 | |", + "+-------+--------+", + ]; + assert_batches_eq!(expected, &results); + Ok(()) } @@ -549,7 +676,7 @@ async fn support_join_correlated_columns() -> Result<()> { } #[tokio::test] -async fn support_join_correlated_columns2() -> Result<()> { +async fn subquery_contains_join_contains_correlated_columns() -> Result<()> { let ctx = create_sub_query_join_context("t0_id", "t1_id", "t2_id", true)?; let sql = "SELECT t0_id, t0_name FROM t0 WHERE EXISTS (SELECT 1 FROM t1 INNER JOIN (select * from t2 where t2.t2_name = t0.t0_name) as t2 ON(t1.t1_id = t2.t2_id ))"; let msg = format!("Creating logical plan for '{sql}'"); @@ -557,16 +684,44 @@ async fn support_join_correlated_columns2() -> Result<()> { let plan = dataframe.into_optimized_plan()?; let expected = vec![ - "Filter: EXISTS () [t0_id:UInt32;N, t0_name:Utf8;N]", - " Subquery: [Int64(1):Int64]", - " Projection: Int64(1) [Int64(1):Int64]", - " Inner Join: Filter: t1.t1_id = t2.t2_id [t1_id:UInt32;N, t1_name:Utf8;N, t1_int:UInt32;N, t2_id:UInt32;N, t2_name:Utf8;N, t2_int:UInt32;N]", - " TableScan: t1 [t1_id:UInt32;N, t1_name:Utf8;N, t1_int:UInt32;N]", - " SubqueryAlias: t2 [t2_id:UInt32;N, t2_name:Utf8;N, t2_int:UInt32;N]", - " Projection: t2.t2_id, t2.t2_name, t2.t2_int [t2_id:UInt32;N, t2_name:Utf8;N, t2_int:UInt32;N]", - " Filter: t2.t2_name = outer_ref(t0.t0_name) [t2_id:UInt32;N, t2_name:Utf8;N, t2_int:UInt32;N]", - " TableScan: t2 [t2_id:UInt32;N, t2_name:Utf8;N, t2_int:UInt32;N]", + "LeftSemi Join: t0.t0_name = __correlated_sq_1.t2_name [t0_id:UInt32;N, t0_name:Utf8;N]", + " TableScan: t0 projection=[t0_id, t0_name] [t0_id:UInt32;N, t0_name:Utf8;N]", + " SubqueryAlias: __correlated_sq_1 [t2_name:Utf8;N]", + " Projection: 
t2.t2_name [t2_name:Utf8;N]", + " Inner Join: t1.t1_id = t2.t2_id [t1_id:UInt32;N, t2_id:UInt32;N, t2_name:Utf8;N]", + " TableScan: t1 projection=[t1_id] [t1_id:UInt32;N]", + " SubqueryAlias: t2 [t2_id:UInt32;N, t2_name:Utf8;N]", + " TableScan: t2 projection=[t2_id, t2_name] [t2_id:UInt32;N, t2_name:Utf8;N]", + ]; + let formatted = plan.display_indent_schema().to_string(); + let actual: Vec<&str> = formatted.trim().lines().collect(); + assert_eq!( + expected, actual, + "\n\nexpected:\n\n{expected:#?}\nactual:\n\n{actual:#?}\n\n" + ); + + Ok(()) +} + +#[tokio::test] +async fn subquery_contains_join_contains_sub_query_alias_correlated_columns() -> Result<()> +{ + let ctx = create_sub_query_join_context("t0_id", "t1_id", "t2_id", true)?; + let sql = "SELECT t0_id, t0_name FROM t0 WHERE EXISTS (select 1 from (SELECT * FROM t1 where t1.t1_id = t0.t0_id) as x INNER JOIN (select * from t2 where t2.t2_name = t0.t0_name) as y ON(x.t1_id = y.t2_id))"; + let msg = format!("Creating logical plan for '{sql}'"); + let dataframe = ctx.sql(sql).await.expect(&msg); + let plan = dataframe.into_optimized_plan()?; + + let expected = vec![ + "LeftSemi Join: t0.t0_id = __correlated_sq_1.t1_id, t0.t0_name = __correlated_sq_1.t2_name [t0_id:UInt32;N, t0_name:Utf8;N]", " TableScan: t0 projection=[t0_id, t0_name] [t0_id:UInt32;N, t0_name:Utf8;N]", + " SubqueryAlias: __correlated_sq_1 [t1_id:UInt32;N, t2_name:Utf8;N]", + " Projection: x.t1_id, y.t2_name [t1_id:UInt32;N, t2_name:Utf8;N]", + " Inner Join: x.t1_id = y.t2_id [t1_id:UInt32;N, t2_id:UInt32;N, t2_name:Utf8;N]", + " SubqueryAlias: x [t1_id:UInt32;N]", + " TableScan: t1 projection=[t1_id] [t1_id:UInt32;N]", + " SubqueryAlias: y [t2_id:UInt32;N, t2_name:Utf8;N]", + " TableScan: t2 projection=[t2_id, t2_name] [t2_id:UInt32;N, t2_name:Utf8;N]", ]; let formatted = plan.display_indent_schema().to_string(); let actual: Vec<&str> = formatted.trim().lines().collect(); @@ -606,24 +761,21 @@ async fn support_order_by_correlated_columns() -> 
Result<()> { Ok(()) } -// TODO: issue https://github.com/apache/arrow-datafusion/issues/6263 -#[ignore] #[tokio::test] -async fn support_limit_subquery() -> Result<()> { +async fn exists_subquery_with_select_null() -> Result<()> { let ctx = create_join_context("t1_id", "t2_id", true)?; - let sql = "SELECT t1_id, t1_name FROM t1 WHERE EXISTS (SELECT * FROM t2 WHERE t2_id = t1_id limit 1)"; + let sql = "SELECT t1_id, t1_name FROM t1 WHERE EXISTS (SELECT NULL)"; let msg = format!("Creating logical plan for '{sql}'"); let dataframe = ctx.sql(sql).await.expect(&msg); let plan = dataframe.into_optimized_plan()?; + // decorrelated, limit is removed let expected = vec![ "Filter: EXISTS () [t1_id:UInt32;N, t1_name:Utf8;N]", - " Subquery: [t2_id:UInt32;N, t2_name:Utf8;N, t2_int:UInt32;N]", - " Limit: skip=0, fetch=1 [t2_id:UInt32;N, t2_name:Utf8;N, t2_int:UInt32;N]", - " Projection: t2.t2_id, t2.t2_name, t2.t2_int [t2_id:UInt32;N, t2_name:Utf8;N, t2_int:UInt32;N]", - " Filter: t2.t2_id = outer_ref(t1.t1_id) [t2_id:UInt32;N, t2_name:Utf8;N, t2_int:UInt32;N]", - " TableScan: t2 [t2_id:UInt32;N, t2_name:Utf8;N, t2_int:UInt32;N]", + " Subquery: [NULL:Null;N]", + " Projection: NULL [NULL:Null;N]", + " EmptyRelation []", " TableScan: t1 projection=[t1_id, t1_name] [t1_id:UInt32;N, t1_name:Utf8;N]", ]; let formatted = plan.display_indent_schema().to_string(); @@ -633,11 +785,127 @@ async fn support_limit_subquery() -> Result<()> { "\n\nexpected:\n\n{expected:#?}\nactual:\n\n{actual:#?}\n\n" ); + Ok(()) +} + +#[tokio::test] +async fn exists_subquery_with_limit() -> Result<()> { + let ctx = create_join_context("t1_id", "t2_id", true)?; + + let sql = "SELECT t1_id, t1_name FROM t1 WHERE EXISTS (SELECT * FROM t2 WHERE t2_id = t1_id limit 1)"; + let msg = format!("Creating logical plan for '{sql}'"); + let dataframe = ctx.sql(sql).await.expect(&msg); + let plan = dataframe.into_optimized_plan()?; + + // de-correlated, limit is removed + let expected = vec![ + "LeftSemi Join: t1.t1_id = 
__correlated_sq_1.t2_id [t1_id:UInt32;N, t1_name:Utf8;N]", + " TableScan: t1 projection=[t1_id, t1_name] [t1_id:UInt32;N, t1_name:Utf8;N]", + " SubqueryAlias: __correlated_sq_1 [t2_id:UInt32;N]", + " TableScan: t2 projection=[t2_id] [t2_id:UInt32;N]", + ]; + let formatted = plan.display_indent_schema().to_string(); + let actual: Vec<&str> = formatted.trim().lines().collect(); + assert_eq!( + expected, actual, + "\n\nexpected:\n\n{expected:#?}\nactual:\n\n{actual:#?}\n\n" + ); + + // assert data + let results = execute_to_batches(&ctx, sql).await; + let expected = vec![ + "+-------+---------+", + "| t1_id | t1_name |", + "+-------+---------+", + "| 11 | a |", + "| 22 | b |", + "| 44 | d |", + "+-------+---------+", + ]; + assert_batches_eq!(expected, &results); + + Ok(()) +} + +#[tokio::test] +async fn exists_subquery_with_limit0() -> Result<()> { + let ctx = create_join_context("t1_id", "t2_id", true)?; + + let sql = "SELECT t1_id, t1_name FROM t1 WHERE EXISTS (SELECT * FROM t2 WHERE t2_id = t1_id limit 0)"; + let msg = format!("Creating logical plan for '{sql}'"); + let dataframe = ctx.sql(sql).await.expect(&msg); + let plan = dataframe.into_optimized_plan()?; + + // de-correlated, limit is removed and replaced with EmptyRelation + let expected = vec![ + "LeftSemi Join: t1.t1_id = __correlated_sq_1.t2_id [t1_id:UInt32;N, t1_name:Utf8;N]", + " TableScan: t1 projection=[t1_id, t1_name] [t1_id:UInt32;N, t1_name:Utf8;N]", + " EmptyRelation [t2_id:UInt32;N]", + ]; + let formatted = plan.display_indent_schema().to_string(); + let actual: Vec<&str> = formatted.trim().lines().collect(); + assert_eq!( + expected, actual, + "\n\nexpected:\n\n{expected:#?}\nactual:\n\n{actual:#?}\n\n" + ); + + // assert data + let results = execute_to_batches(&ctx, sql).await; + let expected = vec!["++", "++"]; + assert_batches_eq!(expected, &results); + + Ok(()) +} + +#[tokio::test] +async fn not_exists_subquery_with_limit0() -> Result<()> { + let ctx = create_join_context("t1_id", "t2_id", 
true)?; + + let sql = "SELECT t1_id, t1_name FROM t1 WHERE NOT EXISTS (SELECT * FROM t2 WHERE t2_id = t1_id limit 0)"; + let msg = format!("Creating logical plan for '{sql}'"); + let dataframe = ctx.sql(sql).await.expect(&msg); + let plan = dataframe.into_optimized_plan()?; + + // de-correlated, limit is removed and replaced with EmptyRelation + let expected = vec![ + "LeftAnti Join: t1.t1_id = __correlated_sq_1.t2_id [t1_id:UInt32;N, t1_name:Utf8;N]", + " TableScan: t1 projection=[t1_id, t1_name] [t1_id:UInt32;N, t1_name:Utf8;N]", + " EmptyRelation [t2_id:UInt32;N]", + ]; + let formatted = plan.display_indent_schema().to_string(); + let actual: Vec<&str> = formatted.trim().lines().collect(); + assert_eq!( + expected, actual, + "\n\nexpected:\n\n{expected:#?}\nactual:\n\n{actual:#?}\n\n" + ); + + // assert data + let results = execute_to_batches(&ctx, sql).await; + let expected = vec![ + "+-------+---------+", + "| t1_id | t1_name |", + "+-------+---------+", + "| 11 | a |", + "| 22 | b |", + "| 33 | c |", + "| 44 | d |", + "+-------+---------+", + ]; + assert_batches_eq!(expected, &results); + + Ok(()) +} + +#[tokio::test] +async fn in_correlated_subquery_with_limit() -> Result<()> { + let ctx = create_join_context("t1_id", "t2_id", true)?; + let sql = "SELECT t1_id, t1_name FROM t1 WHERE t1_id in (SELECT t2_id FROM t2 where t1_name = t2_name limit 10)"; let msg = format!("Creating logical plan for '{sql}'"); let dataframe = ctx.sql(sql).await.expect(&msg); let plan = dataframe.into_optimized_plan()?; + // not de-correlated let expected = vec![ "Filter: t1.t1_id IN () [t1_id:UInt32;N, t1_name:Utf8;N]", " Subquery: [t2_id:UInt32;N]", @@ -657,6 +925,34 @@ async fn support_limit_subquery() -> Result<()> { Ok(()) } +#[tokio::test] +async fn in_non_correlated_subquery_with_limit() -> Result<()> { + let ctx = create_join_context("t1_id", "t2_id", true)?; + + let sql = + "SELECT t1_id, t1_name FROM t1 WHERE t1_id in (SELECT t2_id FROM t2 limit 10)"; + let msg = 
format!("Creating logical plan for '{sql}'"); + let dataframe = ctx.sql(sql).await.expect(&msg); + let plan = dataframe.into_optimized_plan()?; + + // de-correlated, limit is kept + let expected = vec![ + "LeftSemi Join: t1.t1_id = __correlated_sq_1.t2_id [t1_id:UInt32;N, t1_name:Utf8;N]", + " TableScan: t1 projection=[t1_id, t1_name] [t1_id:UInt32;N, t1_name:Utf8;N]", + " SubqueryAlias: __correlated_sq_1 [t2_id:UInt32;N]", + " Limit: skip=0, fetch=10 [t2_id:UInt32;N]", + " TableScan: t2 projection=[t2_id], fetch=10 [t2_id:UInt32;N]", + ]; + let formatted = plan.display_indent_schema().to_string(); + let actual: Vec<&str> = formatted.trim().lines().collect(); + assert_eq!( + expected, actual, + "\n\nexpected:\n\n{expected:#?}\nactual:\n\n{actual:#?}\n\n" + ); + + Ok(()) +} + #[tokio::test] async fn support_union_subquery() -> Result<()> { let ctx = create_join_context("t1_id", "t2_id", true)?; @@ -702,11 +998,10 @@ async fn simple_uncorrelated_scalar_subquery() -> Result<()> { let plan = dataframe.into_optimized_plan()?; let expected = vec![ - "Projection: __scalar_sq_1.__value AS b [b:Int64;N]", - " SubqueryAlias: __scalar_sq_1 [__value:Int64;N]", - " Projection: COUNT(UInt8(1)) AS __value [__value:Int64;N]", - " Aggregate: groupBy=[[]], aggr=[[COUNT(UInt8(1))]] [COUNT(UInt8(1)):Int64;N]", - " TableScan: t1 projection=[t1_id] [t1_id:UInt32;N]", + "Projection: __scalar_sq_1.COUNT(UInt8(1)) AS b [b:Int64;N]", + " SubqueryAlias: __scalar_sq_1 [COUNT(UInt8(1)):Int64;N]", + " Aggregate: groupBy=[[]], aggr=[[COUNT(UInt8(1))]] [COUNT(UInt8(1)):Int64;N]", + " TableScan: t1 projection=[t1_id] [t1_id:UInt32;N]", ]; let formatted = plan.display_indent_schema().to_string(); let actual: Vec<&str> = formatted.trim().lines().collect(); @@ -734,16 +1029,14 @@ async fn simple_uncorrelated_scalar_subquery2() -> Result<()> { let plan = dataframe.into_optimized_plan()?; let expected = vec![ - "Projection: __scalar_sq_1.__value AS b, __scalar_sq_2.__value AS c [b:Int64;N, c:Int64;N]", 
- " CrossJoin: [__value:Int64;N, __value:Int64;N]", - " SubqueryAlias: __scalar_sq_1 [__value:Int64;N]", - " Projection: COUNT(UInt8(1)) AS __value [__value:Int64;N]", - " Aggregate: groupBy=[[]], aggr=[[COUNT(UInt8(1))]] [COUNT(UInt8(1)):Int64;N]", - " TableScan: t1 projection=[t1_id] [t1_id:UInt32;N]", - " SubqueryAlias: __scalar_sq_2 [__value:Int64;N]", - " Projection: COUNT(Int64(1)) AS __value [__value:Int64;N]", - " Aggregate: groupBy=[[]], aggr=[[COUNT(Int64(1))]] [COUNT(Int64(1)):Int64;N]", - " TableScan: t2 projection=[t2_id] [t2_id:UInt32;N]", + "Projection: __scalar_sq_1.COUNT(UInt8(1)) AS b, __scalar_sq_2.COUNT(Int64(1)) AS c [b:Int64;N, c:Int64;N]", + " CrossJoin: [COUNT(UInt8(1)):Int64;N, COUNT(Int64(1)):Int64;N]", + " SubqueryAlias: __scalar_sq_1 [COUNT(UInt8(1)):Int64;N]", + " Aggregate: groupBy=[[]], aggr=[[COUNT(UInt8(1))]] [COUNT(UInt8(1)):Int64;N]", + " TableScan: t1 projection=[t1_id] [t1_id:UInt32;N]", + " SubqueryAlias: __scalar_sq_2 [COUNT(Int64(1)):Int64;N]", + " Aggregate: groupBy=[[]], aggr=[[COUNT(Int64(1))]] [COUNT(Int64(1)):Int64;N]", + " TableScan: t2 projection=[t2_id] [t2_id:UInt32;N]", ]; let formatted = plan.display_indent_schema().to_string(); let actual: Vec<&str> = formatted.trim().lines().collect(); diff --git a/datafusion/core/tests/tpcds_planning.rs b/datafusion/core/tests/tpcds_planning.rs index a4875d5cbf338..b6eca18fe5ecb 100644 --- a/datafusion/core/tests/tpcds_planning.rs +++ b/datafusion/core/tests/tpcds_planning.rs @@ -557,7 +557,6 @@ async fn tpcds_physical_q5() -> Result<()> { create_physical_plan(5).await } -#[ignore] // Physical plan does not support logical expression () #[tokio::test] async fn tpcds_physical_q6() -> Result<()> { create_physical_plan(6).await @@ -568,13 +567,11 @@ async fn tpcds_physical_q7() -> Result<()> { create_physical_plan(7).await } -#[ignore] // The type of Int32 = Int64 of binary physical should be same #[tokio::test] async fn tpcds_physical_q8() -> Result<()> { 
create_physical_plan(8).await } -#[ignore] // Physical plan does not support logical expression () #[tokio::test] async fn tpcds_physical_q9() -> Result<()> { create_physical_plan(9).await @@ -601,7 +598,6 @@ async fn tpcds_physical_q13() -> Result<()> { create_physical_plan(13).await } -#[ignore] // Physical plan does not support logical expression () #[tokio::test] async fn tpcds_physical_q14() -> Result<()> { create_physical_plan(14).await @@ -647,7 +643,6 @@ async fn tpcds_physical_q22() -> Result<()> { create_physical_plan(22).await } -#[ignore] // Physical plan does not support logical expression () #[tokio::test] async fn tpcds_physical_q23() -> Result<()> { create_physical_plan(23).await @@ -755,7 +750,6 @@ async fn tpcds_physical_q43() -> Result<()> { create_physical_plan(43).await } -#[ignore] // Physical plan does not support logical expression () #[tokio::test] async fn tpcds_physical_q44() -> Result<()> { create_physical_plan(44).await @@ -807,7 +801,7 @@ async fn tpcds_physical_q53() -> Result<()> { create_physical_plan(53).await } -#[ignore] // Physical plan does not support logical expression () +//#[ignore] // Physical plan does not support logical expression () #[tokio::test] async fn tpcds_physical_q54() -> Result<()> { create_physical_plan(54).await @@ -828,7 +822,6 @@ async fn tpcds_physical_q57() -> Result<()> { create_physical_plan(57).await } -#[ignore] // Physical plan does not support logical expression () #[tokio::test] async fn tpcds_physical_q58() -> Result<()> { create_physical_plan(58).await @@ -859,7 +852,6 @@ async fn tpcds_physical_q63() -> Result<()> { create_physical_plan(63).await } -#[ignore] // thread 'q64' has overflowed its stack #[tokio::test] async fn tpcds_physical_q64() -> Result<()> { create_physical_plan(64).await @@ -965,7 +957,6 @@ async fn tpcds_physical_q84() -> Result<()> { create_physical_plan(84).await } -#[ignore] // Physical plan does not support logical expression () #[tokio::test] async fn 
tpcds_physical_q85() -> Result<()> { create_physical_plan(85).await diff --git a/datafusion/expr/src/logical_plan/plan.rs b/datafusion/expr/src/logical_plan/plan.rs index d95edb7e7c3a4..f1ef72d3148bc 100644 --- a/datafusion/expr/src/logical_plan/plan.rs +++ b/datafusion/expr/src/logical_plan/plan.rs @@ -17,6 +17,7 @@ use crate::expr::InSubquery; use crate::expr::{Exists, Placeholder}; +use crate::expr_rewriter::unnormalize_col; ///! Logical plan types use crate::logical_plan::display::{GraphvizVisitor, IndentVisitor}; use crate::logical_plan::extension::UserDefinedLogicalNode; @@ -402,6 +403,78 @@ impl LogicalPlan { Ok(using_columns) } + pub fn head_output_expr(&self) -> Result<Option<Expr>> { + match self { + LogicalPlan::Projection(projection) => { + Ok(Some(projection.expr.as_slice()[0].clone())) + } + LogicalPlan::Aggregate(agg) => { + if agg.group_expr.is_empty() { + Ok(Some(agg.aggr_expr.as_slice()[0].clone())) + } else { + Ok(Some(agg.group_expr.as_slice()[0].clone())) + } + } + LogicalPlan::Filter(filter) => filter.input.head_output_expr(), + LogicalPlan::Distinct(distinct) => distinct.input.head_output_expr(), + LogicalPlan::Sort(sort) => sort.input.head_output_expr(), + LogicalPlan::Limit(limit) => limit.input.head_output_expr(), + LogicalPlan::Join(Join { + left, + right, + join_type, + .. 
+ }) => match join_type { + JoinType::Inner | JoinType::Left | JoinType::Right | JoinType::Full => { + if left.schema().fields().is_empty() { + right.head_output_expr() + } else { + left.head_output_expr() + } + } + JoinType::LeftSemi | JoinType::LeftAnti => left.head_output_expr(), + JoinType::RightSemi | JoinType::RightAnti => right.head_output_expr(), + }, + LogicalPlan::CrossJoin(cross) => { + if cross.left.schema().fields().is_empty() { + cross.right.head_output_expr() + } else { + cross.left.head_output_expr() + } + } + LogicalPlan::Repartition(repartition) => repartition.input.head_output_expr(), + LogicalPlan::Window(window) => window.input.head_output_expr(), + LogicalPlan::Union(union) => Ok(Some(Expr::Column( + union.schema.fields()[0].qualified_column(), + ))), + LogicalPlan::TableScan(table) => Ok(Some(Expr::Column( + table.projected_schema.fields()[0].qualified_column(), + ))), + LogicalPlan::SubqueryAlias(subquery_alias) => { + let expr_opt = subquery_alias.input.head_output_expr()?; + Ok(expr_opt.map(|expr| { + let col_name = format!("{:?}", unnormalize_col(expr)); + Expr::Column(Column::new( + Some(subquery_alias.alias.clone()), + col_name, + )) + })) + } + LogicalPlan::Subquery(_) => Ok(None), + LogicalPlan::EmptyRelation(_) + | LogicalPlan::Prepare(_) + | LogicalPlan::Statement(_) + | LogicalPlan::Values(_) + | LogicalPlan::Explain(_) + | LogicalPlan::Analyze(_) + | LogicalPlan::Extension(_) + | LogicalPlan::Dml(_) + | LogicalPlan::Ddl(_) + | LogicalPlan::DescribeTable(_) + | LogicalPlan::Unnest(_) => Ok(None), + } + } + pub fn with_new_inputs(&self, inputs: &[LogicalPlan]) -> Result<LogicalPlan> { from_plan(self, &self.expressions(), inputs) } diff --git a/datafusion/optimizer/src/analyzer/subquery.rs b/datafusion/optimizer/src/analyzer/subquery.rs index 7cdedc06b4530..e946cc22b7b91 100644 --- a/datafusion/optimizer/src/analyzer/subquery.rs +++ b/datafusion/optimizer/src/analyzer/subquery.rs @@ -100,6 +100,16 @@ pub fn check_subquery_expr( } 
check_correlations_in_subquery(inner_plan, true) } else { + if let Expr::InSubquery(subquery) = expr { + // InSubquery should only return one column + if subquery.subquery.subquery.schema().fields().len() > 1 { + return Err(datafusion_common::DataFusionError::Plan(format!( + "InSubquery should only return one column, but found {}: {}", + subquery.subquery.subquery.schema().fields().len(), + subquery.subquery.subquery.schema().field_names().join(", "), + ))); + } + } match outer_plan { LogicalPlan::Projection(_) | LogicalPlan::Filter(_) diff --git a/datafusion/optimizer/src/decorrelate_predicate_subquery.rs b/datafusion/optimizer/src/decorrelate_predicate_subquery.rs index 8630c606499b3..bfa5bbba32f8b 100644 --- a/datafusion/optimizer/src/decorrelate_predicate_subquery.rs +++ b/datafusion/optimizer/src/decorrelate_predicate_subquery.rs @@ -18,19 +18,20 @@ use crate::alias::AliasGenerator; use crate::optimizer::ApplyOrder; use crate::utils::{ - collect_subquery_cols, conjunction, extract_join_filters, only_or_err, - replace_qualified_name, split_conjunction, + conjunction, replace_qualified_name, split_conjunction, PullUpCorrelatedExpr, }; use crate::{OptimizerConfig, OptimizerRule}; -use datafusion_common::{context, Column, DataFusionError, Result}; +use datafusion_common::tree_node::TreeNode; +use datafusion_common::{Column, DataFusionError, Result}; use datafusion_expr::expr::{Exists, InSubquery}; use datafusion_expr::expr_rewriter::unnormalize_col; -use datafusion_expr::logical_plan::{JoinType, Projection, Subquery}; +use datafusion_expr::logical_plan::{JoinType, Subquery}; use datafusion_expr::{ - exists, in_subquery, not_exists, not_in_subquery, BinaryExpr, Distinct, Expr, Filter, + exists, in_subquery, not_exists, not_in_subquery, BinaryExpr, Expr, Filter, LogicalPlan, LogicalPlanBuilder, Operator, }; use log::debug; +use std::collections::BTreeSet; use std::ops::Deref; use std::sync::Arc; @@ -200,29 +201,82 @@ fn build_join( left: &LogicalPlan, alias: 
&AliasGenerator, ) -> Result<Option<LogicalPlan>> { - let in_predicate = query_info - .where_in_expr + let where_in_expr_opt = &query_info.where_in_expr; + let in_predicate_opt = where_in_expr_opt .clone() - .map(|in_expr| { - let projection = Projection::try_from_plan(&query_info.query.subquery) - .map_err(|e| context!("a projection is required", e))?; - // TODO add the validate logic to Analyzer - let subquery_expr = only_or_err(projection.expr.as_slice()) - .map_err(|e| context!("single expression projection required", e))?; - - // in_predicate may be also include in the join filters - Ok(Expr::eq(in_expr, subquery_expr.clone())) + .map(|where_in_expr| { + query_info.query.subquery.head_output_expr()?.map_or( + Err(DataFusionError::Plan( + "single expression required.".to_string(), + )), + |expr| Ok(Expr::eq(where_in_expr, expr)), + ) }) - .map_or(Ok(None), |v: Result<Expr>| v.map(Some))?; + .map_or(Ok(None), |v| v.map(Some))?; let subquery = query_info.query.subquery.as_ref(); let subquery_alias = alias.next("__correlated_sq"); - if let Some((join_filter, subquery_plan)) = - pull_up_correlated_expr(subquery, in_predicate, &subquery_alias)? - { - let sub_query_alias = LogicalPlanBuilder::from(subquery_plan) - .alias(subquery_alias.clone())? - .build()?; + + let mut pull_up = PullUpCorrelatedExpr { + join_filters: vec![], + correlated_subquery_cols_map: Default::default(), + in_predicate_opt: in_predicate_opt.clone(), + exists_sub_query: in_predicate_opt.is_none(), + can_pull_up: true, + }; + let new_plan = subquery.clone().rewrite(&mut pull_up)?; + if !pull_up.can_pull_up { + return Ok(None); + } + + let sub_query_alias = LogicalPlanBuilder::from(new_plan) + .alias(subquery_alias.to_string())? 
+ .build()?; + let mut all_correlated_cols = BTreeSet::new(); + pull_up + .correlated_subquery_cols_map + .values() + .for_each(|cols| all_correlated_cols.extend(cols.clone())); + + // alias the join filter + let join_filter_opt = + conjunction(pull_up.join_filters).map_or(Ok(None), |filter| { + replace_qualified_name(filter, &all_correlated_cols, &subquery_alias) + .map(Option::Some) + })?; + + if let Some(join_filter) = match (join_filter_opt, in_predicate_opt) { + ( + Some(join_filter), + Some(Expr::BinaryExpr(BinaryExpr { + left, + op: Operator::Eq, + right, + })), + ) => { + let right_expr_name = format!("{:?}", unnormalize_col(right.deref().clone())); + let right_col = + Column::new(Some(subquery_alias), right_expr_name); + let in_predicate = Expr::eq(left.deref().clone(), Expr::Column(right_col)); + Some(in_predicate.and(join_filter)) + } + (Some(join_filter), _) => Some(join_filter), + ( + _, + Some(Expr::BinaryExpr(BinaryExpr { + left, + op: Operator::Eq, + right, + })), + ) => { + let right_expr_name = format!("{:?}", unnormalize_col(right.deref().clone())); + let right_col = + Column::new(Some(subquery_alias), right_expr_name); + let in_predicate = Expr::eq(left.deref().clone(), Expr::Column(right_col)); + Some(in_predicate) + } + _ => None, + } { // join our sub query into the main plan let join_type = match query_info.negated { true => JoinType::LeftAnti, @@ -246,141 +300,6 @@ fn build_join( } } -/// This function pull up the correlated expressions(contains outer reference columns) from the inner subquery's [Filter]. -/// It adds the inner reference columns to the [Projection] of the subquery if they are missing, so that they can be evaluated by the parent operator as the join condition. -/// -/// This function can't handle the non-correlated subquery, and will return None. 
-fn pull_up_correlated_expr( - subquery: &LogicalPlan, - in_predicate_opt: Option, - subquery_alias: &str, -) -> Result> { - match subquery { - LogicalPlan::Distinct(subqry_distinct) => { - let distinct_input = &subqry_distinct.input; - let optimized_plan = pull_up_correlated_expr( - distinct_input, - in_predicate_opt, - subquery_alias, - )? - .map(|(filters, right)| { - ( - filters, - LogicalPlan::Distinct(Distinct { - input: Arc::new(right), - }), - ) - }); - Ok(optimized_plan) - } - LogicalPlan::Projection(projection) => { - // extract join filters from the inner subquery's Filter - let (mut join_filters, subquery_input) = - extract_join_filters(&projection.input)?; - if in_predicate_opt.is_none() && join_filters.is_empty() { - // cannot rewrite non-correlated subquery - return Ok(None); - } - - if let Some(in_predicate) = &in_predicate_opt { - // in_predicate may be already included in the join filters, remove it from the join filters first. - join_filters = remove_duplicated_filter(join_filters, in_predicate); - } - let input_schema = subquery_input.schema(); - let correlated_subquery_cols = - collect_subquery_cols(&join_filters, input_schema.clone())?; - - // add missing columns to projection - let mut project_exprs: Vec = - if let Some(Expr::BinaryExpr(BinaryExpr { - left: _, - op: Operator::Eq, - right, - })) = &in_predicate_opt - { - if !matches!(right.deref(), Expr::Column(_)) { - vec![right.deref().clone().alias(format!( - "{:?}", - unnormalize_col(right.deref().clone()) - ))] - } else { - vec![right.deref().clone()] - } - } else { - vec![] - }; - // the inner reference cols need to added to the projection if they are missing. 
- for col in correlated_subquery_cols.iter() { - let col_expr = Expr::Column(col.clone()); - if !project_exprs.contains(&col_expr) { - project_exprs.push(col_expr) - } - } - - // alias the join filter - let join_filter_opt = - conjunction(join_filters).map_or(Ok(None), |filter| { - replace_qualified_name( - filter, - &correlated_subquery_cols, - subquery_alias, - ) - .map(Option::Some) - })?; - - let join_filter = if let Some(Expr::BinaryExpr(BinaryExpr { - left, - op: Operator::Eq, - right, - })) = in_predicate_opt - { - let right_expr_name = - format!("{:?}", unnormalize_col(right.deref().clone())); - let right_col = - Column::new(Some(subquery_alias.to_string()), right_expr_name); - let in_predicate = - Expr::eq(left.deref().clone(), Expr::Column(right_col)); - join_filter_opt - .map(|filter| in_predicate.clone().and(filter)) - .unwrap_or_else(|| in_predicate) - } else { - join_filter_opt.ok_or_else(|| { - DataFusionError::Internal( - "join filters should not be empty".to_string(), - ) - })? - }; - - let right = LogicalPlanBuilder::from(subquery_input) - .project(project_exprs)? - .build()?; - Ok(Some((join_filter, right))) - } - _ => Ok(None), - } -} - -fn remove_duplicated_filter(filters: Vec, in_predicate: &Expr) -> Vec { - filters - .into_iter() - .filter(|filter| { - if filter == in_predicate { - return false; - } - - // ignore the binary order - !match (filter, in_predicate) { - (Expr::BinaryExpr(a_expr), Expr::BinaryExpr(b_expr)) => { - (a_expr.op == b_expr.op) - && (a_expr.left == b_expr.left && a_expr.right == b_expr.right) - || (a_expr.left == b_expr.right && a_expr.right == b_expr.left) - } - _ => false, - } - }) - .collect::>() -} - struct SubqueryInfo { query: Subquery, where_in_expr: Option, @@ -914,11 +833,11 @@ mod tests { .build()?; // Maybe okay if the table only has a single column? 
- assert_optimizer_err( - Arc::new(DecorrelatePredicateSubquery::new()), - &plan, - "a projection is required", - ); + let expected = "check_analyzed_plan\ + \ncaused by\ + \nError during planning: InSubquery should only return one column, but found 4"; + assert_analyzer_check_err(vec![], &plan, expected); + Ok(()) } @@ -976,8 +895,8 @@ mod tests { let expected = "Projection: customer.c_custkey [c_custkey:Int64]\ \n LeftSemi Join: Filter: customer.c_custkey = __correlated_sq_1.o_custkey + Int32(1) AND customer.c_custkey = __correlated_sq_1.o_custkey [c_custkey:Int64, c_name:Utf8]\ \n TableScan: customer [c_custkey:Int64, c_name:Utf8]\ - \n SubqueryAlias: __correlated_sq_1 [o_custkey + Int32(1):Int64, o_custkey:Int64]\ - \n Projection: orders.o_custkey + Int32(1) AS o_custkey + Int32(1), orders.o_custkey [o_custkey + Int32(1):Int64, o_custkey:Int64]\ + \n SubqueryAlias: __correlated_sq_1 [o_custkey + Int32(1):Int64, orders.o_custkey + Int32(1):Int64, o_custkey:Int64]\ + \n Projection: orders.o_custkey + Int32(1) AS o_custkey + Int32(1), orders.o_custkey + Int32(1), orders.o_custkey [o_custkey + Int32(1):Int64, orders.o_custkey + Int32(1):Int64, o_custkey:Int64]\ \n TableScan: orders [o_orderkey:Int64, o_custkey:Int64, o_orderstatus:Utf8, o_totalprice:Float64;N]"; assert_optimized_plan_eq_display_indent( @@ -1009,11 +928,11 @@ mod tests { .project(vec![col("customer.c_custkey")])? 
.build()?; - assert_optimizer_err( - Arc::new(DecorrelatePredicateSubquery::new()), - &plan, - "single expression projection required", - ); + let expected = "check_analyzed_plan\ + \ncaused by\ + \nError during planning: InSubquery should only return one column"; + assert_analyzer_check_err(vec![], &plan, expected); + Ok(()) } @@ -1187,8 +1106,8 @@ mod tests { let expected = "Projection: test.b [b:UInt32]\ \n LeftSemi Join: Filter: test.c + UInt32(1) = __correlated_sq_1.c * UInt32(2) [a:UInt32, b:UInt32, c:UInt32]\ \n TableScan: test [a:UInt32, b:UInt32, c:UInt32]\ - \n SubqueryAlias: __correlated_sq_1 [c * UInt32(2):UInt32]\ - \n Projection: sq.c * UInt32(2) AS c * UInt32(2) [c * UInt32(2):UInt32]\ + \n SubqueryAlias: __correlated_sq_1 [c * UInt32(2):UInt32, sq.c * UInt32(2):UInt32]\ + \n Projection: sq.c * UInt32(2) AS c * UInt32(2), sq.c * UInt32(2) [c * UInt32(2):UInt32, sq.c * UInt32(2):UInt32]\ \n TableScan: sq [a:UInt32, b:UInt32, c:UInt32]"; assert_optimized_plan_eq_display_indent( @@ -1221,8 +1140,8 @@ mod tests { let expected = "Projection: test.b [b:UInt32]\ \n LeftSemi Join: Filter: test.c + UInt32(1) = __correlated_sq_1.c * UInt32(2) AND test.a = __correlated_sq_1.a [a:UInt32, b:UInt32, c:UInt32]\ \n TableScan: test [a:UInt32, b:UInt32, c:UInt32]\ - \n SubqueryAlias: __correlated_sq_1 [c * UInt32(2):UInt32, a:UInt32]\ - \n Projection: sq.c * UInt32(2) AS c * UInt32(2), sq.a [c * UInt32(2):UInt32, a:UInt32]\ + \n SubqueryAlias: __correlated_sq_1 [c * UInt32(2):UInt32, sq.c * UInt32(2):UInt32, a:UInt32]\ + \n Projection: sq.c * UInt32(2) AS c * UInt32(2), sq.c * UInt32(2), sq.a [c * UInt32(2):UInt32, sq.c * UInt32(2):UInt32, a:UInt32]\ \n Filter: sq.a + UInt32(1) = sq.b [a:UInt32, b:UInt32, c:UInt32]\ \n TableScan: sq [a:UInt32, b:UInt32, c:UInt32]"; @@ -1257,8 +1176,8 @@ mod tests { let expected = "Projection: test.b [b:UInt32]\ \n LeftSemi Join: Filter: test.c + UInt32(1) = __correlated_sq_1.c * UInt32(2) AND test.a + test.b = __correlated_sq_1.a + 
__correlated_sq_1.b [a:UInt32, b:UInt32, c:UInt32]\ \n TableScan: test [a:UInt32, b:UInt32, c:UInt32]\ - \n SubqueryAlias: __correlated_sq_1 [c * UInt32(2):UInt32, a:UInt32, b:UInt32]\ - \n Projection: sq.c * UInt32(2) AS c * UInt32(2), sq.a, sq.b [c * UInt32(2):UInt32, a:UInt32, b:UInt32]\ + \n SubqueryAlias: __correlated_sq_1 [c * UInt32(2):UInt32, sq.c * UInt32(2):UInt32, a:UInt32, b:UInt32]\ + \n Projection: sq.c * UInt32(2) AS c * UInt32(2), sq.c * UInt32(2), sq.a, sq.b [c * UInt32(2):UInt32, sq.c * UInt32(2):UInt32, a:UInt32, b:UInt32]\ \n Filter: sq.a + UInt32(1) = sq.b [a:UInt32, b:UInt32, c:UInt32]\ \n TableScan: sq [a:UInt32, b:UInt32, c:UInt32]"; @@ -1301,11 +1220,11 @@ mod tests { \n LeftSemi Join: Filter: test.c * UInt32(2) = __correlated_sq_2.c * UInt32(2) AND test.a > __correlated_sq_2.a [a:UInt32, b:UInt32, c:UInt32]\ \n LeftSemi Join: Filter: test.c + UInt32(1) = __correlated_sq_1.c * UInt32(2) AND test.a > __correlated_sq_1.a [a:UInt32, b:UInt32, c:UInt32]\ \n TableScan: test [a:UInt32, b:UInt32, c:UInt32]\ - \n SubqueryAlias: __correlated_sq_1 [c * UInt32(2):UInt32, a:UInt32]\ - \n Projection: sq1.c * UInt32(2) AS c * UInt32(2), sq1.a [c * UInt32(2):UInt32, a:UInt32]\ + \n SubqueryAlias: __correlated_sq_1 [c * UInt32(2):UInt32, sq1.c * UInt32(2):UInt32, a:UInt32]\ + \n Projection: sq1.c * UInt32(2) AS c * UInt32(2), sq1.c * UInt32(2), sq1.a [c * UInt32(2):UInt32, sq1.c * UInt32(2):UInt32, a:UInt32]\ \n TableScan: sq1 [a:UInt32, b:UInt32, c:UInt32]\ - \n SubqueryAlias: __correlated_sq_2 [c * UInt32(2):UInt32, a:UInt32]\ - \n Projection: sq2.c * UInt32(2) AS c * UInt32(2), sq2.a [c * UInt32(2):UInt32, a:UInt32]\ + \n SubqueryAlias: __correlated_sq_2 [c * UInt32(2):UInt32, sq2.c * UInt32(2):UInt32, a:UInt32]\ + \n Projection: sq2.c * UInt32(2) AS c * UInt32(2), sq2.c * UInt32(2), sq2.a [c * UInt32(2):UInt32, sq2.c * UInt32(2):UInt32, a:UInt32]\ \n TableScan: sq2 [a:UInt32, b:UInt32, c:UInt32]"; assert_optimized_plan_eq_display_indent( @@ -1466,12 
+1385,11 @@ mod tests { .build()?; // Other rule will pushdown `customer.c_custkey = 1`, - // TODO revisit the logic, is it a valid physical plan when no cols in projection? let expected = "Projection: customer.c_custkey [c_custkey:Int64]\ \n LeftSemi Join: Filter: customer.c_custkey = UInt32(1) [c_custkey:Int64, c_name:Utf8]\ \n TableScan: customer [c_custkey:Int64, c_name:Utf8]\ - \n SubqueryAlias: __correlated_sq_1 []\ - \n Projection: []\ + \n SubqueryAlias: __correlated_sq_1 [o_custkey:Int64]\ + \n Projection: orders.o_custkey [o_custkey:Int64]\ \n TableScan: orders [o_orderkey:Int64, o_custkey:Int64, o_orderstatus:Utf8, o_totalprice:Float64;N]"; assert_optimized_plan_equal(&plan, expected) @@ -1585,7 +1503,10 @@ mod tests { fn exists_subquery_no_projection() -> Result<()> { let sq = Arc::new( LogicalPlanBuilder::from(scan_tpch_table("orders")) - .filter(col("customer.c_custkey").eq(col("orders.o_custkey")))? + .filter( + out_ref_col(DataType::Int64, "customer.c_custkey") + .eq(col("orders.o_custkey")), + )? .build()?, ); @@ -1594,7 +1515,13 @@ mod tests { .project(vec![col("customer.c_custkey")])? 
.build()?; - assert_optimization_skipped(Arc::new(DecorrelatePredicateSubquery::new()), &plan) + let expected = "Projection: customer.c_custkey [c_custkey:Int64]\ + \n LeftSemi Join: Filter: customer.c_custkey = __correlated_sq_1.o_custkey [c_custkey:Int64, c_name:Utf8]\ + \n TableScan: customer [c_custkey:Int64, c_name:Utf8]\ + \n SubqueryAlias: __correlated_sq_1 [o_orderkey:Int64, o_custkey:Int64, o_orderstatus:Utf8, o_totalprice:Float64;N]\ + \n TableScan: orders [o_orderkey:Int64, o_custkey:Int64, o_orderstatus:Utf8, o_totalprice:Float64;N]"; + + assert_optimized_plan_equal(&plan, expected) } /// Test for correlated exists expressions @@ -1618,8 +1545,8 @@ mod tests { let expected = "Projection: customer.c_custkey [c_custkey:Int64]\ \n LeftSemi Join: Filter: customer.c_custkey = __correlated_sq_1.o_custkey [c_custkey:Int64, c_name:Utf8]\ \n TableScan: customer [c_custkey:Int64, c_name:Utf8]\ - \n SubqueryAlias: __correlated_sq_1 [o_custkey:Int64]\ - \n Projection: orders.o_custkey [o_custkey:Int64]\ + \n SubqueryAlias: __correlated_sq_1 [orders.o_custkey + Int32(1):Int64, o_custkey:Int64]\ + \n Projection: orders.o_custkey + Int32(1), orders.o_custkey [orders.o_custkey + Int32(1):Int64, o_custkey:Int64]\ \n TableScan: orders [o_orderkey:Int64, o_custkey:Int64, o_orderstatus:Utf8, o_totalprice:Float64;N]"; assert_optimized_plan_equal(&plan, expected) @@ -1698,8 +1625,8 @@ mod tests { let expected = "Projection: test.c [c:UInt32]\ \n LeftSemi Join: Filter: test.a = __correlated_sq_1.a [a:UInt32, b:UInt32, c:UInt32]\ \n TableScan: test [a:UInt32, b:UInt32, c:UInt32]\ - \n SubqueryAlias: __correlated_sq_1 [a:UInt32]\ - \n Projection: sq.a [a:UInt32]\ + \n SubqueryAlias: __correlated_sq_1 [c:UInt32, a:UInt32]\ + \n Projection: sq.c, sq.a [c:UInt32, a:UInt32]\ \n TableScan: sq [a:UInt32, b:UInt32, c:UInt32]"; assert_optimized_plan_equal(&plan, expected) @@ -1758,11 +1685,11 @@ mod tests { \n LeftSemi Join: Filter: test.a = __correlated_sq_2.a [a:UInt32, b:UInt32, 
c:UInt32]\ \n LeftSemi Join: Filter: test.a = __correlated_sq_1.a [a:UInt32, b:UInt32, c:UInt32]\ \n TableScan: test [a:UInt32, b:UInt32, c:UInt32]\ - \n SubqueryAlias: __correlated_sq_1 [a:UInt32]\ - \n Projection: sq1.a [a:UInt32]\ + \n SubqueryAlias: __correlated_sq_1 [c:UInt32, a:UInt32]\ + \n Projection: sq1.c, sq1.a [c:UInt32, a:UInt32]\ \n TableScan: sq1 [a:UInt32, b:UInt32, c:UInt32]\ - \n SubqueryAlias: __correlated_sq_2 [a:UInt32]\ - \n Projection: sq2.a [a:UInt32]\ + \n SubqueryAlias: __correlated_sq_2 [c:UInt32, a:UInt32]\ + \n Projection: sq2.c, sq2.a [c:UInt32, a:UInt32]\ \n TableScan: sq2 [a:UInt32, b:UInt32, c:UInt32]"; assert_optimized_plan_equal(&plan, expected) @@ -1787,8 +1714,8 @@ mod tests { let expected = "Projection: test.b [b:UInt32]\ \n LeftSemi Join: Filter: UInt32(1) + __correlated_sq_1.a > test.a * UInt32(2) [a:UInt32, b:UInt32, c:UInt32]\ \n TableScan: test [a:UInt32, b:UInt32, c:UInt32]\ - \n SubqueryAlias: __correlated_sq_1 [a:UInt32]\ - \n Projection: sq.a [a:UInt32]\ + \n SubqueryAlias: __correlated_sq_1 [UInt32(1):UInt32, a:UInt32]\ + \n Projection: UInt32(1), sq.a [UInt32(1):UInt32, a:UInt32]\ \n TableScan: sq [a:UInt32, b:UInt32, c:UInt32]"; assert_optimized_plan_equal(&plan, expected) @@ -1840,9 +1767,9 @@ mod tests { let expected = "Projection: test.b [b:UInt32]\ \n LeftSemi Join: Filter: UInt32(1) + __correlated_sq_1.a > test.a * UInt32(2) [a:UInt32, b:UInt32, c:UInt32]\ \n TableScan: test [a:UInt32, b:UInt32, c:UInt32]\ - \n SubqueryAlias: __correlated_sq_1 [a:UInt32]\ - \n Distinct: [a:UInt32]\ - \n Projection: sq.a [a:UInt32]\ + \n SubqueryAlias: __correlated_sq_1 [c:UInt32, a:UInt32]\ + \n Distinct: [c:UInt32, a:UInt32]\ + \n Projection: sq.c, sq.a [c:UInt32, a:UInt32]\ \n TableScan: sq [a:UInt32, b:UInt32, c:UInt32]"; assert_optimized_plan_equal(&plan, expected) @@ -1868,9 +1795,9 @@ mod tests { let expected = "Projection: test.b [b:UInt32]\ \n LeftSemi Join: Filter: UInt32(1) + __correlated_sq_1.a > test.a * UInt32(2) 
[a:UInt32, b:UInt32, c:UInt32]\ \n TableScan: test [a:UInt32, b:UInt32, c:UInt32]\ - \n SubqueryAlias: __correlated_sq_1 [a:UInt32]\ - \n Distinct: [a:UInt32]\ - \n Projection: sq.a [a:UInt32]\ + \n SubqueryAlias: __correlated_sq_1 [sq.b + sq.c:UInt32, a:UInt32]\ + \n Distinct: [sq.b + sq.c:UInt32, a:UInt32]\ + \n Projection: sq.b + sq.c, sq.a [sq.b + sq.c:UInt32, a:UInt32]\ \n TableScan: sq [a:UInt32, b:UInt32, c:UInt32]"; assert_optimized_plan_equal(&plan, expected) @@ -1896,9 +1823,9 @@ mod tests { let expected = "Projection: test.b [b:UInt32]\ \n LeftSemi Join: Filter: UInt32(1) + __correlated_sq_1.a > test.a * UInt32(2) [a:UInt32, b:UInt32, c:UInt32]\ \n TableScan: test [a:UInt32, b:UInt32, c:UInt32]\ - \n SubqueryAlias: __correlated_sq_1 [a:UInt32]\ - \n Distinct: [a:UInt32]\ - \n Projection: sq.a [a:UInt32]\ + \n SubqueryAlias: __correlated_sq_1 [UInt32(1):UInt32, c:UInt32, a:UInt32]\ + \n Distinct: [UInt32(1):UInt32, c:UInt32, a:UInt32]\ + \n Projection: UInt32(1), sq.c, sq.a [UInt32(1):UInt32, c:UInt32, a:UInt32]\ \n TableScan: sq [a:UInt32, b:UInt32, c:UInt32]"; assert_optimized_plan_equal(&plan, expected) diff --git a/datafusion/optimizer/src/scalar_subquery_to_join.rs b/datafusion/optimizer/src/scalar_subquery_to_join.rs index 26f86c607a22b..4d8002c1a9129 100644 --- a/datafusion/optimizer/src/scalar_subquery_to_join.rs +++ b/datafusion/optimizer/src/scalar_subquery_to_join.rs @@ -17,16 +17,13 @@ use crate::alias::AliasGenerator; use crate::optimizer::ApplyOrder; -use crate::utils::{ - collect_subquery_cols, conjunction, extract_join_filters, only_or_err, - replace_qualified_name, -}; +use crate::utils::{conjunction, replace_qualified_name, PullUpCorrelatedExpr}; use crate::{OptimizerConfig, OptimizerRule}; use datafusion_common::tree_node::{RewriteRecursion, TreeNode, TreeNodeRewriter}; -use datafusion_common::{context, Column, Result}; +use datafusion_common::{Column, DataFusionError, Result}; use datafusion_expr::logical_plan::{JoinType, Subquery}; 
use datafusion_expr::{EmptyRelation, Expr, LogicalPlan, LogicalPlanBuilder}; -use log::debug; +use std::collections::BTreeSet; use std::sync::Arc; /// Optimizer rule for rewriting subquery filters to joins @@ -81,7 +78,7 @@ impl OptimizerRule for ScalarSubqueryToJoin { let mut cur_input = filter.input.as_ref().clone(); for (subquery, alias) in subqueries { if let Some(optimized_subquery) = - optimize_scalar(&subquery, &cur_input, &alias)? + build_join(&subquery, &cur_input, &alias)? { cur_input = optimized_subquery; } else { @@ -89,8 +86,9 @@ impl OptimizerRule for ScalarSubqueryToJoin { return Ok(None); } } - let new_plan = LogicalPlanBuilder::from(cur_input); - Ok(Some(new_plan.filter(expr)?.build()?)) + let new_plan = + LogicalPlanBuilder::from(cur_input).filter(expr)?.build()?; + Ok(Some(new_plan)) } LogicalPlan::Projection(projection) => { let mut all_subqueryies = vec![]; @@ -109,7 +107,7 @@ impl OptimizerRule for ScalarSubqueryToJoin { let mut cur_input = projection.input.as_ref().clone(); for (subquery, alias) in all_subqueryies { if let Some(optimized_subquery) = - optimize_scalar(&subquery, &cur_input, &alias)? + build_join(&subquery, &cur_input, &alias)? { cur_input = optimized_subquery; } else { @@ -117,8 +115,10 @@ impl OptimizerRule for ScalarSubqueryToJoin { return Ok(None); } } - let new_plan = LogicalPlanBuilder::from(cur_input); - Ok(Some(new_plan.project(rewrite_exprs)?.build()?)) + let new_plan = LogicalPlanBuilder::from(cur_input) + .project(rewrite_exprs)? 
+ .build()?; + Ok(Some(new_plan)) } _ => Ok(None), @@ -153,9 +153,26 @@ impl TreeNodeRewriter for ExtractScalarSubQuery { match expr { Expr::ScalarSubquery(subquery) => { let subqry_alias = self.alias_gen.next("__scalar_sq"); - self.sub_query_info.push((subquery, subqry_alias.clone())); - let scalar_column = "__value"; - Ok(Expr::Column(Column::new(Some(subqry_alias), scalar_column))) + self.sub_query_info + .push((subquery.clone(), subqry_alias.clone())); + let scalar_expr = subquery.subquery.head_output_expr()?.map_or( + Err(DataFusionError::Plan( + "single expression required.".to_string(), + )), + Ok, + )?; + match scalar_expr { + Expr::Alias(_, alias) => { + Ok(Expr::Column(Column::new(Some(subqry_alias), alias))) + } + Expr::Column(Column { relation: _, name }) => { + Ok(Expr::Column(Column::new(Some(subqry_alias), name))) + } + _ => { + let scalar_column = scalar_expr.display_name()?; + Ok(Expr::Column(Column::new(Some(subqry_alias), scalar_column))) + } + } } _ => Ok(expr), } @@ -198,82 +215,51 @@ impl TreeNodeRewriter for ExtractScalarSubQuery { /// * `filter_input` - The non-subquery portion (from customers) /// * `outer_others` - Any additional parts to the `where` expression (and c.x = y) /// * `subquery_alias` - Subquery aliases -fn optimize_scalar( +fn build_join( subquery: &Subquery, filter_input: &LogicalPlan, subquery_alias: &str, ) -> Result> { let subquery_plan = subquery.subquery.as_ref(); - let proj = match &subquery_plan { - LogicalPlan::Projection(proj) => proj, - _ => { - // this rule does not support this type of scalar subquery - // TODO support more types - debug!( - "cannot translate this type of scalar subquery to a join: {}", - subquery_plan.display_indent() - ); - return Ok(None); - } - }; - let proj = only_or_err(proj.expr.as_slice()) - .map_err(|e| context!("exactly one expression should be projected", e))?; - let proj = Expr::Alias(Box::new(proj.clone()), "__value".to_string()); - let sub_inputs = subquery_plan.inputs(); - let 
sub_input = only_or_err(sub_inputs.as_slice()) - .map_err(|e| context!("Exactly one input is expected. Is this a join?", e))?; - - let aggr = match sub_input { - LogicalPlan::Aggregate(aggr) => aggr, - _ => { - // this rule does not support this type of scalar subquery - // TODO support more types - debug!( - "cannot translate this type of scalar subquery to a join: {}", - subquery_plan.display_indent() - ); - return Ok(None); - } + let mut pull_up = PullUpCorrelatedExpr { + join_filters: vec![], + correlated_subquery_cols_map: Default::default(), + in_predicate_opt: None, + exists_sub_query: false, + can_pull_up: true, }; + let new_plan = subquery_plan.clone().rewrite(&mut pull_up)?; + if !pull_up.can_pull_up { + return Ok(None); + } - // extract join filters - let (join_filters, subquery_input) = extract_join_filters(&aggr.input)?; - // Only operate if one column is present and the other closed upon from outside scope - let input_schema = subquery_input.schema(); - let subqry_cols = collect_subquery_cols(&join_filters, input_schema.clone())?; - let join_filter = conjunction(join_filters).map_or(Ok(None), |filter| { - replace_qualified_name(filter, &subqry_cols, subquery_alias).map(Option::Some) - })?; - - let group_by: Vec<_> = subqry_cols - .iter() - .map(|it| Expr::Column(it.clone())) - .collect(); - let subqry_plan = LogicalPlanBuilder::from(subquery_input); - - // project the prior projection + any correlated (and now grouped) columns - let proj: Vec<_> = group_by - .iter() - .cloned() - .chain(vec![proj].iter().cloned()) - .collect(); - let subqry_plan = subqry_plan - .aggregate(group_by, aggr.aggr_expr.clone())? - .project(proj)? + let sub_query_alias = LogicalPlanBuilder::from(new_plan) .alias(subquery_alias.to_string())? 
.build()?; + let mut all_correlated_cols = BTreeSet::new(); + pull_up + .correlated_subquery_cols_map + .values() + .for_each(|cols| all_correlated_cols.extend(cols.clone())); + + // alias the join filter + let join_filter_opt = + conjunction(pull_up.join_filters).map_or(Ok(None), |filter| { + replace_qualified_name(filter, &all_correlated_cols, subquery_alias) + .map(Option::Some) + })?; // join our sub query into the main plan - let new_plan = if join_filter.is_none() { + let new_plan = if join_filter_opt.is_none() { match filter_input { LogicalPlan::EmptyRelation(EmptyRelation { produce_one_row: true, schema: _, - }) => subqry_plan, + }) => sub_query_alias, _ => { // if not correlated, group down to 1 row and cross join on that (preserving row count) LogicalPlanBuilder::from(filter_input.clone()) - .cross_join(subqry_plan)? + .cross_join(sub_query_alias)? .build()? } } @@ -281,23 +267,19 @@ fn optimize_scalar( // left join if correlated, grouping by the join keys so we don't change row count LogicalPlanBuilder::from(filter_input.clone()) .join( - subqry_plan, + sub_query_alias, JoinType::Left, (Vec::::new(), Vec::::new()), - join_filter, + join_filter_opt, )? .build()? 
}; - Ok(Some(new_plan)) } #[cfg(test)] mod tests { use super::*; - use crate::eliminate_cross_join::EliminateCrossJoin; - use crate::eliminate_outer_join::EliminateOuterJoin; - use crate::extract_equijoin_predicate::ExtractEquijoinPredicate; use crate::test::*; use arrow::datatypes::DataType; use datafusion_common::Result; @@ -337,24 +319,20 @@ mod tests { .build()?; let expected = "Projection: customer.c_custkey [c_custkey:Int64]\ - \n Filter: Int32(1) < __scalar_sq_1.__value AND Int32(1) < __scalar_sq_2.__value [c_custkey:Int64, c_name:Utf8, o_custkey:Int64;N, __value:Int64;N, o_custkey:Int64, __value:Int64;N]\ - \n Inner Join: customer.c_custkey = __scalar_sq_2.o_custkey [c_custkey:Int64, c_name:Utf8, o_custkey:Int64;N, __value:Int64;N, o_custkey:Int64, __value:Int64;N]\ - \n Left Join: customer.c_custkey = __scalar_sq_1.o_custkey [c_custkey:Int64, c_name:Utf8, o_custkey:Int64;N, __value:Int64;N]\ + \n Filter: Int32(1) < __scalar_sq_1.MAX(orders.o_custkey) AND Int32(1) < __scalar_sq_2.MAX(orders.o_custkey) [c_custkey:Int64, c_name:Utf8, MAX(orders.o_custkey):Int64;N, o_custkey:Int64;N, MAX(orders.o_custkey):Int64;N, o_custkey:Int64;N]\ + \n Left Join: Filter: __scalar_sq_2.o_custkey = customer.c_custkey [c_custkey:Int64, c_name:Utf8, MAX(orders.o_custkey):Int64;N, o_custkey:Int64;N, MAX(orders.o_custkey):Int64;N, o_custkey:Int64;N]\ + \n Left Join: Filter: __scalar_sq_1.o_custkey = customer.c_custkey [c_custkey:Int64, c_name:Utf8, MAX(orders.o_custkey):Int64;N, o_custkey:Int64;N]\ \n TableScan: customer [c_custkey:Int64, c_name:Utf8]\ - \n SubqueryAlias: __scalar_sq_1 [o_custkey:Int64, __value:Int64;N]\ - \n Projection: orders.o_custkey, MAX(orders.o_custkey) AS __value [o_custkey:Int64, __value:Int64;N]\ + \n SubqueryAlias: __scalar_sq_1 [MAX(orders.o_custkey):Int64;N, o_custkey:Int64]\ + \n Projection: MAX(orders.o_custkey), orders.o_custkey [MAX(orders.o_custkey):Int64;N, o_custkey:Int64]\ \n Aggregate: groupBy=[[orders.o_custkey]], 
aggr=[[MAX(orders.o_custkey)]] [o_custkey:Int64, MAX(orders.o_custkey):Int64;N]\ \n TableScan: orders [o_orderkey:Int64, o_custkey:Int64, o_orderstatus:Utf8, o_totalprice:Float64;N]\ - \n SubqueryAlias: __scalar_sq_2 [o_custkey:Int64, __value:Int64;N]\ - \n Projection: orders.o_custkey, MAX(orders.o_custkey) AS __value [o_custkey:Int64, __value:Int64;N]\ + \n SubqueryAlias: __scalar_sq_2 [MAX(orders.o_custkey):Int64;N, o_custkey:Int64]\ + \n Projection: MAX(orders.o_custkey), orders.o_custkey [MAX(orders.o_custkey):Int64;N, o_custkey:Int64]\ \n Aggregate: groupBy=[[orders.o_custkey]], aggr=[[MAX(orders.o_custkey)]] [o_custkey:Int64, MAX(orders.o_custkey):Int64;N]\ \n TableScan: orders [o_orderkey:Int64, o_custkey:Int64, o_orderstatus:Utf8, o_totalprice:Float64;N]"; assert_multi_rules_optimized_plan_eq_display_indent( - vec![ - Arc::new(ScalarSubqueryToJoin::new()), - Arc::new(EliminateOuterJoin::new()), - Arc::new(ExtractEquijoinPredicate::new()), - ], + vec![Arc::new(ScalarSubqueryToJoin::new())], &plan, expected, ); @@ -396,25 +374,21 @@ mod tests { .build()?; let expected = "Projection: customer.c_custkey [c_custkey:Int64]\ - \n Filter: customer.c_acctbal < __scalar_sq_1.__value [c_custkey:Int64, c_name:Utf8, o_custkey:Int64, __value:Float64;N]\ - \n Inner Join: customer.c_custkey = __scalar_sq_1.o_custkey [c_custkey:Int64, c_name:Utf8, o_custkey:Int64, __value:Float64;N]\ + \n Filter: customer.c_acctbal < __scalar_sq_1.SUM(orders.o_totalprice) [c_custkey:Int64, c_name:Utf8, SUM(orders.o_totalprice):Float64;N, o_custkey:Int64;N]\ + \n Left Join: Filter: __scalar_sq_1.o_custkey = customer.c_custkey [c_custkey:Int64, c_name:Utf8, SUM(orders.o_totalprice):Float64;N, o_custkey:Int64;N]\ \n TableScan: customer [c_custkey:Int64, c_name:Utf8]\ - \n SubqueryAlias: __scalar_sq_1 [o_custkey:Int64, __value:Float64;N]\ - \n Projection: orders.o_custkey, SUM(orders.o_totalprice) AS __value [o_custkey:Int64, __value:Float64;N]\ + \n SubqueryAlias: __scalar_sq_1 
[SUM(orders.o_totalprice):Float64;N, o_custkey:Int64]\ + \n Projection: SUM(orders.o_totalprice), orders.o_custkey [SUM(orders.o_totalprice):Float64;N, o_custkey:Int64]\ \n Aggregate: groupBy=[[orders.o_custkey]], aggr=[[SUM(orders.o_totalprice)]] [o_custkey:Int64, SUM(orders.o_totalprice):Float64;N]\ - \n Filter: orders.o_totalprice < __scalar_sq_2.__value [o_orderkey:Int64, o_custkey:Int64, o_orderstatus:Utf8, o_totalprice:Float64;N, l_orderkey:Int64;N, __value:Float64;N]\ - \n Inner Join: orders.o_orderkey = __scalar_sq_2.l_orderkey [o_orderkey:Int64, o_custkey:Int64, o_orderstatus:Utf8, o_totalprice:Float64;N, l_orderkey:Int64;N, __value:Float64;N]\ + \n Filter: orders.o_totalprice < __scalar_sq_2.SUM(lineitem.l_extendedprice) [o_orderkey:Int64, o_custkey:Int64, o_orderstatus:Utf8, o_totalprice:Float64;N, SUM(lineitem.l_extendedprice):Float64;N, l_orderkey:Int64;N]\ + \n Left Join: Filter: __scalar_sq_2.l_orderkey = orders.o_orderkey [o_orderkey:Int64, o_custkey:Int64, o_orderstatus:Utf8, o_totalprice:Float64;N, SUM(lineitem.l_extendedprice):Float64;N, l_orderkey:Int64;N]\ \n TableScan: orders [o_orderkey:Int64, o_custkey:Int64, o_orderstatus:Utf8, o_totalprice:Float64;N]\ - \n SubqueryAlias: __scalar_sq_2 [l_orderkey:Int64, __value:Float64;N]\ - \n Projection: lineitem.l_orderkey, SUM(lineitem.l_extendedprice) AS __value [l_orderkey:Int64, __value:Float64;N]\ + \n SubqueryAlias: __scalar_sq_2 [SUM(lineitem.l_extendedprice):Float64;N, l_orderkey:Int64]\ + \n Projection: SUM(lineitem.l_extendedprice), lineitem.l_orderkey [SUM(lineitem.l_extendedprice):Float64;N, l_orderkey:Int64]\ \n Aggregate: groupBy=[[lineitem.l_orderkey]], aggr=[[SUM(lineitem.l_extendedprice)]] [l_orderkey:Int64, SUM(lineitem.l_extendedprice):Float64;N]\ \n TableScan: lineitem [l_orderkey:Int64, l_partkey:Int64, l_suppkey:Int64, l_linenumber:Int32, l_quantity:Float64, l_extendedprice:Float64]"; assert_multi_rules_optimized_plan_eq_display_indent( - vec![ - 
Arc::new(ScalarSubqueryToJoin::new()), - Arc::new(ExtractEquijoinPredicate::new()), - Arc::new(EliminateOuterJoin::new()), - ], + vec![Arc::new(ScalarSubqueryToJoin::new())], &plan, expected, ); @@ -442,21 +416,17 @@ mod tests { .build()?; let expected = "Projection: customer.c_custkey [c_custkey:Int64]\ - \n Filter: customer.c_custkey = __scalar_sq_1.__value [c_custkey:Int64, c_name:Utf8, o_custkey:Int64;N, __value:Int64;N]\ - \n Inner Join: customer.c_custkey = __scalar_sq_1.o_custkey [c_custkey:Int64, c_name:Utf8, o_custkey:Int64;N, __value:Int64;N]\ + \n Filter: customer.c_custkey = __scalar_sq_1.MAX(orders.o_custkey) [c_custkey:Int64, c_name:Utf8, MAX(orders.o_custkey):Int64;N, o_custkey:Int64;N]\ + \n Left Join: Filter: customer.c_custkey = __scalar_sq_1.o_custkey [c_custkey:Int64, c_name:Utf8, MAX(orders.o_custkey):Int64;N, o_custkey:Int64;N]\ \n TableScan: customer [c_custkey:Int64, c_name:Utf8]\ - \n SubqueryAlias: __scalar_sq_1 [o_custkey:Int64, __value:Int64;N]\ - \n Projection: orders.o_custkey, MAX(orders.o_custkey) AS __value [o_custkey:Int64, __value:Int64;N]\ + \n SubqueryAlias: __scalar_sq_1 [MAX(orders.o_custkey):Int64;N, o_custkey:Int64]\ + \n Projection: MAX(orders.o_custkey), orders.o_custkey [MAX(orders.o_custkey):Int64;N, o_custkey:Int64]\ \n Aggregate: groupBy=[[orders.o_custkey]], aggr=[[MAX(orders.o_custkey)]] [o_custkey:Int64, MAX(orders.o_custkey):Int64;N]\ \n Filter: orders.o_orderkey = Int32(1) [o_orderkey:Int64, o_custkey:Int64, o_orderstatus:Utf8, o_totalprice:Float64;N]\ \n TableScan: orders [o_orderkey:Int64, o_custkey:Int64, o_orderstatus:Utf8, o_totalprice:Float64;N]"; assert_multi_rules_optimized_plan_eq_display_indent( - vec![ - Arc::new(ScalarSubqueryToJoin::new()), - Arc::new(ExtractEquijoinPredicate::new()), - Arc::new(EliminateOuterJoin::new()), - ], + vec![Arc::new(ScalarSubqueryToJoin::new())], &plan, expected, ); @@ -484,18 +454,15 @@ mod tests { // it will optimize, but fail for the same reason the unoptimized query 
would let expected = "Projection: customer.c_custkey [c_custkey:Int64]\ - \n Inner Join: customer.c_custkey = __scalar_sq_1.__value [c_custkey:Int64, c_name:Utf8, __value:Int64;N]\ - \n TableScan: customer [c_custkey:Int64, c_name:Utf8]\ - \n SubqueryAlias: __scalar_sq_1 [__value:Int64;N]\ - \n Projection: MAX(orders.o_custkey) AS __value [__value:Int64;N]\ - \n Aggregate: groupBy=[[]], aggr=[[MAX(orders.o_custkey)]] [MAX(orders.o_custkey):Int64;N]\ - \n TableScan: orders [o_orderkey:Int64, o_custkey:Int64, o_orderstatus:Utf8, o_totalprice:Float64;N]"; + \n Filter: customer.c_custkey = __scalar_sq_1.MAX(orders.o_custkey) [c_custkey:Int64, c_name:Utf8, MAX(orders.o_custkey):Int64;N]\ + \n CrossJoin: [c_custkey:Int64, c_name:Utf8, MAX(orders.o_custkey):Int64;N]\ + \n TableScan: customer [c_custkey:Int64, c_name:Utf8]\ + \n SubqueryAlias: __scalar_sq_1 [MAX(orders.o_custkey):Int64;N]\ + \n Projection: MAX(orders.o_custkey) [MAX(orders.o_custkey):Int64;N]\ + \n Aggregate: groupBy=[[]], aggr=[[MAX(orders.o_custkey)]] [MAX(orders.o_custkey):Int64;N]\ + \n TableScan: orders [o_orderkey:Int64, o_custkey:Int64, o_orderstatus:Utf8, o_totalprice:Float64;N]"; assert_multi_rules_optimized_plan_eq_display_indent( - vec![ - Arc::new(ScalarSubqueryToJoin::new()), - Arc::new(ExtractEquijoinPredicate::new()), - Arc::new(EliminateCrossJoin::new()), - ], + vec![Arc::new(ScalarSubqueryToJoin::new())], &plan, expected, ); @@ -519,20 +486,17 @@ mod tests { .build()?; let expected = "Projection: customer.c_custkey [c_custkey:Int64]\ - \n Inner Join: customer.c_custkey = __scalar_sq_1.__value [c_custkey:Int64, c_name:Utf8, __value:Int64;N]\ - \n TableScan: customer [c_custkey:Int64, c_name:Utf8]\ - \n SubqueryAlias: __scalar_sq_1 [__value:Int64;N]\ - \n Projection: MAX(orders.o_custkey) AS __value [__value:Int64;N]\ - \n Aggregate: groupBy=[[]], aggr=[[MAX(orders.o_custkey)]] [MAX(orders.o_custkey):Int64;N]\ - \n Filter: orders.o_custkey = orders.o_custkey [o_orderkey:Int64, 
o_custkey:Int64, o_orderstatus:Utf8, o_totalprice:Float64;N]\ - \n TableScan: orders [o_orderkey:Int64, o_custkey:Int64, o_orderstatus:Utf8, o_totalprice:Float64;N]"; + \n Filter: customer.c_custkey = __scalar_sq_1.MAX(orders.o_custkey) [c_custkey:Int64, c_name:Utf8, MAX(orders.o_custkey):Int64;N]\ + \n CrossJoin: [c_custkey:Int64, c_name:Utf8, MAX(orders.o_custkey):Int64;N]\ + \n TableScan: customer [c_custkey:Int64, c_name:Utf8]\ + \n SubqueryAlias: __scalar_sq_1 [MAX(orders.o_custkey):Int64;N]\ + \n Projection: MAX(orders.o_custkey) [MAX(orders.o_custkey):Int64;N]\ + \n Aggregate: groupBy=[[]], aggr=[[MAX(orders.o_custkey)]] [MAX(orders.o_custkey):Int64;N]\ + \n Filter: orders.o_custkey = orders.o_custkey [o_orderkey:Int64, o_custkey:Int64, o_orderstatus:Utf8, o_totalprice:Float64;N]\ + \n TableScan: orders [o_orderkey:Int64, o_custkey:Int64, o_orderstatus:Utf8, o_totalprice:Float64;N]"; assert_multi_rules_optimized_plan_eq_display_indent( - vec![ - Arc::new(ScalarSubqueryToJoin::new()), - Arc::new(ExtractEquijoinPredicate::new()), - Arc::new(EliminateCrossJoin::new()), - ], + vec![Arc::new(ScalarSubqueryToJoin::new())], &plan, expected, ); @@ -635,24 +599,6 @@ mod tests { .project(vec![col("customer.c_custkey")])? 
.build()?; - // we expect the plan to be unchanged because this subquery is not supported by this rule - let expected = r#"Projection: customer.c_custkey [c_custkey:Int64] - Filter: customer.c_custkey = () [c_custkey:Int64, c_name:Utf8] - Subquery: [o_orderkey:Int64, o_custkey:Int64, o_orderstatus:Utf8, o_totalprice:Float64;N] - Filter: customer.c_custkey = orders.o_custkey [o_orderkey:Int64, o_custkey:Int64, o_orderstatus:Utf8, o_totalprice:Float64;N] - TableScan: orders [o_orderkey:Int64, o_custkey:Int64, o_orderstatus:Utf8, o_totalprice:Float64;N] - TableScan: customer [c_custkey:Int64, c_name:Utf8]"#; - - assert_multi_rules_optimized_plan_eq_display_indent( - vec![ - Arc::new(ScalarSubqueryToJoin::new()), - Arc::new(ExtractEquijoinPredicate::new()), - Arc::new(EliminateOuterJoin::new()), - ], - &plan, - expected, - ); - let expected = "check_analyzed_plan\ \ncaused by\ \nError during planning: Scalar subquery should only return one column"; @@ -680,20 +626,16 @@ mod tests { .build()?; let expected = "Projection: customer.c_custkey [c_custkey:Int64]\ - \n Filter: customer.c_custkey = __scalar_sq_1.__value [c_custkey:Int64, c_name:Utf8, o_custkey:Int64;N, __value:Int64;N]\ - \n Inner Join: customer.c_custkey = __scalar_sq_1.o_custkey [c_custkey:Int64, c_name:Utf8, o_custkey:Int64;N, __value:Int64;N]\ + \n Filter: customer.c_custkey = __scalar_sq_1.MAX(orders.o_custkey) + Int32(1) [c_custkey:Int64, c_name:Utf8, MAX(orders.o_custkey) + Int32(1):Int64;N, o_custkey:Int64;N]\ + \n Left Join: Filter: customer.c_custkey = __scalar_sq_1.o_custkey [c_custkey:Int64, c_name:Utf8, MAX(orders.o_custkey) + Int32(1):Int64;N, o_custkey:Int64;N]\ \n TableScan: customer [c_custkey:Int64, c_name:Utf8]\ - \n SubqueryAlias: __scalar_sq_1 [o_custkey:Int64, __value:Int64;N]\ - \n Projection: orders.o_custkey, MAX(orders.o_custkey) + Int32(1) AS __value [o_custkey:Int64, __value:Int64;N]\ + \n SubqueryAlias: __scalar_sq_1 [MAX(orders.o_custkey) + Int32(1):Int64;N, o_custkey:Int64]\ + \n 
Projection: MAX(orders.o_custkey) + Int32(1), orders.o_custkey [MAX(orders.o_custkey) + Int32(1):Int64;N, o_custkey:Int64]\ \n Aggregate: groupBy=[[orders.o_custkey]], aggr=[[MAX(orders.o_custkey)]] [o_custkey:Int64, MAX(orders.o_custkey):Int64;N]\ \n TableScan: orders [o_orderkey:Int64, o_custkey:Int64, o_orderstatus:Utf8, o_totalprice:Float64;N]"; assert_multi_rules_optimized_plan_eq_display_indent( - vec![ - Arc::new(ScalarSubqueryToJoin::new()), - Arc::new(ExtractEquijoinPredicate::new()), - Arc::new(EliminateOuterJoin::new()), - ], + vec![Arc::new(ScalarSubqueryToJoin::new())], &plan, expected, ); @@ -750,20 +692,16 @@ mod tests { .build()?; let expected = "Projection: customer.c_custkey [c_custkey:Int64]\ - \n Filter: customer.c_custkey >= __scalar_sq_1.__value AND customer.c_custkey = Int32(1) [c_custkey:Int64, c_name:Utf8, o_custkey:Int64;N, __value:Int64;N]\ - \n Inner Join: customer.c_custkey = __scalar_sq_1.o_custkey [c_custkey:Int64, c_name:Utf8, o_custkey:Int64;N, __value:Int64;N]\ + \n Filter: customer.c_custkey >= __scalar_sq_1.MAX(orders.o_custkey) AND customer.c_custkey = Int32(1) [c_custkey:Int64, c_name:Utf8, MAX(orders.o_custkey):Int64;N, o_custkey:Int64;N]\ + \n Left Join: Filter: customer.c_custkey = __scalar_sq_1.o_custkey [c_custkey:Int64, c_name:Utf8, MAX(orders.o_custkey):Int64;N, o_custkey:Int64;N]\ \n TableScan: customer [c_custkey:Int64, c_name:Utf8]\ - \n SubqueryAlias: __scalar_sq_1 [o_custkey:Int64, __value:Int64;N]\ - \n Projection: orders.o_custkey, MAX(orders.o_custkey) AS __value [o_custkey:Int64, __value:Int64;N]\ + \n SubqueryAlias: __scalar_sq_1 [MAX(orders.o_custkey):Int64;N, o_custkey:Int64]\ + \n Projection: MAX(orders.o_custkey), orders.o_custkey [MAX(orders.o_custkey):Int64;N, o_custkey:Int64]\ \n Aggregate: groupBy=[[orders.o_custkey]], aggr=[[MAX(orders.o_custkey)]] [o_custkey:Int64, MAX(orders.o_custkey):Int64;N]\ \n TableScan: orders [o_orderkey:Int64, o_custkey:Int64, o_orderstatus:Utf8, o_totalprice:Float64;N]"; 
assert_multi_rules_optimized_plan_eq_display_indent( - vec![ - Arc::new(ScalarSubqueryToJoin::new()), - Arc::new(ExtractEquijoinPredicate::new()), - Arc::new(EliminateOuterJoin::new()), - ], + vec![Arc::new(ScalarSubqueryToJoin::new())], &plan, expected, ); @@ -793,20 +731,16 @@ mod tests { .build()?; let expected = "Projection: customer.c_custkey [c_custkey:Int64]\ - \n Filter: customer.c_custkey = __scalar_sq_1.__value AND customer.c_custkey = Int32(1) [c_custkey:Int64, c_name:Utf8, o_custkey:Int64;N, __value:Int64;N]\ - \n Inner Join: customer.c_custkey = __scalar_sq_1.o_custkey [c_custkey:Int64, c_name:Utf8, o_custkey:Int64;N, __value:Int64;N]\ + \n Filter: customer.c_custkey = __scalar_sq_1.MAX(orders.o_custkey) AND customer.c_custkey = Int32(1) [c_custkey:Int64, c_name:Utf8, MAX(orders.o_custkey):Int64;N, o_custkey:Int64;N]\ + \n Left Join: Filter: customer.c_custkey = __scalar_sq_1.o_custkey [c_custkey:Int64, c_name:Utf8, MAX(orders.o_custkey):Int64;N, o_custkey:Int64;N]\ \n TableScan: customer [c_custkey:Int64, c_name:Utf8]\ - \n SubqueryAlias: __scalar_sq_1 [o_custkey:Int64, __value:Int64;N]\ - \n Projection: orders.o_custkey, MAX(orders.o_custkey) AS __value [o_custkey:Int64, __value:Int64;N]\ + \n SubqueryAlias: __scalar_sq_1 [MAX(orders.o_custkey):Int64;N, o_custkey:Int64]\ + \n Projection: MAX(orders.o_custkey), orders.o_custkey [MAX(orders.o_custkey):Int64;N, o_custkey:Int64]\ \n Aggregate: groupBy=[[orders.o_custkey]], aggr=[[MAX(orders.o_custkey)]] [o_custkey:Int64, MAX(orders.o_custkey):Int64;N]\ \n TableScan: orders [o_orderkey:Int64, o_custkey:Int64, o_orderstatus:Utf8, o_totalprice:Float64;N]"; assert_multi_rules_optimized_plan_eq_display_indent( - vec![ - Arc::new(ScalarSubqueryToJoin::new()), - Arc::new(EliminateOuterJoin::new()), - Arc::new(ExtractEquijoinPredicate::new()), - ], + vec![Arc::new(ScalarSubqueryToJoin::new())], &plan, expected, ); @@ -837,20 +771,16 @@ mod tests { .build()?; let expected = "Projection: customer.c_custkey 
[c_custkey:Int64]\ - \n Filter: customer.c_custkey = __scalar_sq_1.__value OR customer.c_custkey = Int32(1) [c_custkey:Int64, c_name:Utf8, o_custkey:Int64;N, __value:Int64;N]\ - \n Left Join: customer.c_custkey = __scalar_sq_1.o_custkey [c_custkey:Int64, c_name:Utf8, o_custkey:Int64;N, __value:Int64;N]\ + \n Filter: customer.c_custkey = __scalar_sq_1.MAX(orders.o_custkey) OR customer.c_custkey = Int32(1) [c_custkey:Int64, c_name:Utf8, MAX(orders.o_custkey):Int64;N, o_custkey:Int64;N]\ + \n Left Join: Filter: customer.c_custkey = __scalar_sq_1.o_custkey [c_custkey:Int64, c_name:Utf8, MAX(orders.o_custkey):Int64;N, o_custkey:Int64;N]\ \n TableScan: customer [c_custkey:Int64, c_name:Utf8]\ - \n SubqueryAlias: __scalar_sq_1 [o_custkey:Int64, __value:Int64;N]\ - \n Projection: orders.o_custkey, MAX(orders.o_custkey) AS __value [o_custkey:Int64, __value:Int64;N]\ + \n SubqueryAlias: __scalar_sq_1 [MAX(orders.o_custkey):Int64;N, o_custkey:Int64]\ + \n Projection: MAX(orders.o_custkey), orders.o_custkey [MAX(orders.o_custkey):Int64;N, o_custkey:Int64]\ \n Aggregate: groupBy=[[orders.o_custkey]], aggr=[[MAX(orders.o_custkey)]] [o_custkey:Int64, MAX(orders.o_custkey):Int64;N]\ \n TableScan: orders [o_orderkey:Int64, o_custkey:Int64, o_orderstatus:Utf8, o_totalprice:Float64;N]"; assert_multi_rules_optimized_plan_eq_display_indent( - vec![ - Arc::new(ScalarSubqueryToJoin::new()), - Arc::new(ExtractEquijoinPredicate::new()), - Arc::new(EliminateCrossJoin::new()), - ], + vec![Arc::new(ScalarSubqueryToJoin::new())], &plan, expected, ); @@ -874,20 +804,16 @@ mod tests { .build()?; let expected = "Projection: test.c [c:UInt32]\ - \n Filter: test.c < __scalar_sq_1.__value [a:UInt32, b:UInt32, c:UInt32, a:UInt32;N, __value:UInt32;N]\ - \n Inner Join: test.a = __scalar_sq_1.a [a:UInt32, b:UInt32, c:UInt32, a:UInt32;N, __value:UInt32;N]\ + \n Filter: test.c < __scalar_sq_1.MIN(sq.c) [a:UInt32, b:UInt32, c:UInt32, MIN(sq.c):UInt32;N, a:UInt32;N]\ + \n Left Join: Filter: test.a = 
__scalar_sq_1.a [a:UInt32, b:UInt32, c:UInt32, MIN(sq.c):UInt32;N, a:UInt32;N]\ \n TableScan: test [a:UInt32, b:UInt32, c:UInt32]\ - \n SubqueryAlias: __scalar_sq_1 [a:UInt32, __value:UInt32;N]\ - \n Projection: sq.a, MIN(sq.c) AS __value [a:UInt32, __value:UInt32;N]\ + \n SubqueryAlias: __scalar_sq_1 [MIN(sq.c):UInt32;N, a:UInt32]\ + \n Projection: MIN(sq.c), sq.a [MIN(sq.c):UInt32;N, a:UInt32]\ \n Aggregate: groupBy=[[sq.a]], aggr=[[MIN(sq.c)]] [a:UInt32, MIN(sq.c):UInt32;N]\ \n TableScan: sq [a:UInt32, b:UInt32, c:UInt32]"; assert_multi_rules_optimized_plan_eq_display_indent( - vec![ - Arc::new(ScalarSubqueryToJoin::new()), - Arc::new(ExtractEquijoinPredicate::new()), - Arc::new(EliminateOuterJoin::new()), - ], + vec![Arc::new(ScalarSubqueryToJoin::new())], &plan, expected, ); @@ -910,20 +836,16 @@ mod tests { .build()?; let expected = "Projection: customer.c_custkey [c_custkey:Int64]\ - \n Filter: customer.c_custkey < __scalar_sq_1.__value [c_custkey:Int64, c_name:Utf8, __value:Int64;N]\ - \n CrossJoin: [c_custkey:Int64, c_name:Utf8, __value:Int64;N]\ + \n Filter: customer.c_custkey < __scalar_sq_1.MAX(orders.o_custkey) [c_custkey:Int64, c_name:Utf8, MAX(orders.o_custkey):Int64;N]\ + \n CrossJoin: [c_custkey:Int64, c_name:Utf8, MAX(orders.o_custkey):Int64;N]\ \n TableScan: customer [c_custkey:Int64, c_name:Utf8]\ - \n SubqueryAlias: __scalar_sq_1 [__value:Int64;N]\ - \n Projection: MAX(orders.o_custkey) AS __value [__value:Int64;N]\ + \n SubqueryAlias: __scalar_sq_1 [MAX(orders.o_custkey):Int64;N]\ + \n Projection: MAX(orders.o_custkey) [MAX(orders.o_custkey):Int64;N]\ \n Aggregate: groupBy=[[]], aggr=[[MAX(orders.o_custkey)]] [MAX(orders.o_custkey):Int64;N]\ \n TableScan: orders [o_orderkey:Int64, o_custkey:Int64, o_orderstatus:Utf8, o_totalprice:Float64;N]"; assert_multi_rules_optimized_plan_eq_display_indent( - vec![ - Arc::new(ScalarSubqueryToJoin::new()), - Arc::new(ExtractEquijoinPredicate::new()), - Arc::new(EliminateCrossJoin::new()), - ], + 
vec![Arc::new(ScalarSubqueryToJoin::new())], &plan, expected, ); @@ -945,19 +867,16 @@ mod tests { .build()?; let expected = "Projection: customer.c_custkey [c_custkey:Int64]\ - \n Inner Join: customer.c_custkey = __scalar_sq_1.__value [c_custkey:Int64, c_name:Utf8, __value:Int64;N]\ - \n TableScan: customer [c_custkey:Int64, c_name:Utf8]\ - \n SubqueryAlias: __scalar_sq_1 [__value:Int64;N]\ - \n Projection: MAX(orders.o_custkey) AS __value [__value:Int64;N]\ - \n Aggregate: groupBy=[[]], aggr=[[MAX(orders.o_custkey)]] [MAX(orders.o_custkey):Int64;N]\ - \n TableScan: orders [o_orderkey:Int64, o_custkey:Int64, o_orderstatus:Utf8, o_totalprice:Float64;N]"; + \n Filter: customer.c_custkey = __scalar_sq_1.MAX(orders.o_custkey) [c_custkey:Int64, c_name:Utf8, MAX(orders.o_custkey):Int64;N]\ + \n CrossJoin: [c_custkey:Int64, c_name:Utf8, MAX(orders.o_custkey):Int64;N]\ + \n TableScan: customer [c_custkey:Int64, c_name:Utf8]\ + \n SubqueryAlias: __scalar_sq_1 [MAX(orders.o_custkey):Int64;N]\ + \n Projection: MAX(orders.o_custkey) [MAX(orders.o_custkey):Int64;N]\ + \n Aggregate: groupBy=[[]], aggr=[[MAX(orders.o_custkey)]] [MAX(orders.o_custkey):Int64;N]\ + \n TableScan: orders [o_orderkey:Int64, o_custkey:Int64, o_orderstatus:Utf8, o_totalprice:Float64;N]"; assert_multi_rules_optimized_plan_eq_display_indent( - vec![ - Arc::new(ScalarSubqueryToJoin::new()), - Arc::new(ExtractEquijoinPredicate::new()), - Arc::new(EliminateCrossJoin::new()), - ], + vec![Arc::new(ScalarSubqueryToJoin::new())], &plan, expected, ); @@ -1000,25 +919,21 @@ mod tests { .build()?; let expected = "Projection: customer.c_custkey [c_custkey:Int64]\ - \n Filter: customer.c_custkey BETWEEN __scalar_sq_1.__value AND __scalar_sq_2.__value [c_custkey:Int64, c_name:Utf8, o_custkey:Int64;N, __value:Int64;N, o_custkey:Int64;N, __value:Int64;N]\ - \n Left Join: customer.c_custkey = __scalar_sq_2.o_custkey [c_custkey:Int64, c_name:Utf8, o_custkey:Int64;N, __value:Int64;N, o_custkey:Int64;N, __value:Int64;N]\ - 
\n Left Join: customer.c_custkey = __scalar_sq_1.o_custkey [c_custkey:Int64, c_name:Utf8, o_custkey:Int64;N, __value:Int64;N]\ + \n Filter: customer.c_custkey BETWEEN __scalar_sq_1.MIN(orders.o_custkey) AND __scalar_sq_2.MAX(orders.o_custkey) [c_custkey:Int64, c_name:Utf8, MIN(orders.o_custkey):Int64;N, o_custkey:Int64;N, MAX(orders.o_custkey):Int64;N, o_custkey:Int64;N]\ + \n Left Join: Filter: customer.c_custkey = __scalar_sq_2.o_custkey [c_custkey:Int64, c_name:Utf8, MIN(orders.o_custkey):Int64;N, o_custkey:Int64;N, MAX(orders.o_custkey):Int64;N, o_custkey:Int64;N]\ + \n Left Join: Filter: customer.c_custkey = __scalar_sq_1.o_custkey [c_custkey:Int64, c_name:Utf8, MIN(orders.o_custkey):Int64;N, o_custkey:Int64;N]\ \n TableScan: customer [c_custkey:Int64, c_name:Utf8]\ - \n SubqueryAlias: __scalar_sq_1 [o_custkey:Int64, __value:Int64;N]\ - \n Projection: orders.o_custkey, MIN(orders.o_custkey) AS __value [o_custkey:Int64, __value:Int64;N]\ + \n SubqueryAlias: __scalar_sq_1 [MIN(orders.o_custkey):Int64;N, o_custkey:Int64]\ + \n Projection: MIN(orders.o_custkey), orders.o_custkey [MIN(orders.o_custkey):Int64;N, o_custkey:Int64]\ \n Aggregate: groupBy=[[orders.o_custkey]], aggr=[[MIN(orders.o_custkey)]] [o_custkey:Int64, MIN(orders.o_custkey):Int64;N]\ \n TableScan: orders [o_orderkey:Int64, o_custkey:Int64, o_orderstatus:Utf8, o_totalprice:Float64;N]\ - \n SubqueryAlias: __scalar_sq_2 [o_custkey:Int64, __value:Int64;N]\ - \n Projection: orders.o_custkey, MAX(orders.o_custkey) AS __value [o_custkey:Int64, __value:Int64;N]\ + \n SubqueryAlias: __scalar_sq_2 [MAX(orders.o_custkey):Int64;N, o_custkey:Int64]\ + \n Projection: MAX(orders.o_custkey), orders.o_custkey [MAX(orders.o_custkey):Int64;N, o_custkey:Int64]\ \n Aggregate: groupBy=[[orders.o_custkey]], aggr=[[MAX(orders.o_custkey)]] [o_custkey:Int64, MAX(orders.o_custkey):Int64;N]\ \n TableScan: orders [o_orderkey:Int64, o_custkey:Int64, o_orderstatus:Utf8, o_totalprice:Float64;N]"; 
assert_multi_rules_optimized_plan_eq_display_indent( - vec![ - Arc::new(ScalarSubqueryToJoin::new()), - Arc::new(ExtractEquijoinPredicate::new()), - Arc::new(EliminateOuterJoin::new()), - ], + vec![Arc::new(ScalarSubqueryToJoin::new())], &plan, expected, ); @@ -1053,25 +968,21 @@ mod tests { .build()?; let expected = "Projection: customer.c_custkey [c_custkey:Int64]\ - \n Filter: customer.c_custkey BETWEEN __scalar_sq_1.__value AND __scalar_sq_2.__value [c_custkey:Int64, c_name:Utf8, __value:Int64;N, __value:Int64;N]\ - \n CrossJoin: [c_custkey:Int64, c_name:Utf8, __value:Int64;N, __value:Int64;N]\ - \n CrossJoin: [c_custkey:Int64, c_name:Utf8, __value:Int64;N]\ + \n Filter: customer.c_custkey BETWEEN __scalar_sq_1.MIN(orders.o_custkey) AND __scalar_sq_2.MAX(orders.o_custkey) [c_custkey:Int64, c_name:Utf8, MIN(orders.o_custkey):Int64;N, MAX(orders.o_custkey):Int64;N]\ + \n CrossJoin: [c_custkey:Int64, c_name:Utf8, MIN(orders.o_custkey):Int64;N, MAX(orders.o_custkey):Int64;N]\ + \n CrossJoin: [c_custkey:Int64, c_name:Utf8, MIN(orders.o_custkey):Int64;N]\ \n TableScan: customer [c_custkey:Int64, c_name:Utf8]\ - \n SubqueryAlias: __scalar_sq_1 [__value:Int64;N]\ - \n Projection: MIN(orders.o_custkey) AS __value [__value:Int64;N]\ + \n SubqueryAlias: __scalar_sq_1 [MIN(orders.o_custkey):Int64;N]\ + \n Projection: MIN(orders.o_custkey) [MIN(orders.o_custkey):Int64;N]\ \n Aggregate: groupBy=[[]], aggr=[[MIN(orders.o_custkey)]] [MIN(orders.o_custkey):Int64;N]\ \n TableScan: orders [o_orderkey:Int64, o_custkey:Int64, o_orderstatus:Utf8, o_totalprice:Float64;N]\ - \n SubqueryAlias: __scalar_sq_2 [__value:Int64;N]\ - \n Projection: MAX(orders.o_custkey) AS __value [__value:Int64;N]\ + \n SubqueryAlias: __scalar_sq_2 [MAX(orders.o_custkey):Int64;N]\ + \n Projection: MAX(orders.o_custkey) [MAX(orders.o_custkey):Int64;N]\ \n Aggregate: groupBy=[[]], aggr=[[MAX(orders.o_custkey)]] [MAX(orders.o_custkey):Int64;N]\ \n TableScan: orders [o_orderkey:Int64, o_custkey:Int64, 
o_orderstatus:Utf8, o_totalprice:Float64;N]"; assert_multi_rules_optimized_plan_eq_display_indent( - vec![ - Arc::new(ScalarSubqueryToJoin::new()), - Arc::new(ExtractEquijoinPredicate::new()), - Arc::new(EliminateOuterJoin::new()), - ], + vec![Arc::new(ScalarSubqueryToJoin::new())], &plan, expected, ); diff --git a/datafusion/optimizer/src/utils.rs b/datafusion/optimizer/src/utils.rs index 266d0a0be7145..fd8dda79c51d1 100644 --- a/datafusion/optimizer/src/utils.rs +++ b/datafusion/optimizer/src/utils.rs @@ -18,20 +18,23 @@ //! Collection of utility functions that are leveraged by the query optimizer rules use crate::{OptimizerConfig, OptimizerRule}; -use datafusion_common::tree_node::{TreeNode, TreeNodeRewriter}; +use datafusion_common::tree_node::{RewriteRecursion, TreeNode, TreeNodeRewriter}; use datafusion_common::{plan_err, Column, DFSchemaRef}; use datafusion_common::{DFSchema, Result}; use datafusion_expr::expr::{BinaryExpr, Sort}; -use datafusion_expr::expr_rewriter::{replace_col, strip_outer_reference}; +use datafusion_expr::expr_rewriter::{ + replace_col, strip_outer_reference, unnormalize_col, +}; use datafusion_expr::logical_plan::LogicalPlanBuilder; use datafusion_expr::utils::from_plan; use datafusion_expr::{ and, logical_plan::{Filter, LogicalPlan}, - Expr, Operator, + EmptyRelation, Expr, Operator, }; use log::{debug, trace}; use std::collections::{BTreeSet, HashMap}; +use std::ops::Deref; use std::sync::Arc; /// Convenience rule for writing optimizers: recursively invoke @@ -346,29 +349,6 @@ pub fn merge_schema(inputs: Vec<&LogicalPlan>) -> DFSchema { } } -/// Extract join predicates from the correlated subquery's [Filter] expressions. -/// The join predicate means that the expression references columns -/// from both the subquery and outer table or only from the outer table. -/// -/// Returns join predicates and subquery(extracted). 
-pub(crate) fn extract_join_filters( - maybe_filter: &LogicalPlan, -) -> Result<(Vec, LogicalPlan)> { - if let LogicalPlan::Filter(plan_filter) = maybe_filter { - let subquery_filter_exprs = split_conjunction(&plan_filter.predicate); - let (join_filters, subquery_filters) = find_join_exprs(subquery_filter_exprs)?; - // if the subquery still has filter expressions, restore them. - let mut plan = LogicalPlanBuilder::from((*plan_filter.input).clone()); - if let Some(expr) = conjunction(subquery_filters) { - plan = plan.filter(expr)? - } - - Ok((join_filters, plan.build()?)) - } else { - Ok((vec![], maybe_filter.clone())) - } -} - pub(crate) fn collect_subquery_cols( exprs: &[Expr], subquery_schema: DFSchemaRef, @@ -409,6 +389,235 @@ pub fn log_plan(description: &str, plan: &LogicalPlan) { trace!("{description}::\n{}\n", plan.display_indent_schema()); } +/// This struct rewrite the sub query plan by pull up the correlated expressions(contains outer reference columns) from the inner subquery's [Filter]. +/// It adds the inner reference columns to the [Projection] or [Aggregate] of the subquery if they are missing, so that they can be evaluated by the parent operator as the join condition. 
+pub struct PullUpCorrelatedExpr { + pub join_filters: Vec, + // map of the plan and its holding correlated columns + pub correlated_subquery_cols_map: HashMap>, + pub in_predicate_opt: Option, + // indicate whether it is Exists(Not Exists) SubQuery + pub exists_sub_query: bool, + // indicate whether the correlated expressions can pull up or not + pub can_pull_up: bool, +} + +impl TreeNodeRewriter for PullUpCorrelatedExpr { + type N = LogicalPlan; + + fn pre_visit(&mut self, plan: &LogicalPlan) -> Result { + match plan { + LogicalPlan::Filter(_) => Ok(RewriteRecursion::Continue), + LogicalPlan::Union(_) | LogicalPlan::Sort(_) | LogicalPlan::Extension(_) => { + let plan_hold_outer = !plan.all_out_ref_exprs().is_empty(); + if plan_hold_outer { + // the unsupported case + self.can_pull_up = false; + Ok(RewriteRecursion::Stop) + } else { + Ok(RewriteRecursion::Continue) + } + } + LogicalPlan::Limit(_) => { + let plan_hold_outer = !plan.all_out_ref_exprs().is_empty(); + match (self.exists_sub_query, plan_hold_outer) { + (false, true) => { + // the unsupported case + self.can_pull_up = false; + Ok(RewriteRecursion::Stop) + } + _ => Ok(RewriteRecursion::Continue), + } + } + _ if plan.expressions().iter().any(|expr| expr.contains_outer()) => { + // the unsupported cases, the plan expressions contain out reference columns(like window expressions or agg expressions) + self.can_pull_up = false; + Ok(RewriteRecursion::Stop) + } + _ => Ok(RewriteRecursion::Continue), + } + } + + fn mutate(&mut self, plan: LogicalPlan) -> Result { + let subquery_schema = plan.schema().clone(); + match &plan { + LogicalPlan::Filter(plan_filter) => { + let subquery_filter_exprs = split_conjunction(&plan_filter.predicate); + let (mut join_filters, subquery_filters) = + find_join_exprs(subquery_filter_exprs)?; + if let Some(in_predicate) = &self.in_predicate_opt { + // in_predicate may be already included in the join filters, remove it from the join filters first. 
+ join_filters = remove_duplicated_filter(join_filters, in_predicate); + } + let correlated_subquery_cols = + collect_subquery_cols(&join_filters, subquery_schema)?; + for expr in join_filters { + if !self.join_filters.contains(&expr) { + self.join_filters.push(expr) + } + } + // if the subquery still has filter expressions, restore them. + let mut plan = LogicalPlanBuilder::from((*plan_filter.input).clone()); + if let Some(expr) = conjunction(subquery_filters) { + plan = plan.filter(expr)? + } + let new_plan = plan.build()?; + self.correlated_subquery_cols_map + .insert(new_plan.clone(), correlated_subquery_cols); + Ok(new_plan) + } + LogicalPlan::Projection(projection) + if self.in_predicate_opt.is_some() || !self.join_filters.is_empty() => + { + let mut local_correlated_cols = BTreeSet::new(); + collect_local_correlated_cols( + &plan, + &self.correlated_subquery_cols_map, + &mut local_correlated_cols, + ); + // add missing columns to Projection + let missing_exprs = + self.collect_missing_exprs(&projection.expr, &local_correlated_cols)?; + let new_plan = LogicalPlanBuilder::from((*projection.input).clone()) + .project(missing_exprs)? + .build()?; + Ok(new_plan) + } + LogicalPlan::Aggregate(aggregate) + if self.in_predicate_opt.is_some() || !self.join_filters.is_empty() => + { + let mut local_correlated_cols = BTreeSet::new(); + collect_local_correlated_cols( + &plan, + &self.correlated_subquery_cols_map, + &mut local_correlated_cols, + ); + // add missing columns to Aggregation's group expression + let missing_exprs = self.collect_missing_exprs( + &aggregate.group_expr, + &local_correlated_cols, + )?; + let new_plan = LogicalPlanBuilder::from((*aggregate.input).clone()) + .aggregate(missing_exprs, aggregate.aggr_expr.to_vec())? 
+ .build()?; + Ok(new_plan) + } + LogicalPlan::SubqueryAlias(alias) => { + let mut local_correlated_cols = BTreeSet::new(); + collect_local_correlated_cols( + &plan, + &self.correlated_subquery_cols_map, + &mut local_correlated_cols, + ); + let mut new_correlated_cols = BTreeSet::new(); + for col in local_correlated_cols.iter() { + new_correlated_cols + .insert(Column::new(Some(alias.alias.clone()), col.name.clone())); + } + self.correlated_subquery_cols_map + .insert(plan.clone(), new_correlated_cols); + Ok(plan) + } + LogicalPlan::Limit(limit) => { + // handling the limit clause in the subquery + match (self.exists_sub_query, self.join_filters.is_empty()) { + // un-correlated exist subquery, keep the limit + (true, true) => Ok(plan), + // Correlated exist subquery, remove the limit(so that correlated expressions can pull up) + (true, false) => { + if limit.fetch.filter(|limit_row| *limit_row == 0).is_some() { + Ok(LogicalPlan::EmptyRelation(EmptyRelation { + produce_one_row: false, + schema: limit.input.schema().clone(), + })) + } else { + LogicalPlanBuilder::from((*limit.input).clone()).build() + } + } + _ => Ok(plan), + } + } + _ => Ok(plan), + } + } +} + +impl PullUpCorrelatedExpr { + fn collect_missing_exprs( + &self, + exprs: &[Expr], + correlated_subquery_cols: &BTreeSet, + ) -> Result> { + let mut missing_exprs = vec![]; + if let Some(Expr::BinaryExpr(BinaryExpr { + left: _, + op: Operator::Eq, + right, + })) = &self.in_predicate_opt + { + if !matches!(right.deref(), Expr::Column(_)) + && !matches!(right.deref(), Expr::Literal(_)) + && !matches!(right.deref(), Expr::Alias(_, _)) + { + let alias_expr = right + .deref() + .clone() + .alias(format!("{:?}", unnormalize_col(right.deref().clone()))); + missing_exprs.push(alias_expr) + } + } + for expr in exprs { + if !missing_exprs.contains(expr) { + missing_exprs.push(expr.clone()) + } + } + for col in correlated_subquery_cols.iter() { + let col_expr = Expr::Column(col.clone()); + if 
!missing_exprs.contains(&col_expr) { + missing_exprs.push(col_expr) + } + } + Ok(missing_exprs) + } +} + +fn collect_local_correlated_cols( + plan: &LogicalPlan, + all_cols_map: &HashMap>, + local_cols: &mut BTreeSet, +) { + for child in plan.inputs() { + if let Some(cols) = all_cols_map.get(child) { + local_cols.extend(cols.clone()); + } + // SubqueryAlias is treated as the leaf node + if !matches!(child, LogicalPlan::SubqueryAlias(_)) { + collect_local_correlated_cols(child, all_cols_map, local_cols); + } + } +} + +fn remove_duplicated_filter(filters: Vec, in_predicate: &Expr) -> Vec { + filters + .into_iter() + .filter(|filter| { + if filter == in_predicate { + return false; + } + + // ignore the binary order + !match (filter, in_predicate) { + (Expr::BinaryExpr(a_expr), Expr::BinaryExpr(b_expr)) => { + (a_expr.op == b_expr.op) + && (a_expr.left == b_expr.left && a_expr.right == b_expr.right) + || (a_expr.left == b_expr.right && a_expr.right == b_expr.left) + } + _ => false, + } + }) + .collect::>() +} + #[cfg(test)] mod tests { use super::*; diff --git a/datafusion/optimizer/tests/integration-test.rs b/datafusion/optimizer/tests/integration-test.rs index 761d6539b23c0..350e6e3a75448 100644 --- a/datafusion/optimizer/tests/integration-test.rs +++ b/datafusion/optimizer/tests/integration-test.rs @@ -66,15 +66,14 @@ fn subquery_filter_with_cast() -> Result<()> { )"; let plan = test_sql(sql)?; let expected = "Projection: test.col_int32\ - \n Filter: CAST(test.col_int32 AS Float64) > __scalar_sq_1.__value\ + \n Filter: CAST(test.col_int32 AS Float64) > __scalar_sq_1.AVG(test.col_int32)\ \n CrossJoin:\ \n TableScan: test projection=[col_int32]\ \n SubqueryAlias: __scalar_sq_1\ - \n Projection: AVG(test.col_int32) AS __value\ - \n Aggregate: groupBy=[[]], aggr=[[AVG(test.col_int32)]]\ - \n Projection: test.col_int32\ - \n Filter: test.col_utf8 >= Utf8(\"2002-05-08\") AND test.col_utf8 <= Utf8(\"2002-05-13\")\ - \n TableScan: test projection=[col_int32, col_utf8]"; + 
\n Aggregate: groupBy=[[]], aggr=[[AVG(test.col_int32)]]\ + \n Projection: test.col_int32\ + \n Filter: test.col_utf8 >= Utf8(\"2002-05-08\") AND test.col_utf8 <= Utf8(\"2002-05-13\")\ + \n TableScan: test projection=[col_int32, col_utf8]"; assert_eq!(expected, format!("{plan:?}")); Ok(()) } From ea27c706a5c05626d53c2032fc3a94a2d763efa5 Mon Sep 17 00:00:00 2001 From: "mingmwang@ebay.com" Date: Fri, 26 May 2023 18:17:19 +0800 Subject: [PATCH 02/13] fix comment --- datafusion/core/tests/sql/subqueries.rs | 1 - 1 file changed, 1 deletion(-) diff --git a/datafusion/core/tests/sql/subqueries.rs b/datafusion/core/tests/sql/subqueries.rs index 5b55b2e031ca4..d68433a1fc92b 100644 --- a/datafusion/core/tests/sql/subqueries.rs +++ b/datafusion/core/tests/sql/subqueries.rs @@ -770,7 +770,6 @@ async fn exists_subquery_with_select_null() -> Result<()> { let dataframe = ctx.sql(sql).await.expect(&msg); let plan = dataframe.into_optimized_plan()?; - // decorrelated, limit is removed let expected = vec![ "Filter: EXISTS () [t1_id:UInt32;N, t1_name:Utf8;N]", " Subquery: [NULL:Null;N]", From d11a0ef5fb31ccbcfe119fb382c11dab9ddfb2c8 Mon Sep 17 00:00:00 2001 From: "mingmwang@ebay.com" Date: Fri, 26 May 2023 18:26:02 +0800 Subject: [PATCH 03/13] fix fmt --- datafusion/core/tests/tpcds_planning.rs | 1 - datafusion/optimizer/src/decorrelate_predicate_subquery.rs | 6 ++---- 2 files changed, 2 insertions(+), 5 deletions(-) diff --git a/datafusion/core/tests/tpcds_planning.rs b/datafusion/core/tests/tpcds_planning.rs index b6eca18fe5ecb..3b6ec0ad3214f 100644 --- a/datafusion/core/tests/tpcds_planning.rs +++ b/datafusion/core/tests/tpcds_planning.rs @@ -801,7 +801,6 @@ async fn tpcds_physical_q53() -> Result<()> { create_physical_plan(53).await } -//#[ignore] // Physical plan does not support logical expression () #[tokio::test] async fn tpcds_physical_q54() -> Result<()> { create_physical_plan(54).await diff --git a/datafusion/optimizer/src/decorrelate_predicate_subquery.rs 
b/datafusion/optimizer/src/decorrelate_predicate_subquery.rs index bfa5bbba32f8b..8514a95014266 100644 --- a/datafusion/optimizer/src/decorrelate_predicate_subquery.rs +++ b/datafusion/optimizer/src/decorrelate_predicate_subquery.rs @@ -255,8 +255,7 @@ fn build_join( })), ) => { let right_expr_name = format!("{:?}", unnormalize_col(right.deref().clone())); - let right_col = - Column::new(Some(subquery_alias), right_expr_name); + let right_col = Column::new(Some(subquery_alias), right_expr_name); let in_predicate = Expr::eq(left.deref().clone(), Expr::Column(right_col)); Some(in_predicate.and(join_filter)) } @@ -270,8 +269,7 @@ fn build_join( })), ) => { let right_expr_name = format!("{:?}", unnormalize_col(right.deref().clone())); - let right_col = - Column::new(Some(subquery_alias), right_expr_name); + let right_col = Column::new(Some(subquery_alias), right_expr_name); let in_predicate = Expr::eq(left.deref().clone(), Expr::Column(right_col)); Some(in_predicate) } From 81286590803844d724beda6e067d8d23d4b38217 Mon Sep 17 00:00:00 2001 From: "mingmwang@ebay.com" Date: Fri, 26 May 2023 23:01:40 +0800 Subject: [PATCH 04/13] q64 still overflow stack --- datafusion/core/tests/tpcds_planning.rs | 1 + datafusion/optimizer/src/utils.rs | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/datafusion/core/tests/tpcds_planning.rs b/datafusion/core/tests/tpcds_planning.rs index 3b6ec0ad3214f..3f55049ecd3cc 100644 --- a/datafusion/core/tests/tpcds_planning.rs +++ b/datafusion/core/tests/tpcds_planning.rs @@ -851,6 +851,7 @@ async fn tpcds_physical_q63() -> Result<()> { create_physical_plan(63).await } +#[ignore] // thread 'q64' has overflowed its stack #[tokio::test] async fn tpcds_physical_q64() -> Result<()> { create_physical_plan(64).await diff --git a/datafusion/optimizer/src/utils.rs b/datafusion/optimizer/src/utils.rs index fd8dda79c51d1..2d072b1e12429 100644 --- a/datafusion/optimizer/src/utils.rs +++ b/datafusion/optimizer/src/utils.rs @@ -390,7 +390,7 @@ 
pub fn log_plan(description: &str, plan: &LogicalPlan) { } /// This struct rewrite the sub query plan by pull up the correlated expressions(contains outer reference columns) from the inner subquery's [Filter]. -/// It adds the inner reference columns to the [Projection] or [Aggregate] of the subquery if they are missing, so that they can be evaluated by the parent operator as the join condition. +/// It adds the inner reference columns to the 'Projection' or 'Aggregate' of the subquery if they are missing, so that they can be evaluated by the parent operator as the join condition. pub struct PullUpCorrelatedExpr { pub join_filters: Vec, // map of the plan and its holding correlated columns From 27c449b0a4f3c3fdf7bb0984756a7f69a2f0959c Mon Sep 17 00:00:00 2001 From: "mingmwang@ebay.com" Date: Tue, 30 May 2023 15:50:56 +0800 Subject: [PATCH 05/13] fix count agg bug --- benchmarks/expected-plans/q11.txt | 175 ++++----- benchmarks/expected-plans/q22.txt | 106 +++-- datafusion/core/tests/sql/subqueries.rs | 367 +++++++++++++++++- .../src/decorrelate_predicate_subquery.rs | 3 + .../optimizer/src/scalar_subquery_to_join.rs | 146 +++++-- datafusion/optimizer/src/utils.rs | 126 +++++- .../optimizer/tests/integration-test.rs | 15 +- .../physical-expr/src/aggregate/average.rs | 17 +- 8 files changed, 754 insertions(+), 201 deletions(-) diff --git a/benchmarks/expected-plans/q11.txt b/benchmarks/expected-plans/q11.txt index fae9e0ea7f133..c5b1d6a0d3925 100644 --- a/benchmarks/expected-plans/q11.txt +++ b/benchmarks/expected-plans/q11.txt @@ -1,89 +1,86 @@ -+---------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| plan_type | plan | 
-+---------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| logical_plan | Sort: value DESC NULLS FIRST | -| | Projection: partsupp.ps_partkey, SUM(partsupp.ps_supplycost * partsupp.ps_availqty) AS value | -| | Filter: CAST(SUM(partsupp.ps_supplycost * partsupp.ps_availqty) AS Decimal128(38, 15)) > __scalar_sq_1.SUM(partsupp.ps_supplycost * partsupp.ps_availqty) * Float64(0.0001) | -| | CrossJoin: | -| | Aggregate: groupBy=[[partsupp.ps_partkey]], aggr=[[SUM(partsupp.ps_supplycost * CAST(partsupp.ps_availqty AS Decimal128(10, 0)))]] | -| | Projection: partsupp.ps_partkey, partsupp.ps_availqty, partsupp.ps_supplycost | -| | Inner Join: supplier.s_nationkey = nation.n_nationkey | -| | Projection: partsupp.ps_partkey, partsupp.ps_availqty, partsupp.ps_supplycost, supplier.s_nationkey | -| | Inner Join: partsupp.ps_suppkey = supplier.s_suppkey | -| | TableScan: partsupp projection=[ps_partkey, ps_suppkey, ps_availqty, ps_supplycost] | -| | TableScan: supplier projection=[s_suppkey, s_nationkey] | -| | Projection: nation.n_nationkey | -| | Filter: nation.n_name = Utf8("GERMANY") | -| | TableScan: nation projection=[n_nationkey, n_name] | -| | SubqueryAlias: __scalar_sq_1 | -| | Projection: CAST(CAST(SUM(partsupp.ps_supplycost * partsupp.ps_availqty) AS Float64) * Float64(0.0001) AS Decimal128(38, 15)) | -| | Aggregate: groupBy=[[]], aggr=[[SUM(partsupp.ps_supplycost * CAST(partsupp.ps_availqty AS Decimal128(10, 0)))]] | -| | Projection: partsupp.ps_availqty, partsupp.ps_supplycost | -| | Inner Join: supplier.s_nationkey = nation.n_nationkey | -| | Projection: partsupp.ps_availqty, partsupp.ps_supplycost, supplier.s_nationkey | -| | Inner Join: partsupp.ps_suppkey = supplier.s_suppkey | -| | TableScan: partsupp projection=[ps_suppkey, ps_availqty, ps_supplycost] | -| | TableScan: supplier 
projection=[s_suppkey, s_nationkey] | -| | Projection: nation.n_nationkey | -| | Filter: nation.n_name = Utf8("GERMANY") | -| | TableScan: nation projection=[n_nationkey, n_name] | -| physical_plan | SortExec: expr=[value@1 DESC] | -| | ProjectionExec: expr=[ps_partkey@0 as ps_partkey, SUM(partsupp.ps_supplycost * partsupp.ps_availqty)@1 as value] | -| | CoalesceBatchesExec: target_batch_size=8192 | -| | FilterExec: CAST(SUM(partsupp.ps_supplycost * partsupp.ps_availqty)@1 AS Decimal128(38, 15)) > SUM(partsupp.ps_supplycost * partsupp.ps_availqty) * Float64(0.0001)@2 | -| | CrossJoinExec | -| | CoalescePartitionsExec | -| | AggregateExec: mode=FinalPartitioned, gby=[ps_partkey@0 as ps_partkey], aggr=[SUM(partsupp.ps_supplycost * partsupp.ps_availqty)] | -| | CoalesceBatchesExec: target_batch_size=8192 | -| | RepartitionExec: partitioning=Hash([Column { name: "ps_partkey", index: 0 }], 2), input_partitions=2 | -| | AggregateExec: mode=Partial, gby=[ps_partkey@0 as ps_partkey], aggr=[SUM(partsupp.ps_supplycost * partsupp.ps_availqty)] | -| | ProjectionExec: expr=[ps_partkey@0 as ps_partkey, ps_availqty@1 as ps_availqty, ps_supplycost@2 as ps_supplycost] | -| | CoalesceBatchesExec: target_batch_size=8192 | -| | HashJoinExec: mode=Partitioned, join_type=Inner, on=[(Column { name: "s_nationkey", index: 3 }, Column { name: "n_nationkey", index: 0 })] | -| | CoalesceBatchesExec: target_batch_size=8192 | -| | RepartitionExec: partitioning=Hash([Column { name: "s_nationkey", index: 3 }], 2), input_partitions=2 | -| | RepartitionExec: partitioning=RoundRobinBatch(2), input_partitions=2 | -| | ProjectionExec: expr=[ps_partkey@0 as ps_partkey, ps_availqty@2 as ps_availqty, ps_supplycost@3 as ps_supplycost, s_nationkey@5 as s_nationkey] | -| | CoalesceBatchesExec: target_batch_size=8192 | -| | HashJoinExec: mode=Partitioned, join_type=Inner, on=[(Column { name: "ps_suppkey", index: 1 }, Column { name: "s_suppkey", index: 0 })] | -| | CoalesceBatchesExec: target_batch_size=8192 
| -| | RepartitionExec: partitioning=Hash([Column { name: "ps_suppkey", index: 1 }], 2), input_partitions=0 | -| | MemoryExec: partitions=0, partition_sizes=[] | -| | CoalesceBatchesExec: target_batch_size=8192 | -| | RepartitionExec: partitioning=Hash([Column { name: "s_suppkey", index: 0 }], 2), input_partitions=0 | -| | MemoryExec: partitions=0, partition_sizes=[] | -| | CoalesceBatchesExec: target_batch_size=8192 | -| | RepartitionExec: partitioning=Hash([Column { name: "n_nationkey", index: 0 }], 2), input_partitions=2 | -| | RepartitionExec: partitioning=RoundRobinBatch(2), input_partitions=0 | -| | ProjectionExec: expr=[n_nationkey@0 as n_nationkey] | -| | CoalesceBatchesExec: target_batch_size=8192 | -| | FilterExec: n_name@1 = GERMANY | -| | MemoryExec: partitions=0, partition_sizes=[] | -| | ProjectionExec: expr=[CAST(CAST(SUM(partsupp.ps_supplycost * partsupp.ps_availqty)@0 AS Float64) * 0.0001 AS Decimal128(38, 15)) as SUM(partsupp.ps_supplycost * partsupp.ps_availqty) * Float64(0.0001)] | -| | AggregateExec: mode=Final, gby=[], aggr=[SUM(partsupp.ps_supplycost * partsupp.ps_availqty)] | -| | CoalescePartitionsExec | -| | AggregateExec: mode=Partial, gby=[], aggr=[SUM(partsupp.ps_supplycost * partsupp.ps_availqty)] | -| | ProjectionExec: expr=[ps_availqty@0 as ps_availqty, ps_supplycost@1 as ps_supplycost] | -| | CoalesceBatchesExec: target_batch_size=8192 | -| | HashJoinExec: mode=Partitioned, join_type=Inner, on=[(Column { name: "s_nationkey", index: 2 }, Column { name: "n_nationkey", index: 0 })] | -| | CoalesceBatchesExec: target_batch_size=8192 | -| | RepartitionExec: partitioning=Hash([Column { name: "s_nationkey", index: 2 }], 2), input_partitions=2 | -| | RepartitionExec: partitioning=RoundRobinBatch(2), input_partitions=2 | -| | ProjectionExec: expr=[ps_availqty@1 as ps_availqty, ps_supplycost@2 as ps_supplycost, s_nationkey@4 as s_nationkey] | -| | CoalesceBatchesExec: target_batch_size=8192 | -| | HashJoinExec: mode=Partitioned, 
join_type=Inner, on=[(Column { name: "ps_suppkey", index: 0 }, Column { name: "s_suppkey", index: 0 })] | -| | CoalesceBatchesExec: target_batch_size=8192 | -| | RepartitionExec: partitioning=Hash([Column { name: "ps_suppkey", index: 0 }], 2), input_partitions=0 | -| | MemoryExec: partitions=0, partition_sizes=[] | -| | CoalesceBatchesExec: target_batch_size=8192 | -| | RepartitionExec: partitioning=Hash([Column { name: "s_suppkey", index: 0 }], 2), input_partitions=0 | -| | MemoryExec: partitions=0, partition_sizes=[] | -| | CoalesceBatchesExec: target_batch_size=8192 | -| | RepartitionExec: partitioning=Hash([Column { name: "n_nationkey", index: 0 }], 2), input_partitions=2 | -| | RepartitionExec: partitioning=RoundRobinBatch(2), input_partitions=0 | -| | ProjectionExec: expr=[n_nationkey@0 as n_nationkey] | -| | CoalesceBatchesExec: target_batch_size=8192 | -| | FilterExec: n_name@1 = GERMANY | -| | MemoryExec: partitions=0, partition_sizes=[] | -| | | -+---------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ ++---------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| plan_type | plan | 
++---------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| logical_plan | Sort: value DESC NULLS FIRST | +| | Projection: partsupp.ps_partkey, SUM(partsupp.ps_supplycost * partsupp.ps_availqty) AS value | +| | Inner Join: Filter: CAST(SUM(partsupp.ps_supplycost * partsupp.ps_availqty) AS Decimal128(38, 15)) > __scalar_sq_1.SUM(partsupp.ps_supplycost * partsupp.ps_availqty) * Float64(0.0001) | +| | Aggregate: groupBy=[[partsupp.ps_partkey]], aggr=[[SUM(partsupp.ps_supplycost * CAST(partsupp.ps_availqty AS Decimal128(10, 0)))]] | +| | Projection: partsupp.ps_partkey, partsupp.ps_availqty, partsupp.ps_supplycost | +| | Inner Join: supplier.s_nationkey = nation.n_nationkey | +| | Projection: partsupp.ps_partkey, partsupp.ps_availqty, partsupp.ps_supplycost, supplier.s_nationkey | +| | Inner Join: partsupp.ps_suppkey = supplier.s_suppkey | +| | TableScan: partsupp projection=[ps_partkey, ps_suppkey, ps_availqty, ps_supplycost] | +| | TableScan: supplier projection=[s_suppkey, s_nationkey] | +| | Projection: nation.n_nationkey | +| | Filter: nation.n_name = Utf8("GERMANY") | +| | TableScan: nation projection=[n_nationkey, n_name] | +| | SubqueryAlias: __scalar_sq_1 | +| | Projection: CAST(CAST(SUM(partsupp.ps_supplycost * partsupp.ps_availqty) AS Float64) * Float64(0.0001) AS Decimal128(38, 15)) | +| | Aggregate: groupBy=[[]], aggr=[[SUM(partsupp.ps_supplycost * CAST(partsupp.ps_availqty AS Decimal128(10, 0)))]] | +| | Projection: partsupp.ps_availqty, partsupp.ps_supplycost | +| | Inner 
Join: supplier.s_nationkey = nation.n_nationkey | +| | Projection: partsupp.ps_availqty, partsupp.ps_supplycost, supplier.s_nationkey | +| | Inner Join: partsupp.ps_suppkey = supplier.s_suppkey | +| | TableScan: partsupp projection=[ps_suppkey, ps_availqty, ps_supplycost] | +| | TableScan: supplier projection=[s_suppkey, s_nationkey] | +| | Projection: nation.n_nationkey | +| | Filter: nation.n_name = Utf8("GERMANY") | +| | TableScan: nation projection=[n_nationkey, n_name] | +| physical_plan | SortPreservingMergeExec: [value@1 DESC] | +| | SortExec: expr=[value@1 DESC] | +| | ProjectionExec: expr=[ps_partkey@0 as ps_partkey, SUM(partsupp.ps_supplycost * partsupp.ps_availqty)@1 as value] | +| | NestedLoopJoinExec: join_type=Inner, filter=BinaryExpr { left: CastExpr { expr: Column { name: "SUM(partsupp.ps_supplycost * partsupp.ps_availqty)", index: 0 }, cast_type: Decimal128(38, 15), cast_options: CastOptions { safe: false, format_options: FormatOptions { safe: true, null: "", date_format: None, datetime_format: None, timestamp_format: None, timestamp_tz_format: None, time_format: None } } }, op: Gt, right: Column { name: "SUM(partsupp.ps_supplycost * partsupp.ps_availqty) * Float64(0.0001)", index: 1 } } | +| | AggregateExec: mode=FinalPartitioned, gby=[ps_partkey@0 as ps_partkey], aggr=[SUM(partsupp.ps_supplycost * partsupp.ps_availqty)] | +| | CoalesceBatchesExec: target_batch_size=8192 | +| | RepartitionExec: partitioning=Hash([Column { name: "ps_partkey", index: 0 }], 2), input_partitions=2 | +| | AggregateExec: mode=Partial, gby=[ps_partkey@0 as ps_partkey], aggr=[SUM(partsupp.ps_supplycost * partsupp.ps_availqty)] | +| | ProjectionExec: expr=[ps_partkey@0 as ps_partkey, ps_availqty@1 as ps_availqty, ps_supplycost@2 as ps_supplycost] | +| | CoalesceBatchesExec: target_batch_size=8192 | +| | HashJoinExec: mode=Partitioned, join_type=Inner, on=[(Column { name: "s_nationkey", index: 3 }, Column { name: "n_nationkey", index: 0 })] | +| | CoalesceBatchesExec: 
target_batch_size=8192 | +| | RepartitionExec: partitioning=Hash([Column { name: "s_nationkey", index: 3 }], 2), input_partitions=2 | +| | RepartitionExec: partitioning=RoundRobinBatch(2), input_partitions=2 | +| | ProjectionExec: expr=[ps_partkey@0 as ps_partkey, ps_availqty@2 as ps_availqty, ps_supplycost@3 as ps_supplycost, s_nationkey@5 as s_nationkey] | +| | CoalesceBatchesExec: target_batch_size=8192 | +| | HashJoinExec: mode=Partitioned, join_type=Inner, on=[(Column { name: "ps_suppkey", index: 1 }, Column { name: "s_suppkey", index: 0 })] | +| | CoalesceBatchesExec: target_batch_size=8192 | +| | RepartitionExec: partitioning=Hash([Column { name: "ps_suppkey", index: 1 }], 2), input_partitions=0 | +| | MemoryExec: partitions=0, partition_sizes=[] | +| | CoalesceBatchesExec: target_batch_size=8192 | +| | RepartitionExec: partitioning=Hash([Column { name: "s_suppkey", index: 0 }], 2), input_partitions=0 | +| | MemoryExec: partitions=0, partition_sizes=[] | +| | CoalesceBatchesExec: target_batch_size=8192 | +| | RepartitionExec: partitioning=Hash([Column { name: "n_nationkey", index: 0 }], 2), input_partitions=2 | +| | RepartitionExec: partitioning=RoundRobinBatch(2), input_partitions=0 | +| | ProjectionExec: expr=[n_nationkey@0 as n_nationkey] | +| | CoalesceBatchesExec: target_batch_size=8192 | +| | FilterExec: n_name@1 = GERMANY | +| | MemoryExec: partitions=0, partition_sizes=[] | +| | ProjectionExec: expr=[CAST(CAST(SUM(partsupp.ps_supplycost * partsupp.ps_availqty)@0 AS Float64) * 0.0001 AS Decimal128(38, 15)) as SUM(partsupp.ps_supplycost * partsupp.ps_availqty) * Float64(0.0001)] | +| | AggregateExec: mode=Final, gby=[], aggr=[SUM(partsupp.ps_supplycost * partsupp.ps_availqty)] | +| | CoalescePartitionsExec | +| | AggregateExec: mode=Partial, gby=[], aggr=[SUM(partsupp.ps_supplycost * partsupp.ps_availqty)] | +| | ProjectionExec: expr=[ps_availqty@0 as ps_availqty, ps_supplycost@1 as ps_supplycost] | +| | CoalesceBatchesExec: target_batch_size=8192 | +| 
| HashJoinExec: mode=Partitioned, join_type=Inner, on=[(Column { name: "s_nationkey", index: 2 }, Column { name: "n_nationkey", index: 0 })] | +| | CoalesceBatchesExec: target_batch_size=8192 | +| | RepartitionExec: partitioning=Hash([Column { name: "s_nationkey", index: 2 }], 2), input_partitions=2 | +| | RepartitionExec: partitioning=RoundRobinBatch(2), input_partitions=2 | +| | ProjectionExec: expr=[ps_availqty@1 as ps_availqty, ps_supplycost@2 as ps_supplycost, s_nationkey@4 as s_nationkey] | +| | CoalesceBatchesExec: target_batch_size=8192 | +| | HashJoinExec: mode=Partitioned, join_type=Inner, on=[(Column { name: "ps_suppkey", index: 0 }, Column { name: "s_suppkey", index: 0 })] | +| | CoalesceBatchesExec: target_batch_size=8192 | +| | RepartitionExec: partitioning=Hash([Column { name: "ps_suppkey", index: 0 }], 2), input_partitions=0 | +| | MemoryExec: partitions=0, partition_sizes=[] | +| | CoalesceBatchesExec: target_batch_size=8192 | +| | RepartitionExec: partitioning=Hash([Column { name: "s_suppkey", index: 0 }], 2), input_partitions=0 | +| | MemoryExec: partitions=0, partition_sizes=[] | +| | CoalesceBatchesExec: target_batch_size=8192 | +| | RepartitionExec: partitioning=Hash([Column { name: "n_nationkey", index: 0 }], 2), input_partitions=2 | +| | RepartitionExec: partitioning=RoundRobinBatch(2), input_partitions=0 | +| | ProjectionExec: expr=[n_nationkey@0 as n_nationkey] | +| | CoalesceBatchesExec: target_batch_size=8192 | +| | FilterExec: n_name@1 = GERMANY | +| | MemoryExec: partitions=0, partition_sizes=[] | +| | | 
++---------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ \ No newline at end of file diff --git a/benchmarks/expected-plans/q22.txt b/benchmarks/expected-plans/q22.txt index 16aebfe90abf3..479727b8fd555 100644 --- a/benchmarks/expected-plans/q22.txt +++ b/benchmarks/expected-plans/q22.txt @@ -1,55 +1,51 @@ -+---------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| plan_type | plan | -+---------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| logical_plan | Sort: custsale.cntrycode ASC NULLS LAST | -| | Projection: custsale.cntrycode, COUNT(UInt8(1)) AS numcust, SUM(custsale.c_acctbal) AS totacctbal | -| | Aggregate: groupBy=[[custsale.cntrycode]], aggr=[[COUNT(UInt8(1)), SUM(custsale.c_acctbal)]] | -| | SubqueryAlias: custsale | -| | Projection: substr(customer.c_phone, Int64(1), Int64(2)) AS cntrycode, customer.c_acctbal | -| | Filter: CAST(customer.c_acctbal AS Decimal128(19, 6)) > 
__scalar_sq_1.AVG(customer.c_acctbal) | -| | CrossJoin: | -| | Projection: customer.c_phone, customer.c_acctbal | -| | LeftAnti Join: customer.c_custkey = __correlated_sq_1.o_custkey | -| | Filter: substr(customer.c_phone, Int64(1), Int64(2)) IN ([Utf8("13"), Utf8("31"), Utf8("23"), Utf8("29"), Utf8("30"), Utf8("18"), Utf8("17")]) | -| | TableScan: customer projection=[c_custkey, c_phone, c_acctbal] | -| | SubqueryAlias: __correlated_sq_1 | -| | TableScan: orders projection=[o_custkey] | -| | SubqueryAlias: __scalar_sq_1 | -| | Aggregate: groupBy=[[]], aggr=[[AVG(customer.c_acctbal)]] | -| | Projection: customer.c_acctbal | -| | Filter: customer.c_acctbal > Decimal128(Some(0),15,2) AND substr(customer.c_phone, Int64(1), Int64(2)) IN ([Utf8("13"), Utf8("31"), Utf8("23"), Utf8("29"), Utf8("30"), Utf8("18"), Utf8("17")]) | -| | TableScan: customer projection=[c_phone, c_acctbal] | -| physical_plan | SortPreservingMergeExec: [cntrycode@0 ASC NULLS LAST] | -| | SortExec: expr=[cntrycode@0 ASC NULLS LAST] | -| | ProjectionExec: expr=[cntrycode@0 as cntrycode, COUNT(UInt8(1))@1 as numcust, SUM(custsale.c_acctbal)@2 as totacctbal] | -| | AggregateExec: mode=FinalPartitioned, gby=[cntrycode@0 as cntrycode], aggr=[COUNT(UInt8(1)), SUM(custsale.c_acctbal)] | -| | CoalesceBatchesExec: target_batch_size=8192 | -| | RepartitionExec: partitioning=Hash([Column { name: "cntrycode", index: 0 }], 2), input_partitions=1 | -| | AggregateExec: mode=Partial, gby=[cntrycode@0 as cntrycode], aggr=[COUNT(UInt8(1)), SUM(custsale.c_acctbal)] | -| | ProjectionExec: expr=[substr(c_phone@0, 1, 2) as cntrycode, c_acctbal@1 as c_acctbal] | -| | CoalesceBatchesExec: target_batch_size=8192 | -| | FilterExec: CAST(c_acctbal@1 AS Decimal128(19, 6)) > AVG(customer.c_acctbal)@2 | -| | CrossJoinExec | -| | CoalescePartitionsExec | -| | ProjectionExec: expr=[c_phone@1 as c_phone, c_acctbal@2 as c_acctbal] | -| | CoalesceBatchesExec: target_batch_size=8192 | -| | HashJoinExec: mode=Partitioned, 
join_type=LeftAnti, on=[(Column { name: "c_custkey", index: 0 }, Column { name: "o_custkey", index: 0 })] | -| | CoalesceBatchesExec: target_batch_size=8192 | -| | RepartitionExec: partitioning=Hash([Column { name: "c_custkey", index: 0 }], 2), input_partitions=2 | -| | RepartitionExec: partitioning=RoundRobinBatch(2), input_partitions=0 | -| | CoalesceBatchesExec: target_batch_size=8192 | -| | FilterExec: Use substr(c_phone@1, 1, 2) IN (SET) ([Literal { value: Utf8("13") }, Literal { value: Utf8("31") }, Literal { value: Utf8("23") }, Literal { value: Utf8("29") }, Literal { value: Utf8("30") }, Literal { value: Utf8("18") }, Literal { value: Utf8("17") }]) | -| | MemoryExec: partitions=0, partition_sizes=[] | -| | CoalesceBatchesExec: target_batch_size=8192 | -| | RepartitionExec: partitioning=Hash([Column { name: "o_custkey", index: 0 }], 2), input_partitions=0 | -| | MemoryExec: partitions=0, partition_sizes=[] | -| | AggregateExec: mode=Final, gby=[], aggr=[AVG(customer.c_acctbal)] | -| | CoalescePartitionsExec | -| | AggregateExec: mode=Partial, gby=[], aggr=[AVG(customer.c_acctbal)] | -| | RepartitionExec: partitioning=RoundRobinBatch(2), input_partitions=0 | -| | ProjectionExec: expr=[c_acctbal@1 as c_acctbal] | -| | CoalesceBatchesExec: target_batch_size=8192 | -| | FilterExec: c_acctbal@1 > Some(0),15,2 AND Use substr(c_phone@0, 1, 2) IN (SET) ([Literal { value: Utf8("13") }, Literal { value: Utf8("31") }, Literal { value: Utf8("23") }, Literal { value: Utf8("29") }, Literal { value: Utf8("30") }, Literal { value: Utf8("18") }, Literal { value: Utf8("17") }]) | -| | MemoryExec: partitions=0, partition_sizes=[] | -| | | 
-+---------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ \ No newline at end of file ++---------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| plan_type | plan | ++---------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| logical_plan | Sort: custsale.cntrycode ASC NULLS LAST | +| | Projection: custsale.cntrycode, COUNT(UInt8(1)) AS numcust, SUM(custsale.c_acctbal) AS totacctbal | +| | Aggregate: groupBy=[[custsale.cntrycode]], aggr=[[COUNT(UInt8(1)), SUM(custsale.c_acctbal)]] | +| | SubqueryAlias: custsale | +| | Projection: substr(customer.c_phone, Int64(1), Int64(2)) AS cntrycode, customer.c_acctbal | +| | Inner Join: Filter: CAST(customer.c_acctbal AS Decimal128(19, 6)) > __scalar_sq_1.AVG(customer.c_acctbal) | +| | Projection: customer.c_phone, customer.c_acctbal | +| | LeftAnti Join: customer.c_custkey = __correlated_sq_1.o_custkey | +| | Filter: 
substr(customer.c_phone, Int64(1), Int64(2)) IN ([Utf8("13"), Utf8("31"), Utf8("23"), Utf8("29"), Utf8("30"), Utf8("18"), Utf8("17")]) | +| | TableScan: customer projection=[c_custkey, c_phone, c_acctbal] | +| | SubqueryAlias: __correlated_sq_1 | +| | TableScan: orders projection=[o_custkey] | +| | SubqueryAlias: __scalar_sq_1 | +| | Aggregate: groupBy=[[]], aggr=[[AVG(customer.c_acctbal)]] | +| | Projection: customer.c_acctbal | +| | Filter: customer.c_acctbal > Decimal128(Some(0),15,2) AND substr(customer.c_phone, Int64(1), Int64(2)) IN ([Utf8("13"), Utf8("31"), Utf8("23"), Utf8("29"), Utf8("30"), Utf8("18"), Utf8("17")]) | +| | TableScan: customer projection=[c_phone, c_acctbal] | +| physical_plan | SortPreservingMergeExec: [cntrycode@0 ASC NULLS LAST] | +| | SortExec: expr=[cntrycode@0 ASC NULLS LAST] | +| | ProjectionExec: expr=[cntrycode@0 as cntrycode, COUNT(UInt8(1))@1 as numcust, SUM(custsale.c_acctbal)@2 as totacctbal] | +| | AggregateExec: mode=FinalPartitioned, gby=[cntrycode@0 as cntrycode], aggr=[COUNT(UInt8(1)), SUM(custsale.c_acctbal)] | +| | CoalesceBatchesExec: target_batch_size=8192 | +| | RepartitionExec: partitioning=Hash([Column { name: "cntrycode", index: 0 }], 2), input_partitions=2 | +| | AggregateExec: mode=Partial, gby=[cntrycode@0 as cntrycode], aggr=[COUNT(UInt8(1)), SUM(custsale.c_acctbal)] | +| | ProjectionExec: expr=[substr(c_phone@0, 1, 2) as cntrycode, c_acctbal@1 as c_acctbal] | +| | NestedLoopJoinExec: join_type=Inner, filter=BinaryExpr { left: CastExpr { expr: Column { name: "c_acctbal", index: 0 }, cast_type: Decimal128(19, 6), cast_options: CastOptions { safe: false, format_options: FormatOptions { safe: true, null: "", date_format: None, datetime_format: None, timestamp_format: None, timestamp_tz_format: None, time_format: None } } }, op: Gt, right: Column { name: "AVG(customer.c_acctbal)", index: 1 } } | +| | ProjectionExec: expr=[c_phone@1 as c_phone, c_acctbal@2 as c_acctbal] | +| | CoalesceBatchesExec: 
target_batch_size=8192 | +| | HashJoinExec: mode=Partitioned, join_type=LeftAnti, on=[(Column { name: "c_custkey", index: 0 }, Column { name: "o_custkey", index: 0 })] | +| | CoalesceBatchesExec: target_batch_size=8192 | +| | RepartitionExec: partitioning=Hash([Column { name: "c_custkey", index: 0 }], 2), input_partitions=2 | +| | RepartitionExec: partitioning=RoundRobinBatch(2), input_partitions=0 | +| | CoalesceBatchesExec: target_batch_size=8192 | +| | FilterExec: Use substr(c_phone@1, 1, 2) IN (SET) ([Literal { value: Utf8("13") }, Literal { value: Utf8("31") }, Literal { value: Utf8("23") }, Literal { value: Utf8("29") }, Literal { value: Utf8("30") }, Literal { value: Utf8("18") }, Literal { value: Utf8("17") }]) | +| | MemoryExec: partitions=0, partition_sizes=[] | +| | CoalesceBatchesExec: target_batch_size=8192 | +| | RepartitionExec: partitioning=Hash([Column { name: "o_custkey", index: 0 }], 2), input_partitions=0 | +| | MemoryExec: partitions=0, partition_sizes=[] | +| | AggregateExec: mode=Final, gby=[], aggr=[AVG(customer.c_acctbal)] | +| | CoalescePartitionsExec | +| | AggregateExec: mode=Partial, gby=[], aggr=[AVG(customer.c_acctbal)] | +| | RepartitionExec: partitioning=RoundRobinBatch(2), input_partitions=0 | +| | ProjectionExec: expr=[c_acctbal@1 as c_acctbal] | +| | CoalesceBatchesExec: target_batch_size=8192 | +| | FilterExec: c_acctbal@1 > Some(0),15,2 AND Use substr(c_phone@0, 1, 2) IN (SET) ([Literal { value: Utf8("13") }, Literal { value: Utf8("31") }, Literal { value: Utf8("23") }, Literal { value: Utf8("29") }, Literal { value: Utf8("30") }, Literal { value: Utf8("18") }, Literal { value: Utf8("17") }]) | +| | MemoryExec: partitions=0, partition_sizes=[] | +| | | 
++---------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ \ No newline at end of file diff --git a/datafusion/core/tests/sql/subqueries.rs b/datafusion/core/tests/sql/subqueries.rs index d68433a1fc92b..f3da709f88b86 100644 --- a/datafusion/core/tests/sql/subqueries.rs +++ b/datafusion/core/tests/sql/subqueries.rs @@ -821,7 +821,7 @@ async fn exists_subquery_with_limit() -> Result<()> { "| 44 | d |", "+-------+---------+", ]; - assert_batches_eq!(expected, &results); + assert_batches_sorted_eq!(expected, &results); Ok(()) } @@ -890,7 +890,7 @@ async fn not_exists_subquery_with_limit0() -> Result<()> { "| 44 | d |", "+-------+---------+", ]; - assert_batches_eq!(expected, &results); + assert_batches_sorted_eq!(expected, &results); Ok(()) } @@ -952,6 +952,46 @@ async fn in_non_correlated_subquery_with_limit() -> Result<()> { Ok(()) } +#[tokio::test] +async fn uncorrelated_scalar_subquery_with_limit0() -> Result<()> { + let ctx = create_join_context("t1_id", "t2_id", true)?; + + let sql = "SELECT t1_id, (SELECT t2_id FROM t2 limit 0) FROM t1"; + let msg = format!("Creating logical plan for '{sql}'"); + let dataframe = ctx.sql(sql).await.expect(&msg); + let plan = dataframe.into_optimized_plan()?; + + // not de-correlated + let expected = vec![ + "Projection: t1.t1_id, __scalar_sq_1.t2_id AS t2_id [t1_id:UInt32;N, t2_id:UInt32;N]", + " Left Join: [t1_id:UInt32;N, t2_id:UInt32;N]", + " TableScan: t1 projection=[t1_id] [t1_id:UInt32;N]", + " EmptyRelation [t2_id:UInt32;N]", + ]; + let formatted = plan.display_indent_schema().to_string(); + let actual: Vec<&str> = 
formatted.trim().lines().collect(); + assert_eq!( + expected, actual, + "\n\nexpected:\n\n{expected:#?}\nactual:\n\n{actual:#?}\n\n" + ); + + // assert data + let results = execute_to_batches(&ctx, sql).await; + let expected = vec![ + "+-------+-------+", + "| t1_id | t2_id |", + "+-------+-------+", + "| 11 | |", + "| 22 | |", + "| 33 | |", + "| 44 | |", + "+-------+-------+", + ]; + assert_batches_sorted_eq!(expected, &results); + + Ok(()) +} + #[tokio::test] async fn support_union_subquery() -> Result<()> { let ctx = create_join_context("t1_id", "t2_id", true)?; @@ -1021,15 +1061,15 @@ async fn simple_uncorrelated_scalar_subquery() -> Result<()> { async fn simple_uncorrelated_scalar_subquery2() -> Result<()> { let ctx = create_join_context("t1_id", "t2_id", true)?; - let sql = "select (select count(*) from t1) as b, (select count(1) from t2) as c"; + let sql = "select (select count(*) from t1) as b, (select count(1) from t2)"; let msg = format!("Creating logical plan for '{sql}'"); let dataframe = ctx.sql(sql).await.expect(&msg); let plan = dataframe.into_optimized_plan()?; let expected = vec![ - "Projection: __scalar_sq_1.COUNT(UInt8(1)) AS b, __scalar_sq_2.COUNT(Int64(1)) AS c [b:Int64;N, c:Int64;N]", - " CrossJoin: [COUNT(UInt8(1)):Int64;N, COUNT(Int64(1)):Int64;N]", + "Projection: __scalar_sq_1.COUNT(UInt8(1)) AS b, __scalar_sq_2.COUNT(Int64(1)) AS COUNT(Int64(1)) [b:Int64;N, COUNT(Int64(1)):Int64;N]", + " Left Join: [COUNT(UInt8(1)):Int64;N, COUNT(Int64(1)):Int64;N]", " SubqueryAlias: __scalar_sq_1 [COUNT(UInt8(1)):Int64;N]", " Aggregate: groupBy=[[]], aggr=[[COUNT(UInt8(1))]] [COUNT(UInt8(1)):Int64;N]", " TableScan: t1 projection=[t1_id] [t1_id:UInt32;N]", @@ -1047,13 +1087,318 @@ async fn simple_uncorrelated_scalar_subquery2() -> Result<()> { // assert data let results = execute_to_batches(&ctx, sql).await; let expected = vec![ - "+---+---+", - "| b | c |", - "+---+---+", - "| 4 | 4 |", - "+---+---+", + "+---+-----------------+", + "| b | COUNT(Int64(1)) 
|", + "+---+-----------------+", + "| 4 | 4 |", + "+---+-----------------+", ]; - assert_batches_eq!(expected, &results); + assert_batches_sorted_eq!(expected, &results); + + Ok(()) +} + +#[tokio::test] +async fn correlated_scalar_subquery_count_agg() -> Result<()> { + let ctx = create_join_context("t1_id", "t2_id", true)?; + + let sql = + "SELECT t1_id, (SELECT count(*) FROM t2 WHERE t2.t2_int = t1.t1_int) from t1"; + + let msg = format!("Creating logical plan for '{sql}'"); + let dataframe = ctx.sql(sql).await.expect(&msg); + let plan = dataframe.into_optimized_plan()?; + + let expected = vec![ + "Projection: t1.t1_id, CASE WHEN __scalar_sq_1.__always_true IS NULL THEN Int64(0) ELSE __scalar_sq_1.COUNT(UInt8(1)) END AS COUNT(UInt8(1)) [t1_id:UInt32;N, COUNT(UInt8(1)):Int64;N]", + " Left Join: t1.t1_int = __scalar_sq_1.t2_int [t1_id:UInt32;N, t1_int:UInt32;N, COUNT(UInt8(1)):Int64;N, t2_int:UInt32;N, __always_true:Boolean;N]", + " TableScan: t1 projection=[t1_id, t1_int] [t1_id:UInt32;N, t1_int:UInt32;N]", + " SubqueryAlias: __scalar_sq_1 [COUNT(UInt8(1)):Int64;N, t2_int:UInt32;N, __always_true:Boolean]", + " Projection: COUNT(UInt8(1)), t2.t2_int, __always_true [COUNT(UInt8(1)):Int64;N, t2_int:UInt32;N, __always_true:Boolean]", + " Aggregate: groupBy=[[t2.t2_int, Boolean(true) AS __always_true]], aggr=[[COUNT(UInt8(1))]] [t2_int:UInt32;N, __always_true:Boolean, COUNT(UInt8(1)):Int64;N]", + " TableScan: t2 projection=[t2_int] [t2_int:UInt32;N]", + ]; + let formatted = plan.display_indent_schema().to_string(); + let actual: Vec<&str> = formatted.trim().lines().collect(); + assert_eq!( + expected, actual, + "\n\nexpected:\n\n{expected:#?}\nactual:\n\n{actual:#?}\n\n" + ); + + // assert data + let results = execute_to_batches(&ctx, sql).await; + let expected = vec![ + "+-------+-----------------+", + "| t1_id | COUNT(UInt8(1)) |", + "+-------+-----------------+", + "| 33 | 3 |", + "| 22 | 0 |", + "| 11 | 1 |", + "| 44 | 0 |", + "+-------+-----------------+", + ]; + 
assert_batches_sorted_eq!(expected, &results); + + Ok(()) +} + +#[tokio::test] +async fn correlated_scalar_subquery_count_agg2() -> Result<()> { + let ctx = create_join_context("t1_id", "t2_id", true)?; + + let sql = "SELECT t1_id, (SELECT count(*) FROM t2 WHERE t2.t2_int = t1.t1_int) as cnt from t1"; + + let msg = format!("Creating logical plan for '{sql}'"); + let dataframe = ctx.sql(sql).await.expect(&msg); + let plan = dataframe.into_optimized_plan()?; + + let expected = vec![ + "Projection: t1.t1_id, CASE WHEN __scalar_sq_1.__always_true IS NULL THEN Int64(0) ELSE __scalar_sq_1.COUNT(UInt8(1)) END AS cnt [t1_id:UInt32;N, cnt:Int64;N]", + " Left Join: t1.t1_int = __scalar_sq_1.t2_int [t1_id:UInt32;N, t1_int:UInt32;N, COUNT(UInt8(1)):Int64;N, t2_int:UInt32;N, __always_true:Boolean;N]", + " TableScan: t1 projection=[t1_id, t1_int] [t1_id:UInt32;N, t1_int:UInt32;N]", + " SubqueryAlias: __scalar_sq_1 [COUNT(UInt8(1)):Int64;N, t2_int:UInt32;N, __always_true:Boolean]", + " Projection: COUNT(UInt8(1)), t2.t2_int, __always_true [COUNT(UInt8(1)):Int64;N, t2_int:UInt32;N, __always_true:Boolean]", + " Aggregate: groupBy=[[t2.t2_int, Boolean(true) AS __always_true]], aggr=[[COUNT(UInt8(1))]] [t2_int:UInt32;N, __always_true:Boolean, COUNT(UInt8(1)):Int64;N]", + " TableScan: t2 projection=[t2_int] [t2_int:UInt32;N]", + ]; + let formatted = plan.display_indent_schema().to_string(); + let actual: Vec<&str> = formatted.trim().lines().collect(); + assert_eq!( + expected, actual, + "\n\nexpected:\n\n{expected:#?}\nactual:\n\n{actual:#?}\n\n" + ); + + // assert data + let results = execute_to_batches(&ctx, sql).await; + let expected = vec![ + "+-------+-----+", + "| t1_id | cnt |", + "+-------+-----+", + "| 33 | 3 |", + "| 22 | 0 |", + "| 11 | 1 |", + "| 44 | 0 |", + "+-------+-----+", + ]; + assert_batches_sorted_eq!(expected, &results); + + Ok(()) +} + +#[tokio::test] +async fn correlated_scalar_subquery_count_agg_with_alias() -> Result<()> { + let ctx = 
create_join_context("t1_id", "t2_id", true)?; + + let sql = "SELECT t1_id, (SELECT count(*) as _cnt FROM t2 WHERE t2.t2_int = t1.t1_int) as cnt from t1"; + + let msg = format!("Creating logical plan for '{sql}'"); + let dataframe = ctx.sql(sql).await.expect(&msg); + let plan = dataframe.into_optimized_plan()?; + + let expected = vec![ + "Projection: t1.t1_id, CASE WHEN __scalar_sq_1.__always_true IS NULL THEN Int64(0) AS _cnt ELSE __scalar_sq_1._cnt END AS cnt [t1_id:UInt32;N, cnt:Int64;N]", + " Left Join: t1.t1_int = __scalar_sq_1.t2_int [t1_id:UInt32;N, t1_int:UInt32;N, _cnt:Int64;N, t2_int:UInt32;N, __always_true:Boolean;N]", + " TableScan: t1 projection=[t1_id, t1_int] [t1_id:UInt32;N, t1_int:UInt32;N]", + " SubqueryAlias: __scalar_sq_1 [_cnt:Int64;N, t2_int:UInt32;N, __always_true:Boolean]", + " Projection: COUNT(UInt8(1)) AS _cnt, t2.t2_int, __always_true [_cnt:Int64;N, t2_int:UInt32;N, __always_true:Boolean]", + " Aggregate: groupBy=[[t2.t2_int, Boolean(true) AS __always_true]], aggr=[[COUNT(UInt8(1))]] [t2_int:UInt32;N, __always_true:Boolean, COUNT(UInt8(1)):Int64;N]", + " TableScan: t2 projection=[t2_int] [t2_int:UInt32;N]", + ]; + let formatted = plan.display_indent_schema().to_string(); + let actual: Vec<&str> = formatted.trim().lines().collect(); + assert_eq!( + expected, actual, + "\n\nexpected:\n\n{expected:#?}\nactual:\n\n{actual:#?}\n\n" + ); + + // assert data + let results = execute_to_batches(&ctx, sql).await; + let expected = vec![ + "+-------+-----+", + "| t1_id | cnt |", + "+-------+-----+", + "| 33 | 3 |", + "| 22 | 0 |", + "| 11 | 1 |", + "| 44 | 0 |", + "+-------+-----+", + ]; + assert_batches_sorted_eq!(expected, &results); + + Ok(()) +} + +#[tokio::test] +async fn correlated_scalar_subquery_count_agg_complex_expr() -> Result<()> { + let ctx = create_join_context("t1_id", "t2_id", true)?; + + let sql = "SELECT t1_id, (SELECT count(*) + 2 as _cnt FROM t2 WHERE t2.t2_int = t1.t1_int) from t1"; + + let msg = format!("Creating logical plan for 
'{sql}'"); + let dataframe = ctx.sql(sql).await.expect(&msg); + let plan = dataframe.into_optimized_plan()?; + + let expected = vec![ + "Projection: t1.t1_id, CASE WHEN __scalar_sq_1.__always_true IS NULL THEN Int64(2) AS _cnt ELSE __scalar_sq_1._cnt END AS _cnt [t1_id:UInt32;N, _cnt:Int64;N]", + " Left Join: t1.t1_int = __scalar_sq_1.t2_int [t1_id:UInt32;N, t1_int:UInt32;N, _cnt:Int64;N, t2_int:UInt32;N, __always_true:Boolean;N]", + " TableScan: t1 projection=[t1_id, t1_int] [t1_id:UInt32;N, t1_int:UInt32;N]", + " SubqueryAlias: __scalar_sq_1 [_cnt:Int64;N, t2_int:UInt32;N, __always_true:Boolean]", + " Projection: COUNT(UInt8(1)) + Int64(2) AS _cnt, t2.t2_int, __always_true [_cnt:Int64;N, t2_int:UInt32;N, __always_true:Boolean]", + " Aggregate: groupBy=[[t2.t2_int, Boolean(true) AS __always_true]], aggr=[[COUNT(UInt8(1))]] [t2_int:UInt32;N, __always_true:Boolean, COUNT(UInt8(1)):Int64;N]", + " TableScan: t2 projection=[t2_int] [t2_int:UInt32;N]", + ]; + let formatted = plan.display_indent_schema().to_string(); + let actual: Vec<&str> = formatted.trim().lines().collect(); + assert_eq!( + expected, actual, + "\n\nexpected:\n\n{expected:#?}\nactual:\n\n{actual:#?}\n\n" + ); + + // assert data + let results = execute_to_batches(&ctx, sql).await; + let expected = vec![ + "+-------+------+", + "| t1_id | _cnt |", + "+-------+------+", + "| 11 | 3 |", + "| 22 | 2 |", + "| 33 | 5 |", + "| 44 | 2 |", + "+-------+------+", + ]; + assert_batches_sorted_eq!(expected, &results); + + Ok(()) +} + +#[tokio::test] +async fn correlated_scalar_subquery_count_agg_where_clause() -> Result<()> { + let ctx = create_join_context("t1_id", "t2_id", true)?; + + let sql = "select t1.t1_int from t1 where (select count(*) from t2 where t1.t1_id = t2.t2_id) < t1.t1_int"; + + let msg = format!("Creating logical plan for '{sql}'"); + let dataframe = ctx.sql(sql).await.expect(&msg); + let plan = dataframe.into_optimized_plan()?; + + let expected = vec![ + "Projection: t1.t1_int [t1_int:UInt32;N]", 
+ " Filter: CASE WHEN __scalar_sq_1.__always_true IS NULL THEN Int64(0) ELSE __scalar_sq_1.COUNT(UInt8(1)) END < CAST(t1.t1_int AS Int64) [t1_int:UInt32;N, COUNT(UInt8(1)):Int64;N, __always_true:Boolean;N]", + " Projection: t1.t1_int, __scalar_sq_1.COUNT(UInt8(1)), __scalar_sq_1.__always_true [t1_int:UInt32;N, COUNT(UInt8(1)):Int64;N, __always_true:Boolean;N]", + " Left Join: t1.t1_id = __scalar_sq_1.t2_id [t1_id:UInt32;N, t1_int:UInt32;N, COUNT(UInt8(1)):Int64;N, t2_id:UInt32;N, __always_true:Boolean;N]", + " TableScan: t1 projection=[t1_id, t1_int] [t1_id:UInt32;N, t1_int:UInt32;N]", + " SubqueryAlias: __scalar_sq_1 [COUNT(UInt8(1)):Int64;N, t2_id:UInt32;N, __always_true:Boolean]", + " Projection: COUNT(UInt8(1)), t2.t2_id, __always_true [COUNT(UInt8(1)):Int64;N, t2_id:UInt32;N, __always_true:Boolean]", + " Aggregate: groupBy=[[t2.t2_id, Boolean(true) AS __always_true]], aggr=[[COUNT(UInt8(1))]] [t2_id:UInt32;N, __always_true:Boolean, COUNT(UInt8(1)):Int64;N]", + " TableScan: t2 projection=[t2_id] [t2_id:UInt32;N]", + ]; + let formatted = plan.display_indent_schema().to_string(); + let actual: Vec<&str> = formatted.trim().lines().collect(); + assert_eq!( + expected, actual, + "\n\nexpected:\n\n{expected:#?}\nactual:\n\n{actual:#?}\n\n" + ); + + // assert data + let results = execute_to_batches(&ctx, sql).await; + let expected = vec![ + "+--------+", + "| t1_int |", + "+--------+", + "| 2 |", + "| 4 |", + "| 3 |", + "+--------+", + ]; + assert_batches_sorted_eq!(expected, &results); + + Ok(()) +} + +#[tokio::test] +#[ignore] +async fn correlated_scalar_subquery_sum_agg_bug() -> Result<()> { + let ctx = create_join_context("t1_id", "t2_id", true)?; + + let sql = "select t1.t1_int from t1 where (select sum(t2_int) is null from t2 where t1.t1_id = t2.t2_id)"; + + let msg = format!("Creating logical plan for '{sql}'"); + let dataframe = ctx.sql(sql).await.expect(&msg); + let plan = dataframe.into_optimized_plan()?; + + let expected = vec![ + "Projection: t1.t1_int 
[t1_int:UInt32;N]", + " Inner Join: t1.t1_id = __scalar_sq_1.t2_id [t1_id:UInt32;N, t1_int:UInt32;N, t2_id:UInt32;N]", + " TableScan: t1 projection=[t1_id, t1_int] [t1_id:UInt32;N, t1_int:UInt32;N]", + " SubqueryAlias: __scalar_sq_1 [t2_id:UInt32;N]", + " Projection: t2.t2_id [t2_id:UInt32;N]", + " Filter: SUM(t2.t2_int) IS NULL [t2_id:UInt32;N, SUM(t2.t2_int):UInt64;N]", + " Aggregate: groupBy=[[t2.t2_id]], aggr=[[SUM(t2.t2_int)]] [t2_id:UInt32;N, SUM(t2.t2_int):UInt64;N]", + " TableScan: t2 projection=[t2_id, t2_int] [t2_id:UInt32;N, t2_int:UInt32;N]", + ]; + let formatted = plan.display_indent_schema().to_string(); + let actual: Vec<&str> = formatted.trim().lines().collect(); + assert_eq!( + expected, actual, + "\n\nexpected:\n\n{expected:#?}\nactual:\n\n{actual:#?}\n\n" + ); + + // assert data + let results = execute_to_batches(&ctx, sql).await; + let expected = vec![ + "+--------+", + "| t1_int |", + "+--------+", + "| 2 |", + "| 4 |", + "| 3 |", + "+--------+", + ]; + assert_batches_sorted_eq!(expected, &results); + + Ok(()) +} + +#[tokio::test] +async fn correlated_scalar_subquery_count_agg_in_having() -> Result<()> { + let ctx = create_join_context("t1_id", "t2_id", true)?; + + let sql = "select t1.t1_int from t1 group by t1.t1_int having (select count(*) from t2 where t1.t1_int = t2.t2_int) = 0"; + + let msg = format!("Creating logical plan for '{sql}'"); + let dataframe = ctx.sql(sql).await.expect(&msg); + let plan = dataframe.into_optimized_plan()?; + + let expected = vec![ + "Projection: t1.t1_int [t1_int:UInt32;N]", + " Filter: CASE WHEN __scalar_sq_1.__always_true IS NULL THEN Int64(0) ELSE __scalar_sq_1.COUNT(UInt8(1)) END = Int64(0) [t1_int:UInt32;N, COUNT(UInt8(1)):Int64;N, __always_true:Boolean;N]", + " Projection: t1.t1_int, __scalar_sq_1.COUNT(UInt8(1)), __scalar_sq_1.__always_true [t1_int:UInt32;N, COUNT(UInt8(1)):Int64;N, __always_true:Boolean;N]", + " Left Join: t1.t1_int = __scalar_sq_1.t2_int [t1_int:UInt32;N, COUNT(UInt8(1)):Int64;N, 
t2_int:UInt32;N, __always_true:Boolean;N]", + " Aggregate: groupBy=[[t1.t1_int]], aggr=[[]] [t1_int:UInt32;N]", + " TableScan: t1 projection=[t1_int] [t1_int:UInt32;N]", + " SubqueryAlias: __scalar_sq_1 [COUNT(UInt8(1)):Int64;N, t2_int:UInt32;N, __always_true:Boolean]", + " Projection: COUNT(UInt8(1)), t2.t2_int, __always_true [COUNT(UInt8(1)):Int64;N, t2_int:UInt32;N, __always_true:Boolean]", + " Aggregate: groupBy=[[t2.t2_int, Boolean(true) AS __always_true]], aggr=[[COUNT(UInt8(1))]] [t2_int:UInt32;N, __always_true:Boolean, COUNT(UInt8(1)):Int64;N]", + " TableScan: t2 projection=[t2_int] [t2_int:UInt32;N]", + ]; + let formatted = plan.display_indent_schema().to_string(); + let actual: Vec<&str> = formatted.trim().lines().collect(); + assert_eq!( + expected, actual, + "\n\nexpected:\n\n{expected:#?}\nactual:\n\n{actual:#?}\n\n" + ); + + // assert data + let results = execute_to_batches(&ctx, sql).await; + let expected = vec![ + "+--------+", + "| t1_int |", + "+--------+", + "| 2 |", + "| 4 |", + "+--------+", + ]; + assert_batches_sorted_eq!(expected, &results); Ok(()) } diff --git a/datafusion/optimizer/src/decorrelate_predicate_subquery.rs b/datafusion/optimizer/src/decorrelate_predicate_subquery.rs index 8514a95014266..449bec48b051f 100644 --- a/datafusion/optimizer/src/decorrelate_predicate_subquery.rs +++ b/datafusion/optimizer/src/decorrelate_predicate_subquery.rs @@ -223,6 +223,9 @@ fn build_join( in_predicate_opt: in_predicate_opt.clone(), exists_sub_query: in_predicate_opt.is_none(), can_pull_up: true, + need_collect_count_expr_map: false, + collected_count_expr_map: Default::default(), + expr_check_map: Default::default(), }; let new_plan = subquery.clone().rewrite(&mut pull_up)?; if !pull_up.can_pull_up { diff --git a/datafusion/optimizer/src/scalar_subquery_to_join.rs b/datafusion/optimizer/src/scalar_subquery_to_join.rs index 4d8002c1a9129..46ad5f2a80fac 100644 --- a/datafusion/optimizer/src/scalar_subquery_to_join.rs +++ 
b/datafusion/optimizer/src/scalar_subquery_to_join.rs @@ -17,13 +17,17 @@ use crate::alias::AliasGenerator; use crate::optimizer::ApplyOrder; -use crate::utils::{conjunction, replace_qualified_name, PullUpCorrelatedExpr}; +use crate::utils::{ + conjunction, replace_qualified_name, ExprCheckMap, PullUpCorrelatedExpr, +}; use crate::{OptimizerConfig, OptimizerRule}; -use datafusion_common::tree_node::{RewriteRecursion, TreeNode, TreeNodeRewriter}; +use datafusion_common::tree_node::{ + RewriteRecursion, Transformed, TreeNode, TreeNodeRewriter, +}; use datafusion_common::{Column, DataFusionError, Result}; use datafusion_expr::logical_plan::{JoinType, Subquery}; -use datafusion_expr::{EmptyRelation, Expr, LogicalPlan, LogicalPlanBuilder}; -use std::collections::BTreeSet; +use datafusion_expr::{expr, EmptyRelation, Expr, LogicalPlan, LogicalPlanBuilder}; +use std::collections::{BTreeSet, HashMap}; use std::sync::Arc; /// Optimizer rule for rewriting subquery filters to joins @@ -66,7 +70,7 @@ impl OptimizerRule for ScalarSubqueryToJoin { ) -> Result> { match plan { LogicalPlan::Filter(filter) => { - let (subqueries, expr) = + let (subqueries, mut rewrite_expr) = self.extract_subquery_exprs(&filter.predicate, self.alias.clone())?; if subqueries.is_empty() { @@ -77,27 +81,62 @@ impl OptimizerRule for ScalarSubqueryToJoin { // iterate through all subqueries in predicate, turning each into a left join let mut cur_input = filter.input.as_ref().clone(); for (subquery, alias) in subqueries { - if let Some(optimized_subquery) = - build_join(&subquery, &cur_input, &alias)? + if let Some((optimized_subquery, expr_check_map)) = + build_join(&subquery, &cur_input, &alias, true)? 
{ + if !expr_check_map.is_empty() { + rewrite_expr = + rewrite_expr.clone().transform_up(&|expr| { + if let Expr::Column(col) = &expr { + if let Some((expr1, expr2)) = + expr_check_map.get(&col.name) + { + let new_expr = Expr::Case(expr::Case { + expr: None, + when_then_expr: vec![( + Box::new(Expr::IsNull(Box::new( + Expr::Column( + Column::new_unqualified( + "__always_true", + ), + ), + ))), + Box::new(expr2.clone()), + )], + else_expr: Some(Box::new(expr1.clone())), + }); + Ok(Transformed::Yes(new_expr)) + } else { + Ok(Transformed::No(expr)) + } + } else { + Ok(Transformed::No(expr)) + } + })?; + } cur_input = optimized_subquery; } else { // if we can't handle all of the subqueries then bail for now return Ok(None); } } - let new_plan = - LogicalPlanBuilder::from(cur_input).filter(expr)?.build()?; + let new_plan = LogicalPlanBuilder::from(cur_input) + .filter(rewrite_expr)? + .build()?; Ok(Some(new_plan)) } LogicalPlan::Projection(projection) => { let mut all_subqueryies = vec![]; - let mut rewrite_exprs = vec![]; + let mut expr_to_rewrite_expr_map = HashMap::new(); + let mut subquery_to_expr_map = HashMap::new(); for expr in projection.expr.iter() { - let (subqueries, expr) = + let (subqueries, rewrite_exprs) = self.extract_subquery_exprs(expr, self.alias.clone())?; + for (subquery, _) in &subqueries { + subquery_to_expr_map.insert(subquery.clone(), expr.clone()); + } all_subqueryies.extend(subqueries); - rewrite_exprs.push(expr); + expr_to_rewrite_expr_map.insert(expr, rewrite_exprs); } if all_subqueryies.is_empty() { // regular projection, no subquery exists clause here @@ -106,17 +145,62 @@ impl OptimizerRule for ScalarSubqueryToJoin { // iterate through all subqueries in predicate, turning each into a left join let mut cur_input = projection.input.as_ref().clone(); for (subquery, alias) in all_subqueryies { - if let Some(optimized_subquery) = - build_join(&subquery, &cur_input, &alias)? 
+ if let Some((optimized_subquery, expr_check_map)) = + build_join(&subquery, &cur_input, &alias, true)? { cur_input = optimized_subquery; + if !expr_check_map.is_empty() { + if let Some(expr) = subquery_to_expr_map.get(&subquery) { + if let Some(rewrite_expr) = + expr_to_rewrite_expr_map.get(expr) + { + let new_expr = rewrite_expr.clone().transform_up(&|expr| { + if let Expr::Column(col) = &expr { + if let Some((expr1, expr2)) = expr_check_map.get(&col.name) + { + let new_expr = Expr::Case(expr::Case { + expr: None, + when_then_expr: vec![( + Box::new(Expr::IsNull(Box::new( + Expr::Column(Column::new_unqualified("__always_true")), + ))), + Box::new(expr2.clone()), + )], + else_expr: Some(Box::new(expr1.clone())), + }); + Ok(Transformed::Yes(new_expr)) + } else { + Ok(Transformed::No(expr)) + } + } else { + Ok(Transformed::No(expr)) + } + + })?; + expr_to_rewrite_expr_map.insert(expr, new_expr); + } + } + } } else { // if we can't handle all of the subqueries then bail for now return Ok(None); } } + + let mut proj_exprs = vec![]; + for expr in projection.expr.iter() { + let old_expr_name = expr.display_name()?; + let new_expr = expr_to_rewrite_expr_map.get(expr).unwrap(); + let new_expr_name = new_expr.display_name()?; + if new_expr_name != old_expr_name { + proj_exprs + .push(Expr::Alias(Box::new(new_expr.clone()), old_expr_name)) + } else { + proj_exprs.push(new_expr.clone()); + } + } let new_plan = LogicalPlanBuilder::from(cur_input) - .project(rewrite_exprs)? + .project(proj_exprs)? 
.build()?; Ok(Some(new_plan)) } @@ -205,7 +289,7 @@ impl TreeNodeRewriter for ExtractScalarSubQuery { /// /// ```text /// select c.id from customers c -/// cross join (select avg(total) as val from orders) a +/// left join (select avg(total) as val from orders) a /// where c.balance > a.val /// ``` /// @@ -219,7 +303,8 @@ fn build_join( subquery: &Subquery, filter_input: &LogicalPlan, subquery_alias: &str, -) -> Result> { + need_collect_count_expr_map: bool, +) -> Result> { let subquery_plan = subquery.subquery.as_ref(); let mut pull_up = PullUpCorrelatedExpr { join_filters: vec![], @@ -227,6 +312,9 @@ fn build_join( in_predicate_opt: None, exists_sub_query: false, can_pull_up: true, + need_collect_count_expr_map, + collected_count_expr_map: Default::default(), + expr_check_map: Default::default(), }; let new_plan = subquery_plan.clone().rewrite(&mut pull_up)?; if !pull_up.can_pull_up { @@ -236,6 +324,7 @@ fn build_join( let sub_query_alias = LogicalPlanBuilder::from(new_plan) .alias(subquery_alias.to_string())? .build()?; + let mut all_correlated_cols = BTreeSet::new(); pull_up .correlated_subquery_cols_map @@ -257,9 +346,14 @@ fn build_join( schema: _, }) => sub_query_alias, _ => { - // if not correlated, group down to 1 row and cross join on that (preserving row count) + // if not correlated, group down to 1 row and left join on that (preserving row count) LogicalPlanBuilder::from(filter_input.clone()) - .cross_join(sub_query_alias)? + .join( + sub_query_alias, + JoinType::Left, + (Vec::::new(), Vec::::new()), + None, + )? .build()? } } @@ -274,7 +368,7 @@ fn build_join( )? .build()? 
}; - Ok(Some(new_plan)) + Ok(Some((new_plan, pull_up.expr_check_map.clone()))) } #[cfg(test)] @@ -455,7 +549,7 @@ mod tests { // it will optimize, but fail for the same reason the unoptimized query would let expected = "Projection: customer.c_custkey [c_custkey:Int64]\ \n Filter: customer.c_custkey = __scalar_sq_1.MAX(orders.o_custkey) [c_custkey:Int64, c_name:Utf8, MAX(orders.o_custkey):Int64;N]\ - \n CrossJoin: [c_custkey:Int64, c_name:Utf8, MAX(orders.o_custkey):Int64;N]\ + \n Left Join: [c_custkey:Int64, c_name:Utf8, MAX(orders.o_custkey):Int64;N]\ \n TableScan: customer [c_custkey:Int64, c_name:Utf8]\ \n SubqueryAlias: __scalar_sq_1 [MAX(orders.o_custkey):Int64;N]\ \n Projection: MAX(orders.o_custkey) [MAX(orders.o_custkey):Int64;N]\ @@ -487,7 +581,7 @@ mod tests { let expected = "Projection: customer.c_custkey [c_custkey:Int64]\ \n Filter: customer.c_custkey = __scalar_sq_1.MAX(orders.o_custkey) [c_custkey:Int64, c_name:Utf8, MAX(orders.o_custkey):Int64;N]\ - \n CrossJoin: [c_custkey:Int64, c_name:Utf8, MAX(orders.o_custkey):Int64;N]\ + \n Left Join: [c_custkey:Int64, c_name:Utf8, MAX(orders.o_custkey):Int64;N]\ \n TableScan: customer [c_custkey:Int64, c_name:Utf8]\ \n SubqueryAlias: __scalar_sq_1 [MAX(orders.o_custkey):Int64;N]\ \n Projection: MAX(orders.o_custkey) [MAX(orders.o_custkey):Int64;N]\ @@ -837,7 +931,7 @@ mod tests { let expected = "Projection: customer.c_custkey [c_custkey:Int64]\ \n Filter: customer.c_custkey < __scalar_sq_1.MAX(orders.o_custkey) [c_custkey:Int64, c_name:Utf8, MAX(orders.o_custkey):Int64;N]\ - \n CrossJoin: [c_custkey:Int64, c_name:Utf8, MAX(orders.o_custkey):Int64;N]\ + \n Left Join: [c_custkey:Int64, c_name:Utf8, MAX(orders.o_custkey):Int64;N]\ \n TableScan: customer [c_custkey:Int64, c_name:Utf8]\ \n SubqueryAlias: __scalar_sq_1 [MAX(orders.o_custkey):Int64;N]\ \n Projection: MAX(orders.o_custkey) [MAX(orders.o_custkey):Int64;N]\ @@ -868,7 +962,7 @@ mod tests { let expected = "Projection: customer.c_custkey 
[c_custkey:Int64]\ \n Filter: customer.c_custkey = __scalar_sq_1.MAX(orders.o_custkey) [c_custkey:Int64, c_name:Utf8, MAX(orders.o_custkey):Int64;N]\ - \n CrossJoin: [c_custkey:Int64, c_name:Utf8, MAX(orders.o_custkey):Int64;N]\ + \n Left Join: [c_custkey:Int64, c_name:Utf8, MAX(orders.o_custkey):Int64;N]\ \n TableScan: customer [c_custkey:Int64, c_name:Utf8]\ \n SubqueryAlias: __scalar_sq_1 [MAX(orders.o_custkey):Int64;N]\ \n Projection: MAX(orders.o_custkey) [MAX(orders.o_custkey):Int64;N]\ @@ -969,8 +1063,8 @@ mod tests { let expected = "Projection: customer.c_custkey [c_custkey:Int64]\ \n Filter: customer.c_custkey BETWEEN __scalar_sq_1.MIN(orders.o_custkey) AND __scalar_sq_2.MAX(orders.o_custkey) [c_custkey:Int64, c_name:Utf8, MIN(orders.o_custkey):Int64;N, MAX(orders.o_custkey):Int64;N]\ - \n CrossJoin: [c_custkey:Int64, c_name:Utf8, MIN(orders.o_custkey):Int64;N, MAX(orders.o_custkey):Int64;N]\ - \n CrossJoin: [c_custkey:Int64, c_name:Utf8, MIN(orders.o_custkey):Int64;N]\ + \n Left Join: [c_custkey:Int64, c_name:Utf8, MIN(orders.o_custkey):Int64;N, MAX(orders.o_custkey):Int64;N]\ + \n Left Join: [c_custkey:Int64, c_name:Utf8, MIN(orders.o_custkey):Int64;N]\ \n TableScan: customer [c_custkey:Int64, c_name:Utf8]\ \n SubqueryAlias: __scalar_sq_1 [MIN(orders.o_custkey):Int64;N]\ \n Projection: MIN(orders.o_custkey) [MIN(orders.o_custkey):Int64;N]\ diff --git a/datafusion/optimizer/src/utils.rs b/datafusion/optimizer/src/utils.rs index 2d072b1e12429..177e9bf4ab545 100644 --- a/datafusion/optimizer/src/utils.rs +++ b/datafusion/optimizer/src/utils.rs @@ -17,9 +17,12 @@ //! 
Collection of utility functions that are leveraged by the query optimizer rules +use crate::simplify_expressions::{ExprSimplifier, SimplifyContext}; use crate::{OptimizerConfig, OptimizerRule}; -use datafusion_common::tree_node::{RewriteRecursion, TreeNode, TreeNodeRewriter}; -use datafusion_common::{plan_err, Column, DFSchemaRef}; +use datafusion_common::tree_node::{ + RewriteRecursion, Transformed, TreeNode, TreeNodeRewriter, +}; +use datafusion_common::{plan_err, Column, DFSchemaRef, ScalarValue}; use datafusion_common::{DFSchema, Result}; use datafusion_expr::expr::{BinaryExpr, Sort}; use datafusion_expr::expr_rewriter::{ @@ -30,8 +33,9 @@ use datafusion_expr::utils::from_plan; use datafusion_expr::{ and, logical_plan::{Filter, LogicalPlan}, - EmptyRelation, Expr, Operator, + AggregateFunction, EmptyRelation, Expr, Operator, }; +use datafusion_physical_expr::execution_props::ExecutionProps; use log::{debug, trace}; use std::collections::{BTreeSet, HashMap}; use std::ops::Deref; @@ -393,15 +397,22 @@ pub fn log_plan(description: &str, plan: &LogicalPlan) { /// It adds the inner reference columns to the 'Projection' or 'Aggregate' of the subquery if they are missing, so that they can be evaluated by the parent operator as the join condition. 
pub struct PullUpCorrelatedExpr { pub join_filters: Vec, - // map of the plan and its holding correlated columns + // mapping from the plan to its holding correlated columns pub correlated_subquery_cols_map: HashMap>, pub in_predicate_opt: Option, // indicate whether it is Exists(Not Exists) SubQuery pub exists_sub_query: bool, // indicate whether the correlated expressions can pull up or not pub can_pull_up: bool, + // indicate whether the subquery need to collect count expr mapping + pub need_collect_count_expr_map: bool, + // mapping from expr name to the pair of agg expr and its evaluation result on empty record batch + pub collected_count_expr_map: HashMap, + pub expr_check_map: ExprCheckMap, } +pub type ExprCheckMap = HashMap; + impl TreeNodeRewriter for PullUpCorrelatedExpr { type N = LogicalPlan; @@ -430,7 +441,7 @@ impl TreeNodeRewriter for PullUpCorrelatedExpr { } } _ if plan.expressions().iter().any(|expr| expr.contains_outer()) => { - // the unsupported cases, the plan expressions contain out reference columns(like window expressions or agg expressions) + // the unsupported cases, the plan expressions contain out reference columns(like window expressions) self.can_pull_up = false; Ok(RewriteRecursion::Stop) } @@ -476,8 +487,48 @@ impl TreeNodeRewriter for PullUpCorrelatedExpr { &mut local_correlated_cols, ); // add missing columns to Projection - let missing_exprs = + let mut missing_exprs = self.collect_missing_exprs(&projection.expr, &local_correlated_cols)?; + if !self.collected_count_expr_map.is_empty() { + let head_expr = missing_exprs.get(0); + if let Some(expr) = head_expr { + let result_expr = expr.clone().transform_up(&|expr| { + if let Expr::Column(Column { name, .. 
}) = &expr { + if let Some((_, result_expr)) = + self.collected_count_expr_map.get(name) + { + Ok(Transformed::Yes(result_expr.clone())) + } else { + Ok(Transformed::No(expr)) + } + } else { + Ok(Transformed::No(expr)) + } + })?; + let scalar_expr = match expr { + Expr::Alias(_, alias) => ( + alias.to_string(), + Expr::Column(Column::new_unqualified(alias)), + ), + Expr::Column(Column { relation: _, name }) => { + (name.to_string(), expr.clone()) + } + _ => { + let scalar_column = expr.display_name()?; + ( + scalar_column.clone(), + Expr::Column(Column::new_unqualified(scalar_column)), + ) + } + }; + self.expr_check_map + .insert(scalar_expr.0, (scalar_expr.1, result_expr)); + missing_exprs.push(Expr::Column(Column::new_unqualified( + "__always_true".to_string(), + ))); + } + } + let new_plan = LogicalPlanBuilder::from((*projection.input).clone()) .project(missing_exprs)? .build()?; @@ -493,10 +544,37 @@ impl TreeNodeRewriter for PullUpCorrelatedExpr { &mut local_correlated_cols, ); // add missing columns to Aggregation's group expression - let missing_exprs = self.collect_missing_exprs( + let mut missing_exprs = self.collect_missing_exprs( &aggregate.group_expr, &local_correlated_cols, )?; + + if self.need_collect_count_expr_map && aggregate.group_expr.is_empty() { + let agg_result_exprs = agg_exprs_eva_result_on_empty_batch( + &aggregate.aggr_expr, + subquery_schema, + )?; + if !missing_exprs.is_empty() { + let scalar_agg = !agg_result_exprs.values().any(|result_expr| { + matches!(result_expr, Expr::Literal(ScalarValue::Null)) + }); + if scalar_agg { + let internal_always_true_col = Expr::Alias( + Box::new(Expr::Literal(ScalarValue::Boolean(Some(true)))), + "__always_true".to_string(), + ); + missing_exprs.push(internal_always_true_col); + for (agg_expr, result_expr_on_empty) in agg_result_exprs { + let agg_expr_name = agg_expr.display_name()?; + self.collected_count_expr_map.insert( + agg_expr_name, + (agg_expr, result_expr_on_empty), + ); + } + } + } + } + let 
new_plan = LogicalPlanBuilder::from((*aggregate.input).clone()) .aggregate(missing_exprs, aggregate.aggr_expr.to_vec())? .build()?; @@ -618,6 +696,40 @@ fn remove_duplicated_filter(filters: Vec, in_predicate: &Expr) -> Vec>() } +fn agg_exprs_eva_result_on_empty_batch( + agg_expr: &[Expr], + schema: DFSchemaRef, +) -> Result> { + let mut result_expr_map = HashMap::new(); + for e in agg_expr.iter() { + let new_expr = e.clone().transform_up(&|expr| { + let new_expr = match expr { + Expr::AggregateFunction(datafusion_expr::expr::AggregateFunction { + fun, + .. + }) => { + if matches!(fun, AggregateFunction::Count) { + Transformed::Yes(Expr::Literal(ScalarValue::Int64(Some(0)))) + } else { + Transformed::Yes(Expr::Literal(ScalarValue::Null)) + } + } + Expr::AggregateUDF(_) => { + Transformed::Yes(Expr::Literal(ScalarValue::Null)) + } + _ => Transformed::No(expr), + }; + Ok(new_expr) + })?; + + let props = ExecutionProps::new(); + let info = SimplifyContext::new(&props).with_schema(schema.clone()); + let simplifier = ExprSimplifier::new(info); + result_expr_map.insert(e.clone(), simplifier.simplify(new_expr)?); + } + Ok(result_expr_map) +} + #[cfg(test)] mod tests { use super::*; diff --git a/datafusion/optimizer/tests/integration-test.rs b/datafusion/optimizer/tests/integration-test.rs index 350e6e3a75448..c101eff9de9e6 100644 --- a/datafusion/optimizer/tests/integration-test.rs +++ b/datafusion/optimizer/tests/integration-test.rs @@ -66,14 +66,13 @@ fn subquery_filter_with_cast() -> Result<()> { )"; let plan = test_sql(sql)?; let expected = "Projection: test.col_int32\ - \n Filter: CAST(test.col_int32 AS Float64) > __scalar_sq_1.AVG(test.col_int32)\ - \n CrossJoin:\ - \n TableScan: test projection=[col_int32]\ - \n SubqueryAlias: __scalar_sq_1\ - \n Aggregate: groupBy=[[]], aggr=[[AVG(test.col_int32)]]\ - \n Projection: test.col_int32\ - \n Filter: test.col_utf8 >= Utf8(\"2002-05-08\") AND test.col_utf8 <= Utf8(\"2002-05-13\")\ - \n TableScan: test 
projection=[col_int32, col_utf8]"; + \n Inner Join: Filter: CAST(test.col_int32 AS Float64) > __scalar_sq_1.AVG(test.col_int32)\ + \n TableScan: test projection=[col_int32]\ + \n SubqueryAlias: __scalar_sq_1\ + \n Aggregate: groupBy=[[]], aggr=[[AVG(test.col_int32)]]\ + \n Projection: test.col_int32\ + \n Filter: test.col_utf8 >= Utf8(\"2002-05-08\") AND test.col_utf8 <= Utf8(\"2002-05-13\")\ + \n TableScan: test projection=[col_int32, col_utf8]"; assert_eq!(expected, format!("{plan:?}")); Ok(()) } diff --git a/datafusion/physical-expr/src/aggregate/average.rs b/datafusion/physical-expr/src/aggregate/average.rs index 2fe44602d831a..607572862290b 100644 --- a/datafusion/physical-expr/src/aggregate/average.rs +++ b/datafusion/physical-expr/src/aggregate/average.rs @@ -237,9 +237,16 @@ impl Accumulator for AvgAccumulator { ScalarValue::Float64(e) => { Ok(ScalarValue::Float64(e.map(|f| f / self.count as f64))) } - ScalarValue::Decimal128(value, precision, scale) => { - Ok(match value { - None => ScalarValue::Decimal128(None, precision, scale), + ScalarValue::Decimal128(value, _, scale) => { + match value { + None => match &self.return_data_type { + DataType::Decimal128(p, s) => { + Ok(ScalarValue::Decimal128(None, *p, *s)) + } + other => Err(DataFusionError::Internal(format!( + "Error returned data type in AvgAccumulator {other:?}" + ))), + }, Some(value) => { // now the sum_type and return type is not the same, need to convert the sum type to return type calculate_result_decimal_for_avg( @@ -247,9 +254,9 @@ impl Accumulator for AvgAccumulator { self.count as i128, scale, &self.return_data_type, - )? 
+ ) } - }) + } } _ => Err(DataFusionError::Internal( "Sum should be f64 or decimal128 on average".to_string(), From f64473e086fc98a7d0ebafab911b28dbb8586eb8 Mon Sep 17 00:00:00 2001 From: "mingmwang@ebay.com" Date: Tue, 30 May 2023 18:04:01 +0800 Subject: [PATCH 06/13] resolve review comments --- datafusion/expr/src/logical_plan/plan.rs | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/datafusion/expr/src/logical_plan/plan.rs b/datafusion/expr/src/logical_plan/plan.rs index f1ef72d3148bc..b38ec4af74e12 100644 --- a/datafusion/expr/src/logical_plan/plan.rs +++ b/datafusion/expr/src/logical_plan/plan.rs @@ -403,6 +403,7 @@ impl LogicalPlan { Ok(using_columns) } + /// returns the first output expression of this `LogicalPlan` node. pub fn head_output_expr(&self) -> Result> { match self { LogicalPlan::Projection(projection) => { @@ -415,10 +416,12 @@ impl LogicalPlan { Ok(Some(agg.group_expr.as_slice()[0].clone())) } } - LogicalPlan::Filter(filter) => filter.input.head_output_expr(), - LogicalPlan::Distinct(distinct) => distinct.input.head_output_expr(), - LogicalPlan::Sort(sort) => sort.input.head_output_expr(), - LogicalPlan::Limit(limit) => limit.input.head_output_expr(), + LogicalPlan::Filter(Filter { input, .. }) + | LogicalPlan::Distinct(Distinct { input, .. }) + | LogicalPlan::Sort(Sort { input, .. }) + | LogicalPlan::Limit(Limit { input, .. }) + | LogicalPlan::Repartition(Repartition { input, .. }) + | LogicalPlan::Window(Window { input, .. 
}) => input.head_output_expr(), LogicalPlan::Join(Join { left, right, @@ -442,8 +445,6 @@ impl LogicalPlan { cross.left.head_output_expr() } } - LogicalPlan::Repartition(repartition) => repartition.input.head_output_expr(), - LogicalPlan::Window(window) => window.input.head_output_expr(), LogicalPlan::Union(union) => Ok(Some(Expr::Column( union.schema.fields()[0].qualified_column(), ))), From df94f18291e490eda3aea94c320338f846a4fd42 Mon Sep 17 00:00:00 2001 From: "mingmwang@ebay.com" Date: Thu, 1 Jun 2023 10:53:13 +0800 Subject: [PATCH 07/13] refine the count bug handling logic --- datafusion/core/tests/sql/subqueries.rs | 225 ++++++++ datafusion/optimizer/src/decorrelate.rs | 505 ++++++++++++++++++ .../src/decorrelate_predicate_subquery.rs | 9 +- datafusion/optimizer/src/lib.rs | 1 + .../optimizer/src/scalar_subquery_to_join.rs | 111 ++-- datafusion/optimizer/src/utils.rs | 353 +----------- 6 files changed, 803 insertions(+), 401 deletions(-) create mode 100644 datafusion/optimizer/src/decorrelate.rs diff --git a/datafusion/core/tests/sql/subqueries.rs b/datafusion/core/tests/sql/subqueries.rs index f3da709f88b86..d5d136c6f8339 100644 --- a/datafusion/core/tests/sql/subqueries.rs +++ b/datafusion/core/tests/sql/subqueries.rs @@ -1359,6 +1359,94 @@ async fn correlated_scalar_subquery_sum_agg_bug() -> Result<()> { Ok(()) } +#[tokio::test] +async fn correlated_scalar_subquery_count_agg_with_having() -> Result<()> { + let ctx = create_join_context("t1_id", "t2_id", true)?; + + let sql = "SELECT t1_id, (SELECT count(*) + 2 as cnt_plus_2 FROM t2 WHERE t2.t2_int = t1.t1_int having count(*) >1) from t1"; + let msg = format!("Creating logical plan for '{sql}'"); + let dataframe = ctx.sql(sql).await.expect(&msg); + let plan = dataframe.into_optimized_plan()?; + + // the having condition is kept as the normal filter condition, no need to pull up + let expected = vec![ + "Projection: t1.t1_id, __scalar_sq_1.cnt_plus_2 AS cnt_plus_2 [t1_id:UInt32;N, cnt_plus_2:Int64;N]", + " 
Left Join: t1.t1_int = __scalar_sq_1.t2_int [t1_id:UInt32;N, t1_int:UInt32;N, cnt_plus_2:Int64;N, t2_int:UInt32;N]", + " TableScan: t1 projection=[t1_id, t1_int] [t1_id:UInt32;N, t1_int:UInt32;N]", + " SubqueryAlias: __scalar_sq_1 [cnt_plus_2:Int64;N, t2_int:UInt32;N]", + " Projection: COUNT(UInt8(1)) + Int64(2) AS cnt_plus_2, t2.t2_int [cnt_plus_2:Int64;N, t2_int:UInt32;N]", + " Filter: COUNT(UInt8(1)) > Int64(1) [t2_int:UInt32;N, COUNT(UInt8(1)):Int64;N]", + " Projection: t2.t2_int, COUNT(UInt8(1)) [t2_int:UInt32;N, COUNT(UInt8(1)):Int64;N]", + " Aggregate: groupBy=[[t2.t2_int, Boolean(true) AS __always_true]], aggr=[[COUNT(UInt8(1))]] [t2_int:UInt32;N, __always_true:Boolean, COUNT(UInt8(1)):Int64;N]", + " TableScan: t2 projection=[t2_int] [t2_int:UInt32;N]", + ]; + let formatted = plan.display_indent_schema().to_string(); + let actual: Vec<&str> = formatted.trim().lines().collect(); + assert_eq!( + expected, actual, + "\n\nexpected:\n\n{expected:#?}\nactual:\n\n{actual:#?}\n\n" + ); + + // assert data + let results = execute_to_batches(&ctx, sql).await; + let expected = vec![ + "+-------+------------+", + "| t1_id | cnt_plus_2 |", + "+-------+------------+", + "| 11 | |", + "| 22 | |", + "| 33 | 5 |", + "| 44 | |", + "+-------+------------+", + ]; + assert_batches_sorted_eq!(expected, &results); + + Ok(()) +} + +#[tokio::test] +async fn correlated_scalar_subquery_count_agg_with_pull_up_having() -> Result<()> { + let ctx = create_join_context("t1_id", "t2_id", true)?; + + let sql = "SELECT t1_id, (SELECT count(*) + 2 as cnt_plus_2 FROM t2 WHERE t2.t2_int = t1.t1_int having count(*) = 0) from t1"; + let msg = format!("Creating logical plan for '{sql}'"); + let dataframe = ctx.sql(sql).await.expect(&msg); + let plan = dataframe.into_optimized_plan()?; + + // the having condition need to pull up and evaluated after the left out join + let expected = vec![ + "Projection: t1.t1_id, CASE WHEN __scalar_sq_1.__always_true IS NULL THEN Int64(2) AS cnt_plus_2 WHEN 
__scalar_sq_1.COUNT(UInt8(1)) != Int64(0) THEN NULL ELSE __scalar_sq_1.cnt_plus_2 END AS cnt_plus_2 [t1_id:UInt32;N, cnt_plus_2:Int64;N]", + " Left Join: t1.t1_int = __scalar_sq_1.t2_int [t1_id:UInt32;N, t1_int:UInt32;N, cnt_plus_2:Int64;N, t2_int:UInt32;N, COUNT(UInt8(1)):Int64;N, __always_true:Boolean;N]", + " TableScan: t1 projection=[t1_id, t1_int] [t1_id:UInt32;N, t1_int:UInt32;N]", + " SubqueryAlias: __scalar_sq_1 [cnt_plus_2:Int64;N, t2_int:UInt32;N, COUNT(UInt8(1)):Int64;N, __always_true:Boolean]", + " Projection: COUNT(UInt8(1)) + Int64(2) AS cnt_plus_2, t2.t2_int, COUNT(UInt8(1)), __always_true [cnt_plus_2:Int64;N, t2_int:UInt32;N, COUNT(UInt8(1)):Int64;N, __always_true:Boolean]", + " Aggregate: groupBy=[[t2.t2_int, Boolean(true) AS __always_true]], aggr=[[COUNT(UInt8(1))]] [t2_int:UInt32;N, __always_true:Boolean, COUNT(UInt8(1)):Int64;N]", + " TableScan: t2 projection=[t2_int] [t2_int:UInt32;N]", + ]; + let formatted = plan.display_indent_schema().to_string(); + let actual: Vec<&str> = formatted.trim().lines().collect(); + assert_eq!( + expected, actual, + "\n\nexpected:\n\n{expected:#?}\nactual:\n\n{actual:#?}\n\n" + ); + + // assert data + let results = execute_to_batches(&ctx, sql).await; + let expected = vec![ + "+-------+------------+", + "| t1_id | cnt_plus_2 |", + "+-------+------------+", + "| 11 | |", + "| 22 | 2 |", + "| 33 | |", + "| 44 | 2 |", + "+-------+------------+", + ]; + assert_batches_sorted_eq!(expected, &results); + + Ok(()) +} + #[tokio::test] async fn correlated_scalar_subquery_count_agg_in_having() -> Result<()> { let ctx = create_join_context("t1_id", "t2_id", true)?; @@ -1402,3 +1490,140 @@ async fn correlated_scalar_subquery_count_agg_in_having() -> Result<()> { Ok(()) } + +#[tokio::test] +async fn correlated_scalar_subquery_count_agg_in_nested_projection() -> Result<()> { + let ctx = create_join_context("t1_id", "t2_id", true)?; + + let sql = "select t1.t1_int from t1 where (select cnt from (select count(*) as cnt, 
sum(t2_int) from t2 where t1.t1_int = t2.t2_int)) = 0"; + + let msg = format!("Creating logical plan for '{sql}'"); + let dataframe = ctx.sql(sql).await.expect(&msg); + let plan = dataframe.into_optimized_plan()?; + + let expected = vec![ + "Projection: t1.t1_int [t1_int:UInt32;N]", + " Filter: CASE WHEN __scalar_sq_1.__always_true IS NULL THEN Int64(0) ELSE __scalar_sq_1.cnt END = Int64(0) [t1_int:UInt32;N, cnt:Int64;N, __always_true:Boolean;N]", + " Projection: t1.t1_int, __scalar_sq_1.cnt, __scalar_sq_1.__always_true [t1_int:UInt32;N, cnt:Int64;N, __always_true:Boolean;N]", + " Left Join: t1.t1_int = __scalar_sq_1.t2_int [t1_int:UInt32;N, cnt:Int64;N, t2_int:UInt32;N, __always_true:Boolean;N]", + " TableScan: t1 projection=[t1_int] [t1_int:UInt32;N]", + " SubqueryAlias: __scalar_sq_1 [cnt:Int64;N, t2_int:UInt32;N, __always_true:Boolean]", + " Projection: COUNT(UInt8(1)) AS cnt, t2.t2_int, __always_true [cnt:Int64;N, t2_int:UInt32;N, __always_true:Boolean]", + " Aggregate: groupBy=[[t2.t2_int, Boolean(true) AS __always_true]], aggr=[[COUNT(UInt8(1))]] [t2_int:UInt32;N, __always_true:Boolean, COUNT(UInt8(1)):Int64;N]", + " TableScan: t2 projection=[t2_int] [t2_int:UInt32;N]", + ]; + let formatted = plan.display_indent_schema().to_string(); + let actual: Vec<&str> = formatted.trim().lines().collect(); + assert_eq!( + expected, actual, + "\n\nexpected:\n\n{expected:#?}\nactual:\n\n{actual:#?}\n\n" + ); + + // assert data + let results = execute_to_batches(&ctx, sql).await; + let expected = vec![ + "+--------+", + "| t1_int |", + "+--------+", + "| 2 |", + "| 4 |", + "+--------+", + ]; + assert_batches_sorted_eq!(expected, &results); + + Ok(()) +} + +#[tokio::test] +async fn correlated_scalar_subquery_count_agg_in_nested_subquery() -> Result<()> { + let ctx = create_join_context("t1_id", "t2_id", true)?; + + let sql = "select t1.t1_int from t1 where \ + (select cnt_plus_one + 1 as cnt_plus_two from \ + (select cnt + 1 as cnt_plus_one from \ + (select count(*) as cnt, 
sum(t2_int) s from t2 where t1.t1_int = t2.t2_int having cnt = 0)\ + )\ + ) = 2"; + + let msg = format!("Creating logical plan for '{sql}'"); + let dataframe = ctx.sql(sql).await.expect(&msg); + let plan = dataframe.into_optimized_plan()?; + + // pull up the deeply nested having condition + let expected = vec![ + "Projection: t1.t1_int [t1_int:UInt32;N]", + " Filter: CASE WHEN __scalar_sq_1.__always_true IS NULL THEN Int64(2) WHEN __scalar_sq_1.COUNT(UInt8(1)) != Int64(0) THEN NULL ELSE __scalar_sq_1.cnt_plus_two END = Int64(2) [t1_int:UInt32;N, cnt_plus_two:Int64;N, COUNT(UInt8(1)):Int64;N, __always_true:Boolean;N]", + " Projection: t1.t1_int, __scalar_sq_1.cnt_plus_two, __scalar_sq_1.COUNT(UInt8(1)), __scalar_sq_1.__always_true [t1_int:UInt32;N, cnt_plus_two:Int64;N, COUNT(UInt8(1)):Int64;N, __always_true:Boolean;N]", + " Left Join: t1.t1_int = __scalar_sq_1.t2_int [t1_int:UInt32;N, cnt_plus_two:Int64;N, t2_int:UInt32;N, COUNT(UInt8(1)):Int64;N, __always_true:Boolean;N]", + " TableScan: t1 projection=[t1_int] [t1_int:UInt32;N]", + " SubqueryAlias: __scalar_sq_1 [cnt_plus_two:Int64;N, t2_int:UInt32;N, COUNT(UInt8(1)):Int64;N, __always_true:Boolean]", + " Projection: COUNT(UInt8(1)) + Int64(1) + Int64(1) AS cnt_plus_two, t2.t2_int, COUNT(UInt8(1)), __always_true [cnt_plus_two:Int64;N, t2_int:UInt32;N, COUNT(UInt8(1)):Int64;N, __always_true:Boolean]", + " Aggregate: groupBy=[[t2.t2_int, Boolean(true) AS __always_true]], aggr=[[COUNT(UInt8(1))]] [t2_int:UInt32;N, __always_true:Boolean, COUNT(UInt8(1)):Int64;N]", + " TableScan: t2 projection=[t2_int] [t2_int:UInt32;N]", + ]; + let formatted = plan.display_indent_schema().to_string(); + let actual: Vec<&str> = formatted.trim().lines().collect(); + assert_eq!( + expected, actual, + "\n\nexpected:\n\n{expected:#?}\nactual:\n\n{actual:#?}\n\n" + ); + + // assert data + let results = execute_to_batches(&ctx, sql).await; + let expected = vec![ + "+--------+", + "| t1_int |", + "+--------+", + "| 2 |", + "| 4 |", + 
"+--------+", + ]; + assert_batches_sorted_eq!(expected, &results); + + Ok(()) +} + +#[tokio::test] +async fn correlated_scalar_subquery_count_agg_in_case_when() -> Result<()> { + let ctx = create_join_context("t1_id", "t2_id", true)?; + + let sql = "select t1.t1_int from t1 where \ + (select case when count(*) = 1 then null else count(*) end as cnt from t2 where t2.t2_int = t1.t1_int)\ + = 0"; + + let msg = format!("Creating logical plan for '{sql}'"); + let dataframe = ctx.sql(sql).await.expect(&msg); + let plan = dataframe.into_optimized_plan()?; + + let expected = vec![ + "Projection: t1.t1_int [t1_int:UInt32;N]", + " Filter: CASE WHEN __scalar_sq_1.__always_true IS NULL THEN Int64(0) ELSE __scalar_sq_1.cnt END = Int64(0) [t1_int:UInt32;N, cnt:Int64;N, __always_true:Boolean;N]", + " Projection: t1.t1_int, __scalar_sq_1.cnt, __scalar_sq_1.__always_true [t1_int:UInt32;N, cnt:Int64;N, __always_true:Boolean;N]", + " Left Join: t1.t1_int = __scalar_sq_1.t2_int [t1_int:UInt32;N, cnt:Int64;N, t2_int:UInt32;N, __always_true:Boolean;N]", + " TableScan: t1 projection=[t1_int] [t1_int:UInt32;N]", + " SubqueryAlias: __scalar_sq_1 [cnt:Int64;N, t2_int:UInt32;N, __always_true:Boolean]", + " Projection: CASE WHEN COUNT(UInt8(1)) = Int64(1) THEN Int64(NULL) ELSE COUNT(UInt8(1)) END AS cnt, t2.t2_int, __always_true [cnt:Int64;N, t2_int:UInt32;N, __always_true:Boolean]", + " Aggregate: groupBy=[[t2.t2_int, Boolean(true) AS __always_true]], aggr=[[COUNT(UInt8(1))]] [t2_int:UInt32;N, __always_true:Boolean, COUNT(UInt8(1)):Int64;N]", + " TableScan: t2 projection=[t2_int] [t2_int:UInt32;N]", + ]; + let formatted = plan.display_indent_schema().to_string(); + let actual: Vec<&str> = formatted.trim().lines().collect(); + assert_eq!( + expected, actual, + "\n\nexpected:\n\n{expected:#?}\nactual:\n\n{actual:#?}\n\n" + ); + + // assert data + let results = execute_to_batches(&ctx, sql).await; + let expected = vec![ + "+--------+", + "| t1_int |", + "+--------+", + "| 2 |", + "| 4 |", + 
"+--------+", + ]; + assert_batches_sorted_eq!(expected, &results); + + Ok(()) +} diff --git a/datafusion/optimizer/src/decorrelate.rs b/datafusion/optimizer/src/decorrelate.rs new file mode 100644 index 0000000000000..3e1a97cf88232 --- /dev/null +++ b/datafusion/optimizer/src/decorrelate.rs @@ -0,0 +1,505 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +use crate::simplify_expressions::{ExprSimplifier, SimplifyContext}; +use crate::utils::{ + collect_subquery_cols, conjunction, find_join_exprs, split_conjunction, +}; +use datafusion_common::tree_node::{ + RewriteRecursion, Transformed, TreeNode, TreeNodeRewriter, +}; +use datafusion_common::Result; +use datafusion_common::{Column, DFSchemaRef, DataFusionError, ScalarValue}; +use datafusion_expr::expr_rewriter::unnormalize_col; +use datafusion_expr::{ + expr, BinaryExpr, EmptyRelation, Expr, LogicalPlan, LogicalPlanBuilder, Operator, +}; +use datafusion_physical_expr::execution_props::ExecutionProps; +use std::collections::{BTreeSet, HashMap}; +use std::ops::Deref; + +/// This struct rewrite the sub query plan by pull up the correlated expressions(contains outer reference columns) from the inner subquery's [Filter]. 
+/// It adds the inner reference columns to the 'Projection' or 'Aggregate' of the subquery if they are missing, so that they can be evaluated by the parent operator as the join condition. +pub struct PullUpCorrelatedExpr { + pub join_filters: Vec, + // mapping from the plan to its holding correlated columns + pub correlated_subquery_cols_map: HashMap>, + pub in_predicate_opt: Option, + // indicate whether it is Exists(Not Exists) SubQuery + pub exists_sub_query: bool, + // indicate whether the correlated expressions can pull up or not + pub can_pull_up: bool, + // indicate whether need to handle the Count bug during the pull up process + pub need_handle_count_bug: bool, + // mapping from the plan to its expressions' evaluation result on empty batch + pub collected_count_expr_map: HashMap, + // pull up having expr, which must be evaluated after the Join + pub pull_up_having_expr: Option, +} + +/// Used to indicate the unmatched rows from the inner(subquery) table after the left out Join +/// This is used to handle the Count bug +pub const UN_MATCHED_ROW_INDICATOR: &str = "__always_true"; + +/// Mapping from expr display name to its evaluation result on empty record batch (for example: 'count(*)' is 'ScalarValue(0)', 'count(*) + 2' is 'ScalarValue(2)') +pub type ExprResultMap = HashMap; + +impl TreeNodeRewriter for PullUpCorrelatedExpr { + type N = LogicalPlan; + + fn pre_visit(&mut self, plan: &LogicalPlan) -> Result { + match plan { + LogicalPlan::Filter(_) => Ok(RewriteRecursion::Continue), + LogicalPlan::Union(_) | LogicalPlan::Sort(_) | LogicalPlan::Extension(_) => { + let plan_hold_outer = !plan.all_out_ref_exprs().is_empty(); + if plan_hold_outer { + // the unsupported case + self.can_pull_up = false; + Ok(RewriteRecursion::Stop) + } else { + Ok(RewriteRecursion::Continue) + } + } + LogicalPlan::Limit(_) => { + let plan_hold_outer = !plan.all_out_ref_exprs().is_empty(); + match (self.exists_sub_query, plan_hold_outer) { + (false, true) => { + // the 
unsupported case + self.can_pull_up = false; + Ok(RewriteRecursion::Stop) + } + _ => Ok(RewriteRecursion::Continue), + } + } + _ if plan.expressions().iter().any(|expr| expr.contains_outer()) => { + // the unsupported cases, the plan expressions contain out reference columns(like window expressions) + self.can_pull_up = false; + Ok(RewriteRecursion::Stop) + } + _ => Ok(RewriteRecursion::Continue), + } + } + + fn mutate(&mut self, plan: LogicalPlan) -> Result { + let subquery_schema = plan.schema().clone(); + match &plan { + LogicalPlan::Filter(plan_filter) => { + let subquery_filter_exprs = split_conjunction(&plan_filter.predicate); + let (mut join_filters, subquery_filters) = + find_join_exprs(subquery_filter_exprs)?; + if let Some(in_predicate) = &self.in_predicate_opt { + // in_predicate may be already included in the join filters, remove it from the join filters first. + join_filters = remove_duplicated_filter(join_filters, in_predicate); + } + let correlated_subquery_cols = + collect_subquery_cols(&join_filters, subquery_schema)?; + for expr in join_filters { + if !self.join_filters.contains(&expr) { + self.join_filters.push(expr) + } + } + + let mut expr_result_map_for_count_bug = HashMap::new(); + let pull_up_expr_opt = if let Some(expr_result_map) = + self.collected_count_expr_map.get(plan_filter.input.deref()) + { + if let Some(expr) = conjunction(subquery_filters.clone()) { + filter_exprs_evaluation_result_on_empty_batch( + &expr, + plan_filter.input.schema().clone(), + expr_result_map, + &mut expr_result_map_for_count_bug, + )? 
+ } else { + None + } + } else { + None + }; + + match (&pull_up_expr_opt, &self.pull_up_having_expr) { + (Some(_), Some(_)) => { + // Error path + Err(DataFusionError::Plan( + "Unsupported Subquery plan".to_string(), + )) + } + (Some(_), None) => { + self.pull_up_having_expr = pull_up_expr_opt; + let new_plan = + LogicalPlanBuilder::from((*plan_filter.input).clone()) + .build()?; + self.correlated_subquery_cols_map + .insert(new_plan.clone(), correlated_subquery_cols); + Ok(new_plan) + } + (None, _) => { + // if the subquery still has filter expressions, restore them. + let mut plan = + LogicalPlanBuilder::from((*plan_filter.input).clone()); + if let Some(expr) = conjunction(subquery_filters) { + plan = plan.filter(expr)? + } + let new_plan = plan.build()?; + self.correlated_subquery_cols_map + .insert(new_plan.clone(), correlated_subquery_cols); + Ok(new_plan) + } + } + } + LogicalPlan::Projection(projection) + if self.in_predicate_opt.is_some() || !self.join_filters.is_empty() => + { + let mut local_correlated_cols = BTreeSet::new(); + collect_local_correlated_cols( + &plan, + &self.correlated_subquery_cols_map, + &mut local_correlated_cols, + ); + // add missing columns to Projection + let mut missing_exprs = + self.collect_missing_exprs(&projection.expr, &local_correlated_cols)?; + + let mut expr_result_map_for_count_bug = HashMap::new(); + if let Some(expr_result_map) = + self.collected_count_expr_map.get(projection.input.deref()) + { + proj_exprs_evaluation_result_on_empty_batch( + &projection.expr, + projection.input.schema().clone(), + expr_result_map, + &mut expr_result_map_for_count_bug, + )?; + if !expr_result_map_for_count_bug.is_empty() { + // has count bug + let un_matched_row = Expr::Column(Column::new_unqualified( + UN_MATCHED_ROW_INDICATOR.to_string(), + )); + // add the unmatched rows indicator to the Projection expressions + missing_exprs.push(un_matched_row); + } + } + + let new_plan = LogicalPlanBuilder::from((*projection.input).clone()) + 
.project(missing_exprs)? + .build()?; + if !expr_result_map_for_count_bug.is_empty() { + self.collected_count_expr_map + .insert(new_plan.clone(), expr_result_map_for_count_bug); + } + Ok(new_plan) + } + LogicalPlan::Aggregate(aggregate) + if self.in_predicate_opt.is_some() || !self.join_filters.is_empty() => + { + let mut local_correlated_cols = BTreeSet::new(); + collect_local_correlated_cols( + &plan, + &self.correlated_subquery_cols_map, + &mut local_correlated_cols, + ); + // add missing columns to Aggregation's group expressions + let mut missing_exprs = self.collect_missing_exprs( + &aggregate.group_expr, + &local_correlated_cols, + )?; + + // if the original group expressions are empty, need to handle the Count bug + let mut expr_result_map_for_count_bug = HashMap::new(); + if self.need_handle_count_bug + && aggregate.group_expr.is_empty() + && !missing_exprs.is_empty() + { + agg_exprs_evaluation_result_on_empty_batch( + &aggregate.aggr_expr, + aggregate.input.schema().clone(), + &mut expr_result_map_for_count_bug, + )?; + if !expr_result_map_for_count_bug.is_empty() { + // has count bug + let un_matched_row = Expr::Alias( + Box::new(Expr::Literal(ScalarValue::Boolean(Some(true)))), + UN_MATCHED_ROW_INDICATOR.to_string(), + ); + // add the unmatched rows indicator to the Aggregation's group expressions + missing_exprs.push(un_matched_row); + } + } + let new_plan = LogicalPlanBuilder::from((*aggregate.input).clone()) + .aggregate(missing_exprs, aggregate.aggr_expr.to_vec())? 
+ .build()?; + if !expr_result_map_for_count_bug.is_empty() { + self.collected_count_expr_map + .insert(new_plan.clone(), expr_result_map_for_count_bug); + } + Ok(new_plan) + } + LogicalPlan::SubqueryAlias(alias) => { + let mut local_correlated_cols = BTreeSet::new(); + collect_local_correlated_cols( + &plan, + &self.correlated_subquery_cols_map, + &mut local_correlated_cols, + ); + let mut new_correlated_cols = BTreeSet::new(); + for col in local_correlated_cols.iter() { + new_correlated_cols + .insert(Column::new(Some(alias.alias.clone()), col.name.clone())); + } + self.correlated_subquery_cols_map + .insert(plan.clone(), new_correlated_cols); + Ok(plan) + } + LogicalPlan::Limit(limit) => { + // handling the limit clause in the subquery + match (self.exists_sub_query, self.join_filters.is_empty()) { + // un-correlated exist subquery, keep the limit + (true, true) => Ok(plan), + // Correlated exist subquery, remove the limit(so that correlated expressions can pull up) + (true, false) => { + if limit.fetch.filter(|limit_row| *limit_row == 0).is_some() { + Ok(LogicalPlan::EmptyRelation(EmptyRelation { + produce_one_row: false, + schema: limit.input.schema().clone(), + })) + } else { + LogicalPlanBuilder::from((*limit.input).clone()).build() + } + } + _ => Ok(plan), + } + } + _ => Ok(plan), + } + } +} + +impl PullUpCorrelatedExpr { + fn collect_missing_exprs( + &self, + exprs: &[Expr], + correlated_subquery_cols: &BTreeSet, + ) -> Result> { + let mut missing_exprs = vec![]; + if let Some(Expr::BinaryExpr(BinaryExpr { + left: _, + op: Operator::Eq, + right, + })) = &self.in_predicate_opt + { + if !matches!(right.deref(), Expr::Column(_)) + && !matches!(right.deref(), Expr::Literal(_)) + && !matches!(right.deref(), Expr::Alias(_, _)) + { + let alias_expr = right + .deref() + .clone() + .alias(format!("{:?}", unnormalize_col(right.deref().clone()))); + missing_exprs.push(alias_expr) + } + } + for expr in exprs { + if !missing_exprs.contains(expr) { + 
missing_exprs.push(expr.clone()) + } + } + for col in correlated_subquery_cols.iter() { + let col_expr = Expr::Column(col.clone()); + if !missing_exprs.contains(&col_expr) { + missing_exprs.push(col_expr) + } + } + if let Some(pull_up_having) = &self.pull_up_having_expr { + let filter_apply_columns = pull_up_having.to_columns()?; + for col in filter_apply_columns { + let col_expr = Expr::Column(col); + if !missing_exprs.contains(&col_expr) { + missing_exprs.push(col_expr) + } + } + } + + Ok(missing_exprs) + } +} + +fn collect_local_correlated_cols( + plan: &LogicalPlan, + all_cols_map: &HashMap>, + local_cols: &mut BTreeSet, +) { + for child in plan.inputs() { + if let Some(cols) = all_cols_map.get(child) { + local_cols.extend(cols.clone()); + } + // SubqueryAlias is treated as the leaf node + if !matches!(child, LogicalPlan::SubqueryAlias(_)) { + collect_local_correlated_cols(child, all_cols_map, local_cols); + } + } +} + +fn remove_duplicated_filter(filters: Vec, in_predicate: &Expr) -> Vec { + filters + .into_iter() + .filter(|filter| { + if filter == in_predicate { + return false; + } + + // ignore the binary order + !match (filter, in_predicate) { + (Expr::BinaryExpr(a_expr), Expr::BinaryExpr(b_expr)) => { + (a_expr.op == b_expr.op) + && (a_expr.left == b_expr.left && a_expr.right == b_expr.right) + || (a_expr.left == b_expr.right && a_expr.right == b_expr.left) + } + _ => false, + } + }) + .collect::>() +} + +fn agg_exprs_evaluation_result_on_empty_batch( + agg_expr: &[Expr], + schema: DFSchemaRef, + expr_result_map_for_count_bug: &mut ExprResultMap, +) -> Result<()> { + for e in agg_expr.iter() { + let result_expr = e.clone().transform_up(&|expr| { + let new_expr = match expr { + Expr::AggregateFunction(datafusion_expr::expr::AggregateFunction { + fun, + .. 
+ }) => { + if matches!(fun, datafusion_expr::AggregateFunction::Count) { + Transformed::Yes(Expr::Literal(ScalarValue::Int64(Some(0)))) + } else { + Transformed::Yes(Expr::Literal(ScalarValue::Null)) + } + } + Expr::AggregateUDF(_) => { + Transformed::Yes(Expr::Literal(ScalarValue::Null)) + } + _ => Transformed::No(expr), + }; + Ok(new_expr) + })?; + + let props = ExecutionProps::new(); + let info = SimplifyContext::new(&props).with_schema(schema.clone()); + let simplifier = ExprSimplifier::new(info); + let result_expr = simplifier.simplify(result_expr)?; + if matches!(result_expr, Expr::Literal(ScalarValue::Int64(_))) { + expr_result_map_for_count_bug.insert(e.display_name()?, result_expr); + } + } + Ok(()) +} + +fn proj_exprs_evaluation_result_on_empty_batch( + proj_expr: &[Expr], + schema: DFSchemaRef, + input_expr_result_map_for_count_bug: &ExprResultMap, + expr_result_map_for_count_bug: &mut ExprResultMap, +) -> Result<()> { + for expr in proj_expr.iter() { + let result_expr = expr.clone().transform_up(&|expr| { + if let Expr::Column(Column { name, .. 
}) = &expr { + if let Some(result_expr) = input_expr_result_map_for_count_bug.get(name) { + Ok(Transformed::Yes(result_expr.clone())) + } else { + Ok(Transformed::No(expr)) + } + } else { + Ok(Transformed::No(expr)) + } + })?; + if result_expr.ne(expr) { + let props = ExecutionProps::new(); + let info = SimplifyContext::new(&props).with_schema(schema.clone()); + let simplifier = ExprSimplifier::new(info); + let result_expr = simplifier.simplify(result_expr)?; + let expr_name = match expr { + Expr::Alias(_, alias) => alias.to_string(), + Expr::Column(Column { relation: _, name }) => name.to_string(), + _ => expr.display_name()?, + }; + expr_result_map_for_count_bug.insert(expr_name, result_expr); + } + } + Ok(()) +} + +fn filter_exprs_evaluation_result_on_empty_batch( + filter_expr: &Expr, + schema: DFSchemaRef, + input_expr_result_map_for_count_bug: &ExprResultMap, + expr_result_map_for_count_bug: &mut ExprResultMap, +) -> Result> { + let result_expr = filter_expr.clone().transform_up(&|expr| { + if let Expr::Column(Column { name, .. 
}) = &expr { + if let Some(result_expr) = input_expr_result_map_for_count_bug.get(name) { + Ok(Transformed::Yes(result_expr.clone())) + } else { + Ok(Transformed::No(expr)) + } + } else { + Ok(Transformed::No(expr)) + } + })?; + let pull_up_expr = if result_expr.ne(filter_expr) { + let props = ExecutionProps::new(); + let info = SimplifyContext::new(&props).with_schema(schema); + let simplifier = ExprSimplifier::new(info); + let result_expr = simplifier.simplify(result_expr)?; + match &result_expr { + // evaluate to false or null on empty batch, no need to pull up + Expr::Literal(ScalarValue::Null) + | Expr::Literal(ScalarValue::Boolean(Some(false))) => None, + // evaluate to true on empty batch, need to pull up the expr + Expr::Literal(ScalarValue::Boolean(Some(true))) => { + for (name, exprs) in input_expr_result_map_for_count_bug { + expr_result_map_for_count_bug.insert(name.clone(), exprs.clone()); + } + Some(filter_expr.clone()) + } + // can not evaluate statically + _ => { + for input_expr in input_expr_result_map_for_count_bug.values() { + let new_expr = Expr::Case(expr::Case { + expr: None, + when_then_expr: vec![( + Box::new(result_expr.clone()), + Box::new(input_expr.clone()), + )], + else_expr: Some(Box::new(Expr::Literal(ScalarValue::Null))), + }); + expr_result_map_for_count_bug + .insert(new_expr.display_name()?, new_expr); + } + None + } + } + } else { + for (name, exprs) in input_expr_result_map_for_count_bug { + expr_result_map_for_count_bug.insert(name.clone(), exprs.clone()); + } + None + }; + Ok(pull_up_expr) +} diff --git a/datafusion/optimizer/src/decorrelate_predicate_subquery.rs b/datafusion/optimizer/src/decorrelate_predicate_subquery.rs index 449bec48b051f..cda921c188429 100644 --- a/datafusion/optimizer/src/decorrelate_predicate_subquery.rs +++ b/datafusion/optimizer/src/decorrelate_predicate_subquery.rs @@ -16,10 +16,9 @@ // under the License. 
use crate::alias::AliasGenerator; +use crate::decorrelate::PullUpCorrelatedExpr; use crate::optimizer::ApplyOrder; -use crate::utils::{ - conjunction, replace_qualified_name, split_conjunction, PullUpCorrelatedExpr, -}; +use crate::utils::{conjunction, replace_qualified_name, split_conjunction}; use crate::{OptimizerConfig, OptimizerRule}; use datafusion_common::tree_node::TreeNode; use datafusion_common::{Column, DataFusionError, Result}; @@ -223,9 +222,9 @@ fn build_join( in_predicate_opt: in_predicate_opt.clone(), exists_sub_query: in_predicate_opt.is_none(), can_pull_up: true, - need_collect_count_expr_map: false, + need_handle_count_bug: false, collected_count_expr_map: Default::default(), - expr_check_map: Default::default(), + pull_up_having_expr: None, }; let new_plan = subquery.clone().rewrite(&mut pull_up)?; if !pull_up.can_pull_up { diff --git a/datafusion/optimizer/src/lib.rs b/datafusion/optimizer/src/lib.rs index 2af5edbd9fd34..ec971b7fbd622 100644 --- a/datafusion/optimizer/src/lib.rs +++ b/datafusion/optimizer/src/lib.rs @@ -18,6 +18,7 @@ pub mod alias; pub mod analyzer; pub mod common_subexpr_eliminate; +pub mod decorrelate; pub mod decorrelate_predicate_subquery; pub mod eliminate_cross_join; pub mod eliminate_duplicated_expr; diff --git a/datafusion/optimizer/src/scalar_subquery_to_join.rs b/datafusion/optimizer/src/scalar_subquery_to_join.rs index 46ad5f2a80fac..97cc25768db17 100644 --- a/datafusion/optimizer/src/scalar_subquery_to_join.rs +++ b/datafusion/optimizer/src/scalar_subquery_to_join.rs @@ -16,15 +16,14 @@ // under the License. 
use crate::alias::AliasGenerator; +use crate::decorrelate::{PullUpCorrelatedExpr, UN_MATCHED_ROW_INDICATOR}; use crate::optimizer::ApplyOrder; -use crate::utils::{ - conjunction, replace_qualified_name, ExprCheckMap, PullUpCorrelatedExpr, -}; +use crate::utils::{conjunction, replace_qualified_name}; use crate::{OptimizerConfig, OptimizerRule}; use datafusion_common::tree_node::{ RewriteRecursion, Transformed, TreeNode, TreeNodeRewriter, }; -use datafusion_common::{Column, DataFusionError, Result}; +use datafusion_common::{Column, DataFusionError, Result, ScalarValue}; use datafusion_expr::logical_plan::{JoinType, Subquery}; use datafusion_expr::{expr, EmptyRelation, Expr, LogicalPlan, LogicalPlanBuilder}; use std::collections::{BTreeSet, HashMap}; @@ -82,30 +81,16 @@ impl OptimizerRule for ScalarSubqueryToJoin { let mut cur_input = filter.input.as_ref().clone(); for (subquery, alias) in subqueries { if let Some((optimized_subquery, expr_check_map)) = - build_join(&subquery, &cur_input, &alias, true)? + build_join(&subquery, &cur_input, &alias)? { if !expr_check_map.is_empty() { rewrite_expr = rewrite_expr.clone().transform_up(&|expr| { if let Expr::Column(col) = &expr { - if let Some((expr1, expr2)) = + if let Some(map_expr) = expr_check_map.get(&col.name) { - let new_expr = Expr::Case(expr::Case { - expr: None, - when_then_expr: vec![( - Box::new(Expr::IsNull(Box::new( - Expr::Column( - Column::new_unqualified( - "__always_true", - ), - ), - ))), - Box::new(expr2.clone()), - )], - else_expr: Some(Box::new(expr1.clone())), - }); - Ok(Transformed::Yes(new_expr)) + Ok(Transformed::Yes(map_expr.clone())) } else { Ok(Transformed::No(expr)) } @@ -146,7 +131,7 @@ impl OptimizerRule for ScalarSubqueryToJoin { let mut cur_input = projection.input.as_ref().clone(); for (subquery, alias) in all_subqueryies { if let Some((optimized_subquery, expr_check_map)) = - build_join(&subquery, &cur_input, &alias, true)? + build_join(&subquery, &cur_input, &alias)? 
{ cur_input = optimized_subquery; if !expr_check_map.is_empty() { @@ -154,29 +139,20 @@ impl OptimizerRule for ScalarSubqueryToJoin { if let Some(rewrite_expr) = expr_to_rewrite_expr_map.get(expr) { - let new_expr = rewrite_expr.clone().transform_up(&|expr| { - if let Expr::Column(col) = &expr { - if let Some((expr1, expr2)) = expr_check_map.get(&col.name) - { - let new_expr = Expr::Case(expr::Case { - expr: None, - when_then_expr: vec![( - Box::new(Expr::IsNull(Box::new( - Expr::Column(Column::new_unqualified("__always_true")), - ))), - Box::new(expr2.clone()), - )], - else_expr: Some(Box::new(expr1.clone())), - }); - Ok(Transformed::Yes(new_expr)) + let new_expr = + rewrite_expr.clone().transform_up(&|expr| { + if let Expr::Column(col) = &expr { + if let Some(map_expr) = + expr_check_map.get(&col.name) + { + Ok(Transformed::Yes(map_expr.clone())) + } else { + Ok(Transformed::No(expr)) + } } else { Ok(Transformed::No(expr)) } - } else { - Ok(Transformed::No(expr)) - } - - })?; + })?; expr_to_rewrite_expr_map.insert(expr, new_expr); } } @@ -303,8 +279,7 @@ fn build_join( subquery: &Subquery, filter_input: &LogicalPlan, subquery_alias: &str, - need_collect_count_expr_map: bool, -) -> Result> { +) -> Result)>> { let subquery_plan = subquery.subquery.as_ref(); let mut pull_up = PullUpCorrelatedExpr { join_filters: vec![], @@ -312,15 +287,17 @@ fn build_join( in_predicate_opt: None, exists_sub_query: false, can_pull_up: true, - need_collect_count_expr_map, + need_handle_count_bug: true, collected_count_expr_map: Default::default(), - expr_check_map: Default::default(), + pull_up_having_expr: None, }; let new_plan = subquery_plan.clone().rewrite(&mut pull_up)?; if !pull_up.can_pull_up { return Ok(None); } + let collected_count_expr_map = + pull_up.collected_count_expr_map.get(&new_plan).cloned(); let sub_query_alias = LogicalPlanBuilder::from(new_plan) .alias(subquery_alias.to_string())? .build()?; @@ -368,7 +345,47 @@ fn build_join( )? .build()? 
}; - Ok(Some((new_plan, pull_up.expr_check_map.clone()))) + let mut computation_project_expr = HashMap::new(); + if let Some(expr_map) = collected_count_expr_map { + for (name, result) in expr_map { + let computer_expr = if let Some(filter) = &pull_up.pull_up_having_expr { + Expr::Case(expr::Case { + expr: None, + when_then_expr: vec![ + ( + Box::new(Expr::IsNull(Box::new(Expr::Column( + Column::new_unqualified(UN_MATCHED_ROW_INDICATOR), + )))), + Box::new(result), + ), + ( + Box::new(Expr::Not(Box::new(filter.clone()))), + Box::new(Expr::Literal(ScalarValue::Null)), + ), + ], + else_expr: Some(Box::new(Expr::Column(Column::new_unqualified( + name.clone(), + )))), + }) + } else { + Expr::Case(expr::Case { + expr: None, + when_then_expr: vec![( + Box::new(Expr::IsNull(Box::new(Expr::Column( + Column::new_unqualified(UN_MATCHED_ROW_INDICATOR), + )))), + Box::new(result), + )], + else_expr: Some(Box::new(Expr::Column(Column::new_unqualified( + name.clone(), + )))), + }) + }; + computation_project_expr.insert(name, computer_expr); + } + } + + Ok(Some((new_plan, computation_project_expr))) } #[cfg(test)] diff --git a/datafusion/optimizer/src/utils.rs b/datafusion/optimizer/src/utils.rs index 177e9bf4ab545..32ef4e087923d 100644 --- a/datafusion/optimizer/src/utils.rs +++ b/datafusion/optimizer/src/utils.rs @@ -17,28 +17,20 @@ //! 
Collection of utility functions that are leveraged by the query optimizer rules -use crate::simplify_expressions::{ExprSimplifier, SimplifyContext}; use crate::{OptimizerConfig, OptimizerRule}; -use datafusion_common::tree_node::{ - RewriteRecursion, Transformed, TreeNode, TreeNodeRewriter, -}; -use datafusion_common::{plan_err, Column, DFSchemaRef, ScalarValue}; +use datafusion_common::tree_node::{TreeNode, TreeNodeRewriter}; +use datafusion_common::{plan_err, Column, DFSchemaRef}; use datafusion_common::{DFSchema, Result}; use datafusion_expr::expr::{BinaryExpr, Sort}; -use datafusion_expr::expr_rewriter::{ - replace_col, strip_outer_reference, unnormalize_col, -}; -use datafusion_expr::logical_plan::LogicalPlanBuilder; +use datafusion_expr::expr_rewriter::{replace_col, strip_outer_reference}; use datafusion_expr::utils::from_plan; use datafusion_expr::{ and, logical_plan::{Filter, LogicalPlan}, - AggregateFunction, EmptyRelation, Expr, Operator, + Expr, Operator, }; -use datafusion_physical_expr::execution_props::ExecutionProps; use log::{debug, trace}; use std::collections::{BTreeSet, HashMap}; -use std::ops::Deref; use std::sync::Arc; /// Convenience rule for writing optimizers: recursively invoke @@ -393,343 +385,6 @@ pub fn log_plan(description: &str, plan: &LogicalPlan) { trace!("{description}::\n{}\n", plan.display_indent_schema()); } -/// This struct rewrite the sub query plan by pull up the correlated expressions(contains outer reference columns) from the inner subquery's [Filter]. -/// It adds the inner reference columns to the 'Projection' or 'Aggregate' of the subquery if they are missing, so that they can be evaluated by the parent operator as the join condition. 
-pub struct PullUpCorrelatedExpr { - pub join_filters: Vec, - // mapping from the plan to its holding correlated columns - pub correlated_subquery_cols_map: HashMap>, - pub in_predicate_opt: Option, - // indicate whether it is Exists(Not Exists) SubQuery - pub exists_sub_query: bool, - // indicate whether the correlated expressions can pull up or not - pub can_pull_up: bool, - // indicate whether the subquery need to collect count expr mapping - pub need_collect_count_expr_map: bool, - // mapping from expr name to the pair of agg expr and its evaluation result on empty record batch - pub collected_count_expr_map: HashMap, - pub expr_check_map: ExprCheckMap, -} - -pub type ExprCheckMap = HashMap; - -impl TreeNodeRewriter for PullUpCorrelatedExpr { - type N = LogicalPlan; - - fn pre_visit(&mut self, plan: &LogicalPlan) -> Result { - match plan { - LogicalPlan::Filter(_) => Ok(RewriteRecursion::Continue), - LogicalPlan::Union(_) | LogicalPlan::Sort(_) | LogicalPlan::Extension(_) => { - let plan_hold_outer = !plan.all_out_ref_exprs().is_empty(); - if plan_hold_outer { - // the unsupported case - self.can_pull_up = false; - Ok(RewriteRecursion::Stop) - } else { - Ok(RewriteRecursion::Continue) - } - } - LogicalPlan::Limit(_) => { - let plan_hold_outer = !plan.all_out_ref_exprs().is_empty(); - match (self.exists_sub_query, plan_hold_outer) { - (false, true) => { - // the unsupported case - self.can_pull_up = false; - Ok(RewriteRecursion::Stop) - } - _ => Ok(RewriteRecursion::Continue), - } - } - _ if plan.expressions().iter().any(|expr| expr.contains_outer()) => { - // the unsupported cases, the plan expressions contain out reference columns(like window expressions) - self.can_pull_up = false; - Ok(RewriteRecursion::Stop) - } - _ => Ok(RewriteRecursion::Continue), - } - } - - fn mutate(&mut self, plan: LogicalPlan) -> Result { - let subquery_schema = plan.schema().clone(); - match &plan { - LogicalPlan::Filter(plan_filter) => { - let subquery_filter_exprs = 
split_conjunction(&plan_filter.predicate); - let (mut join_filters, subquery_filters) = - find_join_exprs(subquery_filter_exprs)?; - if let Some(in_predicate) = &self.in_predicate_opt { - // in_predicate may be already included in the join filters, remove it from the join filters first. - join_filters = remove_duplicated_filter(join_filters, in_predicate); - } - let correlated_subquery_cols = - collect_subquery_cols(&join_filters, subquery_schema)?; - for expr in join_filters { - if !self.join_filters.contains(&expr) { - self.join_filters.push(expr) - } - } - // if the subquery still has filter expressions, restore them. - let mut plan = LogicalPlanBuilder::from((*plan_filter.input).clone()); - if let Some(expr) = conjunction(subquery_filters) { - plan = plan.filter(expr)? - } - let new_plan = plan.build()?; - self.correlated_subquery_cols_map - .insert(new_plan.clone(), correlated_subquery_cols); - Ok(new_plan) - } - LogicalPlan::Projection(projection) - if self.in_predicate_opt.is_some() || !self.join_filters.is_empty() => - { - let mut local_correlated_cols = BTreeSet::new(); - collect_local_correlated_cols( - &plan, - &self.correlated_subquery_cols_map, - &mut local_correlated_cols, - ); - // add missing columns to Projection - let mut missing_exprs = - self.collect_missing_exprs(&projection.expr, &local_correlated_cols)?; - if !self.collected_count_expr_map.is_empty() { - let head_expr = missing_exprs.get(0); - if let Some(expr) = head_expr { - let result_expr = expr.clone().transform_up(&|expr| { - if let Expr::Column(Column { name, .. 
}) = &expr { - if let Some((_, result_expr)) = - self.collected_count_expr_map.get(name) - { - Ok(Transformed::Yes(result_expr.clone())) - } else { - Ok(Transformed::No(expr)) - } - } else { - Ok(Transformed::No(expr)) - } - })?; - let scalar_expr = match expr { - Expr::Alias(_, alias) => ( - alias.to_string(), - Expr::Column(Column::new_unqualified(alias)), - ), - Expr::Column(Column { relation: _, name }) => { - (name.to_string(), expr.clone()) - } - _ => { - let scalar_column = expr.display_name()?; - ( - scalar_column.clone(), - Expr::Column(Column::new_unqualified(scalar_column)), - ) - } - }; - self.expr_check_map - .insert(scalar_expr.0, (scalar_expr.1, result_expr)); - missing_exprs.push(Expr::Column(Column::new_unqualified( - "__always_true".to_string(), - ))); - } - } - - let new_plan = LogicalPlanBuilder::from((*projection.input).clone()) - .project(missing_exprs)? - .build()?; - Ok(new_plan) - } - LogicalPlan::Aggregate(aggregate) - if self.in_predicate_opt.is_some() || !self.join_filters.is_empty() => - { - let mut local_correlated_cols = BTreeSet::new(); - collect_local_correlated_cols( - &plan, - &self.correlated_subquery_cols_map, - &mut local_correlated_cols, - ); - // add missing columns to Aggregation's group expression - let mut missing_exprs = self.collect_missing_exprs( - &aggregate.group_expr, - &local_correlated_cols, - )?; - - if self.need_collect_count_expr_map && aggregate.group_expr.is_empty() { - let agg_result_exprs = agg_exprs_eva_result_on_empty_batch( - &aggregate.aggr_expr, - subquery_schema, - )?; - if !missing_exprs.is_empty() { - let scalar_agg = !agg_result_exprs.values().any(|result_expr| { - matches!(result_expr, Expr::Literal(ScalarValue::Null)) - }); - if scalar_agg { - let internal_always_true_col = Expr::Alias( - Box::new(Expr::Literal(ScalarValue::Boolean(Some(true)))), - "__always_true".to_string(), - ); - missing_exprs.push(internal_always_true_col); - for (agg_expr, result_expr_on_empty) in agg_result_exprs { - let 
agg_expr_name = agg_expr.display_name()?; - self.collected_count_expr_map.insert( - agg_expr_name, - (agg_expr, result_expr_on_empty), - ); - } - } - } - } - - let new_plan = LogicalPlanBuilder::from((*aggregate.input).clone()) - .aggregate(missing_exprs, aggregate.aggr_expr.to_vec())? - .build()?; - Ok(new_plan) - } - LogicalPlan::SubqueryAlias(alias) => { - let mut local_correlated_cols = BTreeSet::new(); - collect_local_correlated_cols( - &plan, - &self.correlated_subquery_cols_map, - &mut local_correlated_cols, - ); - let mut new_correlated_cols = BTreeSet::new(); - for col in local_correlated_cols.iter() { - new_correlated_cols - .insert(Column::new(Some(alias.alias.clone()), col.name.clone())); - } - self.correlated_subquery_cols_map - .insert(plan.clone(), new_correlated_cols); - Ok(plan) - } - LogicalPlan::Limit(limit) => { - // handling the limit clause in the subquery - match (self.exists_sub_query, self.join_filters.is_empty()) { - // un-correlated exist subquery, keep the limit - (true, true) => Ok(plan), - // Correlated exist subquery, remove the limit(so that correlated expressions can pull up) - (true, false) => { - if limit.fetch.filter(|limit_row| *limit_row == 0).is_some() { - Ok(LogicalPlan::EmptyRelation(EmptyRelation { - produce_one_row: false, - schema: limit.input.schema().clone(), - })) - } else { - LogicalPlanBuilder::from((*limit.input).clone()).build() - } - } - _ => Ok(plan), - } - } - _ => Ok(plan), - } - } -} - -impl PullUpCorrelatedExpr { - fn collect_missing_exprs( - &self, - exprs: &[Expr], - correlated_subquery_cols: &BTreeSet, - ) -> Result> { - let mut missing_exprs = vec![]; - if let Some(Expr::BinaryExpr(BinaryExpr { - left: _, - op: Operator::Eq, - right, - })) = &self.in_predicate_opt - { - if !matches!(right.deref(), Expr::Column(_)) - && !matches!(right.deref(), Expr::Literal(_)) - && !matches!(right.deref(), Expr::Alias(_, _)) - { - let alias_expr = right - .deref() - .clone() - .alias(format!("{:?}", 
unnormalize_col(right.deref().clone()))); - missing_exprs.push(alias_expr) - } - } - for expr in exprs { - if !missing_exprs.contains(expr) { - missing_exprs.push(expr.clone()) - } - } - for col in correlated_subquery_cols.iter() { - let col_expr = Expr::Column(col.clone()); - if !missing_exprs.contains(&col_expr) { - missing_exprs.push(col_expr) - } - } - Ok(missing_exprs) - } -} - -fn collect_local_correlated_cols( - plan: &LogicalPlan, - all_cols_map: &HashMap>, - local_cols: &mut BTreeSet, -) { - for child in plan.inputs() { - if let Some(cols) = all_cols_map.get(child) { - local_cols.extend(cols.clone()); - } - // SubqueryAlias is treated as the leaf node - if !matches!(child, LogicalPlan::SubqueryAlias(_)) { - collect_local_correlated_cols(child, all_cols_map, local_cols); - } - } -} - -fn remove_duplicated_filter(filters: Vec, in_predicate: &Expr) -> Vec { - filters - .into_iter() - .filter(|filter| { - if filter == in_predicate { - return false; - } - - // ignore the binary order - !match (filter, in_predicate) { - (Expr::BinaryExpr(a_expr), Expr::BinaryExpr(b_expr)) => { - (a_expr.op == b_expr.op) - && (a_expr.left == b_expr.left && a_expr.right == b_expr.right) - || (a_expr.left == b_expr.right && a_expr.right == b_expr.left) - } - _ => false, - } - }) - .collect::>() -} - -fn agg_exprs_eva_result_on_empty_batch( - agg_expr: &[Expr], - schema: DFSchemaRef, -) -> Result> { - let mut result_expr_map = HashMap::new(); - for e in agg_expr.iter() { - let new_expr = e.clone().transform_up(&|expr| { - let new_expr = match expr { - Expr::AggregateFunction(datafusion_expr::expr::AggregateFunction { - fun, - .. 
- }) => { - if matches!(fun, AggregateFunction::Count) { - Transformed::Yes(Expr::Literal(ScalarValue::Int64(Some(0)))) - } else { - Transformed::Yes(Expr::Literal(ScalarValue::Null)) - } - } - Expr::AggregateUDF(_) => { - Transformed::Yes(Expr::Literal(ScalarValue::Null)) - } - _ => Transformed::No(expr), - }; - Ok(new_expr) - })?; - - let props = ExecutionProps::new(); - let info = SimplifyContext::new(&props).with_schema(schema.clone()); - let simplifier = ExprSimplifier::new(info); - result_expr_map.insert(e.clone(), simplifier.simplify(new_expr)?); - } - Ok(result_expr_map) -} - #[cfg(test)] mod tests { use super::*; From 75eeb88f745b06e2eb30cda6a026c90acd799b09 Mon Sep 17 00:00:00 2001 From: "mingmwang@ebay.com" Date: Thu, 1 Jun 2023 11:30:16 +0800 Subject: [PATCH 08/13] tiny fix --- datafusion/core/tests/sql/subqueries.rs | 1 - datafusion/optimizer/src/decorrelate.rs | 28 ++++++++++++++++++------- 2 files changed, 21 insertions(+), 8 deletions(-) diff --git a/datafusion/core/tests/sql/subqueries.rs b/datafusion/core/tests/sql/subqueries.rs index d5d136c6f8339..30dfdeb3eaaf2 100644 --- a/datafusion/core/tests/sql/subqueries.rs +++ b/datafusion/core/tests/sql/subqueries.rs @@ -961,7 +961,6 @@ async fn uncorrelated_scalar_subquery_with_limit0() -> Result<()> { let dataframe = ctx.sql(sql).await.expect(&msg); let plan = dataframe.into_optimized_plan()?; - // not de-correlated let expected = vec![ "Projection: t1.t1_id, __scalar_sq_1.t2_id AS t2_id [t1_id:UInt32;N, t2_id:UInt32;N]", " Left Join: [t1_id:UInt32;N, t2_id:UInt32;N]", diff --git a/datafusion/optimizer/src/decorrelate.rs b/datafusion/optimizer/src/decorrelate.rs index 3e1a97cf88232..b40642abd3fd7 100644 --- a/datafusion/optimizer/src/decorrelate.rs +++ b/datafusion/optimizer/src/decorrelate.rs @@ -262,26 +262,40 @@ impl TreeNodeRewriter for PullUpCorrelatedExpr { } self.correlated_subquery_cols_map .insert(plan.clone(), new_correlated_cols); + if let Some(input_map) = + 
self.collected_count_expr_map.get(alias.input.deref()) + { + self.collected_count_expr_map + .insert(plan.clone(), input_map.clone()); + } Ok(plan) } LogicalPlan::Limit(limit) => { + let input_expr_map = self + .collected_count_expr_map + .get(limit.input.deref()) + .cloned(); // handling the limit clause in the subquery - match (self.exists_sub_query, self.join_filters.is_empty()) { - // un-correlated exist subquery, keep the limit - (true, true) => Ok(plan), + let new_plan = match (self.exists_sub_query, self.join_filters.is_empty()) + { // Correlated exist subquery, remove the limit(so that correlated expressions can pull up) (true, false) => { if limit.fetch.filter(|limit_row| *limit_row == 0).is_some() { - Ok(LogicalPlan::EmptyRelation(EmptyRelation { + LogicalPlan::EmptyRelation(EmptyRelation { produce_one_row: false, schema: limit.input.schema().clone(), - })) + }) } else { - LogicalPlanBuilder::from((*limit.input).clone()).build() + LogicalPlanBuilder::from((*limit.input).clone()).build()? } } - _ => Ok(plan), + _ => plan, + }; + if let Some(input_map) = input_expr_map { + self.collected_count_expr_map + .insert(new_plan.clone(), input_map); } + Ok(new_plan) } _ => Ok(plan), } From 717f51dbc7d49316455c2074aa7884e1b19d3729 Mon Sep 17 00:00:00 2001 From: "mingmwang@ebay.com" Date: Thu, 1 Jun 2023 13:08:49 +0800 Subject: [PATCH 09/13] fix doc --- datafusion/optimizer/src/decorrelate.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/datafusion/optimizer/src/decorrelate.rs b/datafusion/optimizer/src/decorrelate.rs index b40642abd3fd7..db4a1f2c409a2 100644 --- a/datafusion/optimizer/src/decorrelate.rs +++ b/datafusion/optimizer/src/decorrelate.rs @@ -32,7 +32,7 @@ use datafusion_physical_expr::execution_props::ExecutionProps; use std::collections::{BTreeSet, HashMap}; use std::ops::Deref; -/// This struct rewrite the sub query plan by pull up the correlated expressions(contains outer reference columns) from the inner subquery's [Filter]. 
+/// This struct rewrite the sub query plan by pull up the correlated expressions(contains outer reference columns) from the inner subquery's 'Filter'. /// It adds the inner reference columns to the 'Projection' or 'Aggregate' of the subquery if they are missing, so that they can be evaluated by the parent operator as the join condition. pub struct PullUpCorrelatedExpr { pub join_filters: Vec, From a752ee8f351a51606260e37fc8e781ffec3ff74b Mon Sep 17 00:00:00 2001 From: "mingmwang@ebay.com" Date: Thu, 1 Jun 2023 17:40:56 +0800 Subject: [PATCH 10/13] fix tpch planning change --- .../test_files/tpch/q11.slt.part | 157 +++++++++--------- .../test_files/tpch/q15.slt.part | 53 +++--- .../test_files/tpch/q17.slt.part | 8 +- .../sqllogictests/test_files/tpch/q2.slt.part | 10 +- .../test_files/tpch/q20.slt.part | 8 +- .../test_files/tpch/q22.slt.part | 75 ++++----- 6 files changed, 149 insertions(+), 162 deletions(-) diff --git a/datafusion/core/tests/sqllogictests/test_files/tpch/q11.slt.part b/datafusion/core/tests/sqllogictests/test_files/tpch/q11.slt.part index bc6d166b8680f..b3a462baaa0d3 100644 --- a/datafusion/core/tests/sqllogictests/test_files/tpch/q11.slt.part +++ b/datafusion/core/tests/sqllogictests/test_files/tpch/q11.slt.part @@ -50,92 +50,89 @@ logical_plan Limit: skip=0, fetch=10 --Sort: value DESC NULLS FIRST, fetch=10 ----Projection: partsupp.ps_partkey, SUM(partsupp.ps_supplycost * partsupp.ps_availqty) AS value -------Filter: CAST(SUM(partsupp.ps_supplycost * partsupp.ps_availqty) AS Decimal128(38, 15)) > __scalar_sq_1.__value ---------CrossJoin: -----------Aggregate: groupBy=[[partsupp.ps_partkey]], aggr=[[SUM(partsupp.ps_supplycost * CAST(partsupp.ps_availqty AS Decimal128(10, 0)))]] -------------Projection: partsupp.ps_partkey, partsupp.ps_availqty, partsupp.ps_supplycost ---------------Inner Join: supplier.s_nationkey = nation.n_nationkey -----------------Projection: partsupp.ps_partkey, partsupp.ps_availqty, partsupp.ps_supplycost, 
supplier.s_nationkey -------------------Inner Join: partsupp.ps_suppkey = supplier.s_suppkey ---------------------TableScan: partsupp projection=[ps_partkey, ps_suppkey, ps_availqty, ps_supplycost] ---------------------TableScan: supplier projection=[s_suppkey, s_nationkey] -----------------Projection: nation.n_nationkey -------------------Filter: nation.n_name = Utf8("GERMANY") ---------------------TableScan: nation projection=[n_nationkey, n_name], partial_filters=[nation.n_name = Utf8("GERMANY")] -----------SubqueryAlias: __scalar_sq_1 -------------Projection: CAST(CAST(SUM(partsupp.ps_supplycost * partsupp.ps_availqty) AS Float64) * Float64(0.0001) AS Decimal128(38, 15)) AS __value ---------------Aggregate: groupBy=[[]], aggr=[[SUM(partsupp.ps_supplycost * CAST(partsupp.ps_availqty AS Decimal128(10, 0)))]] -----------------Projection: partsupp.ps_availqty, partsupp.ps_supplycost -------------------Inner Join: supplier.s_nationkey = nation.n_nationkey ---------------------Projection: partsupp.ps_availqty, partsupp.ps_supplycost, supplier.s_nationkey -----------------------Inner Join: partsupp.ps_suppkey = supplier.s_suppkey -------------------------TableScan: partsupp projection=[ps_suppkey, ps_availqty, ps_supplycost] -------------------------TableScan: supplier projection=[s_suppkey, s_nationkey] ---------------------Projection: nation.n_nationkey -----------------------Filter: nation.n_name = Utf8("GERMANY") -------------------------TableScan: nation projection=[n_nationkey, n_name], partial_filters=[nation.n_name = Utf8("GERMANY")] +------Inner Join: Filter: CAST(SUM(partsupp.ps_supplycost * partsupp.ps_availqty) AS Decimal128(38, 15)) > __scalar_sq_1.SUM(partsupp.ps_supplycost * partsupp.ps_availqty) * Float64(0.0001) +--------Aggregate: groupBy=[[partsupp.ps_partkey]], aggr=[[SUM(partsupp.ps_supplycost * CAST(partsupp.ps_availqty AS Decimal128(10, 0)))]] +----------Projection: partsupp.ps_partkey, partsupp.ps_availqty, partsupp.ps_supplycost 
+------------Inner Join: supplier.s_nationkey = nation.n_nationkey +--------------Projection: partsupp.ps_partkey, partsupp.ps_availqty, partsupp.ps_supplycost, supplier.s_nationkey +----------------Inner Join: partsupp.ps_suppkey = supplier.s_suppkey +------------------TableScan: partsupp projection=[ps_partkey, ps_suppkey, ps_availqty, ps_supplycost] +------------------TableScan: supplier projection=[s_suppkey, s_nationkey] +--------------Projection: nation.n_nationkey +----------------Filter: nation.n_name = Utf8("GERMANY") +------------------TableScan: nation projection=[n_nationkey, n_name], partial_filters=[nation.n_name = Utf8("GERMANY")] +--------SubqueryAlias: __scalar_sq_1 +----------Projection: CAST(CAST(SUM(partsupp.ps_supplycost * partsupp.ps_availqty) AS Float64) * Float64(0.0001) AS Decimal128(38, 15)) +------------Aggregate: groupBy=[[]], aggr=[[SUM(partsupp.ps_supplycost * CAST(partsupp.ps_availqty AS Decimal128(10, 0)))]] +--------------Projection: partsupp.ps_availqty, partsupp.ps_supplycost +----------------Inner Join: supplier.s_nationkey = nation.n_nationkey +------------------Projection: partsupp.ps_availqty, partsupp.ps_supplycost, supplier.s_nationkey +--------------------Inner Join: partsupp.ps_suppkey = supplier.s_suppkey +----------------------TableScan: partsupp projection=[ps_suppkey, ps_availqty, ps_supplycost] +----------------------TableScan: supplier projection=[s_suppkey, s_nationkey] +------------------Projection: nation.n_nationkey +--------------------Filter: nation.n_name = Utf8("GERMANY") +----------------------TableScan: nation projection=[n_nationkey, n_name], partial_filters=[nation.n_name = Utf8("GERMANY")] physical_plan GlobalLimitExec: skip=0, fetch=10 ---SortExec: fetch=10, expr=[value@1 DESC] -----ProjectionExec: expr=[ps_partkey@0 as ps_partkey, SUM(partsupp.ps_supplycost * partsupp.ps_availqty)@1 as value] -------CoalesceBatchesExec: target_batch_size=8192 ---------FilterExec: CAST(SUM(partsupp.ps_supplycost * 
partsupp.ps_availqty)@1 AS Decimal128(38, 15)) > __value@2 -----------CrossJoinExec -------------CoalescePartitionsExec ---------------AggregateExec: mode=FinalPartitioned, gby=[ps_partkey@0 as ps_partkey], aggr=[SUM(partsupp.ps_supplycost * partsupp.ps_availqty)] -----------------CoalesceBatchesExec: target_batch_size=8192 -------------------RepartitionExec: partitioning=Hash([Column { name: "ps_partkey", index: 0 }], 4), input_partitions=4 ---------------------AggregateExec: mode=Partial, gby=[ps_partkey@0 as ps_partkey], aggr=[SUM(partsupp.ps_supplycost * partsupp.ps_availqty)] -----------------------ProjectionExec: expr=[ps_partkey@0 as ps_partkey, ps_availqty@1 as ps_availqty, ps_supplycost@2 as ps_supplycost] +--SortPreservingMergeExec: [value@1 DESC] +----SortExec: fetch=10, expr=[value@1 DESC] +------ProjectionExec: expr=[ps_partkey@0 as ps_partkey, SUM(partsupp.ps_supplycost * partsupp.ps_availqty)@1 as value] +--------NestedLoopJoinExec: join_type=Inner, filter=BinaryExpr { left: CastExpr { expr: Column { name: "SUM(partsupp.ps_supplycost * partsupp.ps_availqty)", index: 0 }, cast_type: Decimal128(38, 15), cast_options: CastOptions { safe: false, format_options: FormatOptions { safe: true, null: "", date_format: None, datetime_format: None, timestamp_format: None, timestamp_tz_format: None, time_format: None } } }, op: Gt, right: Column { name: "SUM(partsupp.ps_supplycost * partsupp.ps_availqty) * Float64(0.0001)", index: 1 } } +----------AggregateExec: mode=FinalPartitioned, gby=[ps_partkey@0 as ps_partkey], aggr=[SUM(partsupp.ps_supplycost * partsupp.ps_availqty)] +------------CoalesceBatchesExec: target_batch_size=8192 +--------------RepartitionExec: partitioning=Hash([Column { name: "ps_partkey", index: 0 }], 4), input_partitions=4 +----------------AggregateExec: mode=Partial, gby=[ps_partkey@0 as ps_partkey], aggr=[SUM(partsupp.ps_supplycost * partsupp.ps_availqty)] +------------------ProjectionExec: expr=[ps_partkey@0 as ps_partkey, ps_availqty@1 as 
ps_availqty, ps_supplycost@2 as ps_supplycost] +--------------------CoalesceBatchesExec: target_batch_size=8192 +----------------------HashJoinExec: mode=Partitioned, join_type=Inner, on=[(Column { name: "s_nationkey", index: 3 }, Column { name: "n_nationkey", index: 0 })] ------------------------CoalesceBatchesExec: target_batch_size=8192 ---------------------------HashJoinExec: mode=Partitioned, join_type=Inner, on=[(Column { name: "s_nationkey", index: 3 }, Column { name: "n_nationkey", index: 0 })] -----------------------------CoalesceBatchesExec: target_batch_size=8192 -------------------------------RepartitionExec: partitioning=Hash([Column { name: "s_nationkey", index: 3 }], 4), input_partitions=4 ---------------------------------ProjectionExec: expr=[ps_partkey@0 as ps_partkey, ps_availqty@2 as ps_availqty, ps_supplycost@3 as ps_supplycost, s_nationkey@5 as s_nationkey] +--------------------------RepartitionExec: partitioning=Hash([Column { name: "s_nationkey", index: 3 }], 4), input_partitions=4 +----------------------------ProjectionExec: expr=[ps_partkey@0 as ps_partkey, ps_availqty@2 as ps_availqty, ps_supplycost@3 as ps_supplycost, s_nationkey@5 as s_nationkey] +------------------------------CoalesceBatchesExec: target_batch_size=8192 +--------------------------------HashJoinExec: mode=Partitioned, join_type=Inner, on=[(Column { name: "ps_suppkey", index: 1 }, Column { name: "s_suppkey", index: 0 })] ----------------------------------CoalesceBatchesExec: target_batch_size=8192 -------------------------------------HashJoinExec: mode=Partitioned, join_type=Inner, on=[(Column { name: "ps_suppkey", index: 1 }, Column { name: "s_suppkey", index: 0 })] ---------------------------------------CoalesceBatchesExec: target_batch_size=8192 -----------------------------------------RepartitionExec: partitioning=Hash([Column { name: "ps_suppkey", index: 1 }], 4), input_partitions=4 -------------------------------------------RepartitionExec: 
partitioning=RoundRobinBatch(4), input_partitions=1 ---------------------------------------------CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/core/tests/sqllogictests/test_files/tpch/data/partsupp.tbl]]}, projection=[ps_partkey, ps_suppkey, ps_availqty, ps_supplycost], has_header=false ---------------------------------------CoalesceBatchesExec: target_batch_size=8192 -----------------------------------------RepartitionExec: partitioning=Hash([Column { name: "s_suppkey", index: 0 }], 4), input_partitions=4 -------------------------------------------RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1 ---------------------------------------------CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/core/tests/sqllogictests/test_files/tpch/data/supplier.tbl]]}, projection=[s_suppkey, s_nationkey], has_header=false -----------------------------CoalesceBatchesExec: target_batch_size=8192 -------------------------------RepartitionExec: partitioning=Hash([Column { name: "n_nationkey", index: 0 }], 4), input_partitions=4 ---------------------------------ProjectionExec: expr=[n_nationkey@0 as n_nationkey] +------------------------------------RepartitionExec: partitioning=Hash([Column { name: "ps_suppkey", index: 1 }], 4), input_partitions=4 +--------------------------------------RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1 +----------------------------------------CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/core/tests/sqllogictests/test_files/tpch/data/partsupp.tbl]]}, projection=[ps_partkey, ps_suppkey, ps_availqty, ps_supplycost], has_header=false +----------------------------------CoalesceBatchesExec: target_batch_size=8192 +------------------------------------RepartitionExec: partitioning=Hash([Column { name: "s_suppkey", index: 0 }], 4), input_partitions=4 +--------------------------------------RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1 
+----------------------------------------CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/core/tests/sqllogictests/test_files/tpch/data/supplier.tbl]]}, projection=[s_suppkey, s_nationkey], has_header=false +------------------------CoalesceBatchesExec: target_batch_size=8192 +--------------------------RepartitionExec: partitioning=Hash([Column { name: "n_nationkey", index: 0 }], 4), input_partitions=4 +----------------------------ProjectionExec: expr=[n_nationkey@0 as n_nationkey] +------------------------------CoalesceBatchesExec: target_batch_size=8192 +--------------------------------FilterExec: n_name@1 = GERMANY +----------------------------------RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1 +------------------------------------CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/core/tests/sqllogictests/test_files/tpch/data/nation.tbl]]}, projection=[n_nationkey, n_name], has_header=false +----------ProjectionExec: expr=[CAST(CAST(SUM(partsupp.ps_supplycost * partsupp.ps_availqty)@0 AS Float64) * 0.0001 AS Decimal128(38, 15)) as SUM(partsupp.ps_supplycost * partsupp.ps_availqty) * Float64(0.0001)] +------------AggregateExec: mode=Final, gby=[], aggr=[SUM(partsupp.ps_supplycost * partsupp.ps_availqty)] +--------------CoalescePartitionsExec +----------------AggregateExec: mode=Partial, gby=[], aggr=[SUM(partsupp.ps_supplycost * partsupp.ps_availqty)] +------------------ProjectionExec: expr=[ps_availqty@0 as ps_availqty, ps_supplycost@1 as ps_supplycost] +--------------------CoalesceBatchesExec: target_batch_size=8192 +----------------------HashJoinExec: mode=Partitioned, join_type=Inner, on=[(Column { name: "s_nationkey", index: 2 }, Column { name: "n_nationkey", index: 0 })] +------------------------CoalesceBatchesExec: target_batch_size=8192 +--------------------------RepartitionExec: partitioning=Hash([Column { name: "s_nationkey", index: 2 }], 4), input_partitions=4 +----------------------------ProjectionExec: 
expr=[ps_availqty@1 as ps_availqty, ps_supplycost@2 as ps_supplycost, s_nationkey@4 as s_nationkey] +------------------------------CoalesceBatchesExec: target_batch_size=8192 +--------------------------------HashJoinExec: mode=Partitioned, join_type=Inner, on=[(Column { name: "ps_suppkey", index: 0 }, Column { name: "s_suppkey", index: 0 })] +----------------------------------CoalesceBatchesExec: target_batch_size=8192 +------------------------------------RepartitionExec: partitioning=Hash([Column { name: "ps_suppkey", index: 0 }], 4), input_partitions=4 +--------------------------------------RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1 +----------------------------------------CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/core/tests/sqllogictests/test_files/tpch/data/partsupp.tbl]]}, projection=[ps_suppkey, ps_availqty, ps_supplycost], has_header=false ----------------------------------CoalesceBatchesExec: target_batch_size=8192 -------------------------------------FilterExec: n_name@1 = GERMANY +------------------------------------RepartitionExec: partitioning=Hash([Column { name: "s_suppkey", index: 0 }], 4), input_partitions=4 --------------------------------------RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1 -----------------------------------------CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/core/tests/sqllogictests/test_files/tpch/data/nation.tbl]]}, projection=[n_nationkey, n_name], has_header=false -------------ProjectionExec: expr=[CAST(CAST(SUM(partsupp.ps_supplycost * partsupp.ps_availqty)@0 AS Float64) * 0.0001 AS Decimal128(38, 15)) as __value] ---------------AggregateExec: mode=Final, gby=[], aggr=[SUM(partsupp.ps_supplycost * partsupp.ps_availqty)] -----------------CoalescePartitionsExec -------------------AggregateExec: mode=Partial, gby=[], aggr=[SUM(partsupp.ps_supplycost * partsupp.ps_availqty)] ---------------------ProjectionExec: expr=[ps_availqty@0 as ps_availqty, 
ps_supplycost@1 as ps_supplycost] -----------------------CoalesceBatchesExec: target_batch_size=8192 -------------------------HashJoinExec: mode=Partitioned, join_type=Inner, on=[(Column { name: "s_nationkey", index: 2 }, Column { name: "n_nationkey", index: 0 })] ---------------------------CoalesceBatchesExec: target_batch_size=8192 -----------------------------RepartitionExec: partitioning=Hash([Column { name: "s_nationkey", index: 2 }], 4), input_partitions=4 -------------------------------ProjectionExec: expr=[ps_availqty@1 as ps_availqty, ps_supplycost@2 as ps_supplycost, s_nationkey@4 as s_nationkey] ---------------------------------CoalesceBatchesExec: target_batch_size=8192 -----------------------------------HashJoinExec: mode=Partitioned, join_type=Inner, on=[(Column { name: "ps_suppkey", index: 0 }, Column { name: "s_suppkey", index: 0 })] -------------------------------------CoalesceBatchesExec: target_batch_size=8192 ---------------------------------------RepartitionExec: partitioning=Hash([Column { name: "ps_suppkey", index: 0 }], 4), input_partitions=4 -----------------------------------------RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1 -------------------------------------------CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/core/tests/sqllogictests/test_files/tpch/data/partsupp.tbl]]}, projection=[ps_suppkey, ps_availqty, ps_supplycost], has_header=false -------------------------------------CoalesceBatchesExec: target_batch_size=8192 ---------------------------------------RepartitionExec: partitioning=Hash([Column { name: "s_suppkey", index: 0 }], 4), input_partitions=4 -----------------------------------------RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1 -------------------------------------------CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/core/tests/sqllogictests/test_files/tpch/data/supplier.tbl]]}, projection=[s_suppkey, s_nationkey], has_header=false 
---------------------------CoalesceBatchesExec: target_batch_size=8192 -----------------------------RepartitionExec: partitioning=Hash([Column { name: "n_nationkey", index: 0 }], 4), input_partitions=4 -------------------------------ProjectionExec: expr=[n_nationkey@0 as n_nationkey] ---------------------------------CoalesceBatchesExec: target_batch_size=8192 -----------------------------------FilterExec: n_name@1 = GERMANY -------------------------------------RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1 ---------------------------------------CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/core/tests/sqllogictests/test_files/tpch/data/nation.tbl]]}, projection=[n_nationkey, n_name], has_header=false +----------------------------------------CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/core/tests/sqllogictests/test_files/tpch/data/supplier.tbl]]}, projection=[s_suppkey, s_nationkey], has_header=false +------------------------CoalesceBatchesExec: target_batch_size=8192 +--------------------------RepartitionExec: partitioning=Hash([Column { name: "n_nationkey", index: 0 }], 4), input_partitions=4 +----------------------------ProjectionExec: expr=[n_nationkey@0 as n_nationkey] +------------------------------CoalesceBatchesExec: target_batch_size=8192 +--------------------------------FilterExec: n_name@1 = GERMANY +----------------------------------RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1 +------------------------------------CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/core/tests/sqllogictests/test_files/tpch/data/nation.tbl]]}, projection=[n_nationkey, n_name], has_header=false diff --git a/datafusion/core/tests/sqllogictests/test_files/tpch/q15.slt.part b/datafusion/core/tests/sqllogictests/test_files/tpch/q15.slt.part index f7e428dcfb9d6..0406b4f70f7a0 100644 --- a/datafusion/core/tests/sqllogictests/test_files/tpch/q15.slt.part +++ 
b/datafusion/core/tests/sqllogictests/test_files/tpch/q15.slt.part @@ -52,7 +52,7 @@ order by logical_plan Sort: supplier.s_suppkey ASC NULLS LAST --Projection: supplier.s_suppkey, supplier.s_name, supplier.s_address, supplier.s_phone, total_revenue -----Inner Join: total_revenue = __scalar_sq_3.__value +----Inner Join: total_revenue = __scalar_sq_3.MAX(total_revenue) ------Projection: supplier.s_suppkey, supplier.s_name, supplier.s_address, supplier.s_phone, total_revenue --------Inner Join: supplier.s_suppkey = supplier_no ----------TableScan: supplier projection=[s_suppkey, s_name, s_address, s_phone] @@ -63,21 +63,20 @@ Sort: supplier.s_suppkey ASC NULLS LAST ------------------Filter: lineitem.l_shipdate >= Date32("9496") AND lineitem.l_shipdate < Date32("9587") --------------------TableScan: lineitem projection=[l_suppkey, l_extendedprice, l_discount, l_shipdate], partial_filters=[lineitem.l_shipdate >= Date32("9496"), lineitem.l_shipdate < Date32("9587")] ------SubqueryAlias: __scalar_sq_3 ---------Projection: MAX(total_revenue) AS __value -----------Aggregate: groupBy=[[]], aggr=[[MAX(total_revenue)]] -------------Projection: revenue0.SUM(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount) AS total_revenue ---------------SubqueryAlias: revenue0 -----------------Projection: SUM(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount) -------------------Aggregate: groupBy=[[lineitem.l_suppkey]], aggr=[[SUM(lineitem.l_extendedprice * (Decimal128(Some(1),20,0) - lineitem.l_discount)) AS SUM(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)]] ---------------------Projection: lineitem.l_suppkey, lineitem.l_extendedprice, lineitem.l_discount -----------------------Filter: lineitem.l_shipdate >= Date32("9496") AND lineitem.l_shipdate < Date32("9587") -------------------------TableScan: lineitem projection=[l_suppkey, l_extendedprice, l_discount, l_shipdate], partial_filters=[lineitem.l_shipdate >= Date32("9496"), lineitem.l_shipdate < 
Date32("9587")] +--------Aggregate: groupBy=[[]], aggr=[[MAX(total_revenue)]] +----------Projection: revenue0.SUM(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount) AS total_revenue +------------SubqueryAlias: revenue0 +--------------Projection: SUM(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount) +----------------Aggregate: groupBy=[[lineitem.l_suppkey]], aggr=[[SUM(lineitem.l_extendedprice * (Decimal128(Some(1),20,0) - lineitem.l_discount)) AS SUM(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)]] +------------------Projection: lineitem.l_suppkey, lineitem.l_extendedprice, lineitem.l_discount +--------------------Filter: lineitem.l_shipdate >= Date32("9496") AND lineitem.l_shipdate < Date32("9587") +----------------------TableScan: lineitem projection=[l_suppkey, l_extendedprice, l_discount, l_shipdate], partial_filters=[lineitem.l_shipdate >= Date32("9496"), lineitem.l_shipdate < Date32("9587")] physical_plan SortPreservingMergeExec: [s_suppkey@0 ASC NULLS LAST] --SortExec: expr=[s_suppkey@0 ASC NULLS LAST] ----ProjectionExec: expr=[s_suppkey@0 as s_suppkey, s_name@1 as s_name, s_address@2 as s_address, s_phone@3 as s_phone, total_revenue@4 as total_revenue] ------CoalesceBatchesExec: target_batch_size=8192 ---------HashJoinExec: mode=Partitioned, join_type=Inner, on=[(Column { name: "total_revenue", index: 4 }, Column { name: "__value", index: 0 })] +--------HashJoinExec: mode=Partitioned, join_type=Inner, on=[(Column { name: "total_revenue", index: 4 }, Column { name: "MAX(total_revenue)", index: 0 })] ----------CoalesceBatchesExec: target_batch_size=8192 ------------RepartitionExec: partitioning=Hash([Column { name: "total_revenue", index: 4 }], 4), input_partitions=4 --------------ProjectionExec: expr=[s_suppkey@0 as s_suppkey, s_name@1 as s_name, s_address@2 as s_address, s_phone@3 as s_phone, total_revenue@5 as total_revenue] @@ -98,23 +97,21 @@ SortPreservingMergeExec: [s_suppkey@0 ASC NULLS LAST] 
------------------------------------RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1 --------------------------------------CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/core/tests/sqllogictests/test_files/tpch/data/lineitem.tbl]]}, projection=[l_suppkey, l_extendedprice, l_discount, l_shipdate], has_header=false ----------CoalesceBatchesExec: target_batch_size=8192 -------------RepartitionExec: partitioning=Hash([Column { name: "__value", index: 0 }], 4), input_partitions=1 ---------------ProjectionExec: expr=[MAX(total_revenue)@0 as __value] -----------------AggregateExec: mode=Final, gby=[], aggr=[MAX(total_revenue)] -------------------CoalescePartitionsExec ---------------------AggregateExec: mode=Partial, gby=[], aggr=[MAX(total_revenue)] -----------------------ProjectionExec: expr=[SUM(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)@0 as total_revenue] -------------------------ProjectionExec: expr=[SUM(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)@1 as SUM(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)] ---------------------------AggregateExec: mode=FinalPartitioned, gby=[l_suppkey@0 as l_suppkey], aggr=[SUM(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)] -----------------------------CoalesceBatchesExec: target_batch_size=8192 -------------------------------RepartitionExec: partitioning=Hash([Column { name: "l_suppkey", index: 0 }], 4), input_partitions=4 ---------------------------------AggregateExec: mode=Partial, gby=[l_suppkey@0 as l_suppkey], aggr=[SUM(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)] -----------------------------------ProjectionExec: expr=[l_suppkey@0 as l_suppkey, l_extendedprice@1 as l_extendedprice, l_discount@2 as l_discount] -------------------------------------CoalesceBatchesExec: target_batch_size=8192 ---------------------------------------FilterExec: l_shipdate@3 >= 9496 AND l_shipdate@3 < 9587 
-----------------------------------------RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1 -------------------------------------------CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/core/tests/sqllogictests/test_files/tpch/data/lineitem.tbl]]}, projection=[l_suppkey, l_extendedprice, l_discount, l_shipdate], has_header=false - +------------RepartitionExec: partitioning=Hash([Column { name: "MAX(total_revenue)", index: 0 }], 4), input_partitions=1 +--------------AggregateExec: mode=Final, gby=[], aggr=[MAX(total_revenue)] +----------------CoalescePartitionsExec +------------------AggregateExec: mode=Partial, gby=[], aggr=[MAX(total_revenue)] +--------------------ProjectionExec: expr=[SUM(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)@0 as total_revenue] +----------------------ProjectionExec: expr=[SUM(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)@1 as SUM(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)] +------------------------AggregateExec: mode=FinalPartitioned, gby=[l_suppkey@0 as l_suppkey], aggr=[SUM(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)] +--------------------------CoalesceBatchesExec: target_batch_size=8192 +----------------------------RepartitionExec: partitioning=Hash([Column { name: "l_suppkey", index: 0 }], 4), input_partitions=4 +------------------------------AggregateExec: mode=Partial, gby=[l_suppkey@0 as l_suppkey], aggr=[SUM(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)] +--------------------------------ProjectionExec: expr=[l_suppkey@0 as l_suppkey, l_extendedprice@1 as l_extendedprice, l_discount@2 as l_discount] +----------------------------------CoalesceBatchesExec: target_batch_size=8192 +------------------------------------FilterExec: l_shipdate@3 >= 9496 AND l_shipdate@3 < 9587 +--------------------------------------RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1 +----------------------------------------CsvExec: 
file_groups={1 group: [[WORKSPACE_ROOT/datafusion/core/tests/sqllogictests/test_files/tpch/data/lineitem.tbl]]}, projection=[l_suppkey, l_extendedprice, l_discount, l_shipdate], has_header=false query ITTTR diff --git a/datafusion/core/tests/sqllogictests/test_files/tpch/q17.slt.part b/datafusion/core/tests/sqllogictests/test_files/tpch/q17.slt.part index 522d67811aac9..4f52711f2985d 100644 --- a/datafusion/core/tests/sqllogictests/test_files/tpch/q17.slt.part +++ b/datafusion/core/tests/sqllogictests/test_files/tpch/q17.slt.part @@ -39,7 +39,7 @@ logical_plan Projection: CAST(SUM(lineitem.l_extendedprice) AS Float64) / Float64(7) AS avg_yearly --Aggregate: groupBy=[[]], aggr=[[SUM(lineitem.l_extendedprice)]] ----Projection: lineitem.l_extendedprice -------Inner Join: part.p_partkey = __scalar_sq_5.l_partkey Filter: CAST(lineitem.l_quantity AS Decimal128(30, 15)) < __scalar_sq_5.__value +------Inner Join: part.p_partkey = __scalar_sq_5.l_partkey Filter: CAST(lineitem.l_quantity AS Decimal128(30, 15)) < __scalar_sq_5.Float64(0.2) * AVG(lineitem.l_quantity) --------Projection: lineitem.l_quantity, lineitem.l_extendedprice, part.p_partkey ----------Inner Join: lineitem.l_partkey = part.p_partkey ------------TableScan: lineitem projection=[l_partkey, l_quantity, l_extendedprice] @@ -47,7 +47,7 @@ Projection: CAST(SUM(lineitem.l_extendedprice) AS Float64) / Float64(7) AS avg_y --------------Filter: part.p_brand = Utf8("Brand#23") AND part.p_container = Utf8("MED BOX") ----------------TableScan: part projection=[p_partkey, p_brand, p_container], partial_filters=[part.p_brand = Utf8("Brand#23"), part.p_container = Utf8("MED BOX")] --------SubqueryAlias: __scalar_sq_5 -----------Projection: lineitem.l_partkey, CAST(Float64(0.2) * CAST(AVG(lineitem.l_quantity) AS Float64) AS Decimal128(30, 15)) AS __value +----------Projection: CAST(Float64(0.2) * CAST(AVG(lineitem.l_quantity) AS Float64) AS Decimal128(30, 15)), lineitem.l_partkey ------------Aggregate: 
groupBy=[[lineitem.l_partkey]], aggr=[[AVG(lineitem.l_quantity)]] --------------TableScan: lineitem projection=[l_partkey, l_quantity] physical_plan @@ -57,7 +57,7 @@ ProjectionExec: expr=[CAST(SUM(lineitem.l_extendedprice)@0 AS Float64) / 7 as av ------AggregateExec: mode=Partial, gby=[], aggr=[SUM(lineitem.l_extendedprice)] --------ProjectionExec: expr=[l_extendedprice@1 as l_extendedprice] ----------CoalesceBatchesExec: target_batch_size=8192 -------------HashJoinExec: mode=Partitioned, join_type=Inner, on=[(Column { name: "p_partkey", index: 2 }, Column { name: "l_partkey", index: 0 })], filter=CAST(l_quantity@0 AS Decimal128(30, 15)) < __value@1 +------------HashJoinExec: mode=Partitioned, join_type=Inner, on=[(Column { name: "p_partkey", index: 2 }, Column { name: "l_partkey", index: 1 })], filter=CAST(l_quantity@0 AS Decimal128(30, 15)) < Float64(0.2) * AVG(lineitem.l_quantity)@1 --------------CoalesceBatchesExec: target_batch_size=8192 ----------------RepartitionExec: partitioning=Hash([Column { name: "p_partkey", index: 2 }], 4), input_partitions=4 ------------------ProjectionExec: expr=[l_quantity@1 as l_quantity, l_extendedprice@2 as l_extendedprice, p_partkey@3 as p_partkey] @@ -74,7 +74,7 @@ ProjectionExec: expr=[CAST(SUM(lineitem.l_extendedprice)@0 AS Float64) / 7 as av --------------------------------FilterExec: p_brand@1 = Brand#23 AND p_container@2 = MED BOX ----------------------------------RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1 ------------------------------------CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/core/tests/sqllogictests/test_files/tpch/data/part.tbl]]}, projection=[p_partkey, p_brand, p_container], has_header=false ---------------ProjectionExec: expr=[l_partkey@0 as l_partkey, CAST(0.2 * CAST(AVG(lineitem.l_quantity)@1 AS Float64) AS Decimal128(30, 15)) as __value] +--------------ProjectionExec: expr=[CAST(0.2 * CAST(AVG(lineitem.l_quantity)@1 AS Float64) AS Decimal128(30, 15)) as 
Float64(0.2) * AVG(lineitem.l_quantity), l_partkey@0 as l_partkey] ----------------AggregateExec: mode=FinalPartitioned, gby=[l_partkey@0 as l_partkey], aggr=[AVG(lineitem.l_quantity)] ------------------CoalesceBatchesExec: target_batch_size=8192 --------------------RepartitionExec: partitioning=Hash([Column { name: "l_partkey", index: 0 }], 4), input_partitions=4 diff --git a/datafusion/core/tests/sqllogictests/test_files/tpch/q2.slt.part b/datafusion/core/tests/sqllogictests/test_files/tpch/q2.slt.part index fe125c2b3b0cc..8203642869c07 100644 --- a/datafusion/core/tests/sqllogictests/test_files/tpch/q2.slt.part +++ b/datafusion/core/tests/sqllogictests/test_files/tpch/q2.slt.part @@ -66,7 +66,7 @@ logical_plan Limit: skip=0, fetch=10 --Sort: supplier.s_acctbal DESC NULLS FIRST, nation.n_name ASC NULLS LAST, supplier.s_name ASC NULLS LAST, part.p_partkey ASC NULLS LAST, fetch=10 ----Projection: supplier.s_acctbal, supplier.s_name, nation.n_name, part.p_partkey, part.p_mfgr, supplier.s_address, supplier.s_phone, supplier.s_comment -------Inner Join: part.p_partkey = __scalar_sq_7.ps_partkey, partsupp.ps_supplycost = __scalar_sq_7.__value +------Inner Join: part.p_partkey = __scalar_sq_7.ps_partkey, partsupp.ps_supplycost = __scalar_sq_7.MIN(partsupp.ps_supplycost) --------Projection: part.p_partkey, part.p_mfgr, supplier.s_name, supplier.s_address, supplier.s_phone, supplier.s_acctbal, supplier.s_comment, partsupp.ps_supplycost, nation.n_name ----------Inner Join: nation.n_regionkey = region.r_regionkey ------------Projection: part.p_partkey, part.p_mfgr, supplier.s_name, supplier.s_address, supplier.s_phone, supplier.s_acctbal, supplier.s_comment, partsupp.ps_supplycost, nation.n_name, nation.n_regionkey @@ -85,7 +85,7 @@ Limit: skip=0, fetch=10 --------------Filter: region.r_name = Utf8("EUROPE") ----------------TableScan: region projection=[r_regionkey, r_name], partial_filters=[region.r_name = Utf8("EUROPE")] --------SubqueryAlias: __scalar_sq_7 
-----------Projection: partsupp.ps_partkey, MIN(partsupp.ps_supplycost) AS __value +----------Projection: MIN(partsupp.ps_supplycost), partsupp.ps_partkey ------------Aggregate: groupBy=[[partsupp.ps_partkey]], aggr=[[MIN(partsupp.ps_supplycost)]] --------------Projection: partsupp.ps_partkey, partsupp.ps_supplycost ----------------Inner Join: nation.n_regionkey = region.r_regionkey @@ -105,7 +105,7 @@ GlobalLimitExec: skip=0, fetch=10 ----SortExec: fetch=10, expr=[s_acctbal@0 DESC,n_name@2 ASC NULLS LAST,s_name@1 ASC NULLS LAST,p_partkey@3 ASC NULLS LAST] ------ProjectionExec: expr=[s_acctbal@5 as s_acctbal, s_name@2 as s_name, n_name@8 as n_name, p_partkey@0 as p_partkey, p_mfgr@1 as p_mfgr, s_address@3 as s_address, s_phone@4 as s_phone, s_comment@6 as s_comment] --------CoalesceBatchesExec: target_batch_size=8192 -----------HashJoinExec: mode=Partitioned, join_type=Inner, on=[(Column { name: "p_partkey", index: 0 }, Column { name: "ps_partkey", index: 0 }), (Column { name: "ps_supplycost", index: 7 }, Column { name: "__value", index: 1 })] +----------HashJoinExec: mode=Partitioned, join_type=Inner, on=[(Column { name: "p_partkey", index: 0 }, Column { name: "ps_partkey", index: 1 }), (Column { name: "ps_supplycost", index: 7 }, Column { name: "MIN(partsupp.ps_supplycost)", index: 0 })] ------------CoalesceBatchesExec: target_batch_size=8192 --------------RepartitionExec: partitioning=Hash([Column { name: "p_partkey", index: 0 }, Column { name: "ps_supplycost", index: 7 }], 4), input_partitions=4 ----------------ProjectionExec: expr=[p_partkey@0 as p_partkey, p_mfgr@1 as p_mfgr, s_name@2 as s_name, s_address@3 as s_address, s_phone@4 as s_phone, s_acctbal@5 as s_acctbal, s_comment@6 as s_comment, ps_supplycost@7 as ps_supplycost, n_name@8 as n_name] @@ -153,8 +153,8 @@ GlobalLimitExec: skip=0, fetch=10 --------------------------------RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1 ----------------------------------CsvExec: file_groups={1 
group: [[WORKSPACE_ROOT/datafusion/core/tests/sqllogictests/test_files/tpch/data/region.tbl]]}, projection=[r_regionkey, r_name], has_header=false ------------CoalesceBatchesExec: target_batch_size=8192 ---------------RepartitionExec: partitioning=Hash([Column { name: "ps_partkey", index: 0 }, Column { name: "__value", index: 1 }], 4), input_partitions=4 -----------------ProjectionExec: expr=[ps_partkey@0 as ps_partkey, MIN(partsupp.ps_supplycost)@1 as __value] +--------------RepartitionExec: partitioning=Hash([Column { name: "ps_partkey", index: 1 }, Column { name: "MIN(partsupp.ps_supplycost)", index: 0 }], 4), input_partitions=4 +----------------ProjectionExec: expr=[MIN(partsupp.ps_supplycost)@1 as MIN(partsupp.ps_supplycost), ps_partkey@0 as ps_partkey] ------------------AggregateExec: mode=FinalPartitioned, gby=[ps_partkey@0 as ps_partkey], aggr=[MIN(partsupp.ps_supplycost)] --------------------CoalesceBatchesExec: target_batch_size=8192 ----------------------RepartitionExec: partitioning=Hash([Column { name: "ps_partkey", index: 0 }], 4), input_partitions=4 diff --git a/datafusion/core/tests/sqllogictests/test_files/tpch/q20.slt.part b/datafusion/core/tests/sqllogictests/test_files/tpch/q20.slt.part index f6d343d4db30d..8e2af2c340d20 100644 --- a/datafusion/core/tests/sqllogictests/test_files/tpch/q20.slt.part +++ b/datafusion/core/tests/sqllogictests/test_files/tpch/q20.slt.part @@ -67,7 +67,7 @@ Sort: supplier.s_name ASC NULLS LAST --------------TableScan: nation projection=[n_nationkey, n_name], partial_filters=[nation.n_name = Utf8("CANADA")] ------SubqueryAlias: __correlated_sq_5 --------Projection: partsupp.ps_suppkey -----------Inner Join: partsupp.ps_partkey = __scalar_sq_9.l_partkey, partsupp.ps_suppkey = __scalar_sq_9.l_suppkey Filter: CAST(partsupp.ps_availqty AS Float64) > __scalar_sq_9.__value +----------Inner Join: partsupp.ps_partkey = __scalar_sq_9.l_partkey, partsupp.ps_suppkey = __scalar_sq_9.l_suppkey Filter: CAST(partsupp.ps_availqty AS 
Float64) > __scalar_sq_9.Float64(0.5) * SUM(lineitem.l_quantity) ------------LeftSemi Join: partsupp.ps_partkey = __correlated_sq_6.p_partkey --------------TableScan: partsupp projection=[ps_partkey, ps_suppkey, ps_availqty] --------------SubqueryAlias: __correlated_sq_6 @@ -75,7 +75,7 @@ Sort: supplier.s_name ASC NULLS LAST ------------------Filter: part.p_name LIKE Utf8("forest%") --------------------TableScan: part projection=[p_partkey, p_name], partial_filters=[part.p_name LIKE Utf8("forest%")] ------------SubqueryAlias: __scalar_sq_9 ---------------Projection: lineitem.l_partkey, lineitem.l_suppkey, Float64(0.5) * CAST(SUM(lineitem.l_quantity) AS Float64) AS __value +--------------Projection: Float64(0.5) * CAST(SUM(lineitem.l_quantity) AS Float64), lineitem.l_partkey, lineitem.l_suppkey ----------------Aggregate: groupBy=[[lineitem.l_partkey, lineitem.l_suppkey]], aggr=[[SUM(lineitem.l_quantity)]] ------------------Projection: lineitem.l_partkey, lineitem.l_suppkey, lineitem.l_quantity --------------------Filter: lineitem.l_shipdate >= Date32("8766") AND lineitem.l_shipdate < Date32("9131") @@ -106,7 +106,7 @@ SortPreservingMergeExec: [s_name@0 ASC NULLS LAST] ------------RepartitionExec: partitioning=Hash([Column { name: "ps_suppkey", index: 0 }], 4), input_partitions=4 --------------ProjectionExec: expr=[ps_suppkey@1 as ps_suppkey] ----------------CoalesceBatchesExec: target_batch_size=8192 -------------------HashJoinExec: mode=Partitioned, join_type=Inner, on=[(Column { name: "ps_partkey", index: 0 }, Column { name: "l_partkey", index: 0 }), (Column { name: "ps_suppkey", index: 1 }, Column { name: "l_suppkey", index: 1 })], filter=CAST(ps_availqty@0 AS Float64) > __value@1 +------------------HashJoinExec: mode=Partitioned, join_type=Inner, on=[(Column { name: "ps_partkey", index: 0 }, Column { name: "l_partkey", index: 1 }), (Column { name: "ps_suppkey", index: 1 }, Column { name: "l_suppkey", index: 2 })], filter=CAST(ps_availqty@0 AS Float64) > 
Float64(0.5) * SUM(lineitem.l_quantity)@1 --------------------CoalesceBatchesExec: target_batch_size=8192 ----------------------RepartitionExec: partitioning=Hash([Column { name: "ps_partkey", index: 0 }, Column { name: "ps_suppkey", index: 1 }], 4), input_partitions=4 ------------------------CoalesceBatchesExec: target_batch_size=8192 @@ -122,7 +122,7 @@ SortPreservingMergeExec: [s_name@0 ASC NULLS LAST] ------------------------------------FilterExec: p_name@1 LIKE forest% --------------------------------------RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1 ----------------------------------------CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/core/tests/sqllogictests/test_files/tpch/data/part.tbl]]}, projection=[p_partkey, p_name], has_header=false ---------------------ProjectionExec: expr=[l_partkey@0 as l_partkey, l_suppkey@1 as l_suppkey, 0.5 * CAST(SUM(lineitem.l_quantity)@2 AS Float64) as __value] +--------------------ProjectionExec: expr=[0.5 * CAST(SUM(lineitem.l_quantity)@2 AS Float64) as Float64(0.5) * SUM(lineitem.l_quantity), l_partkey@0 as l_partkey, l_suppkey@1 as l_suppkey] ----------------------AggregateExec: mode=FinalPartitioned, gby=[l_partkey@0 as l_partkey, l_suppkey@1 as l_suppkey], aggr=[SUM(lineitem.l_quantity)] ------------------------CoalesceBatchesExec: target_batch_size=8192 --------------------------RepartitionExec: partitioning=Hash([Column { name: "l_partkey", index: 0 }, Column { name: "l_suppkey", index: 1 }], 4), input_partitions=4 diff --git a/datafusion/core/tests/sqllogictests/test_files/tpch/q22.slt.part b/datafusion/core/tests/sqllogictests/test_files/tpch/q22.slt.part index 9c7dd85ccd82f..9f8b651f5386b 100644 --- a/datafusion/core/tests/sqllogictests/test_files/tpch/q22.slt.part +++ b/datafusion/core/tests/sqllogictests/test_files/tpch/q22.slt.part @@ -61,56 +61,49 @@ Sort: custsale.cntrycode ASC NULLS LAST ----Aggregate: groupBy=[[custsale.cntrycode]], aggr=[[COUNT(UInt8(1)), 
SUM(custsale.c_acctbal)]] ------SubqueryAlias: custsale --------Projection: substr(customer.c_phone, Int64(1), Int64(2)) AS cntrycode, customer.c_acctbal -----------Filter: CAST(customer.c_acctbal AS Decimal128(19, 6)) > __scalar_sq_11.__value -------------CrossJoin: ---------------Projection: customer.c_phone, customer.c_acctbal -----------------LeftAnti Join: customer.c_custkey = __correlated_sq_13.o_custkey -------------------Filter: substr(customer.c_phone, Int64(1), Int64(2)) IN ([Utf8("13"), Utf8("31"), Utf8("23"), Utf8("29"), Utf8("30"), Utf8("18"), Utf8("17")]) ---------------------TableScan: customer projection=[c_custkey, c_phone, c_acctbal], partial_filters=[substr(customer.c_phone, Int64(1), Int64(2)) IN ([Utf8("13"), Utf8("31"), Utf8("23"), Utf8("29"), Utf8("30"), Utf8("18"), Utf8("17")])] -------------------SubqueryAlias: __correlated_sq_13 ---------------------TableScan: orders projection=[o_custkey] ---------------SubqueryAlias: __scalar_sq_11 -----------------Projection: AVG(customer.c_acctbal) AS __value -------------------Aggregate: groupBy=[[]], aggr=[[AVG(customer.c_acctbal)]] ---------------------Projection: customer.c_acctbal -----------------------Filter: customer.c_acctbal > Decimal128(Some(0),15,2) AND substr(customer.c_phone, Int64(1), Int64(2)) IN ([Utf8("13"), Utf8("31"), Utf8("23"), Utf8("29"), Utf8("30"), Utf8("18"), Utf8("17")]) -------------------------TableScan: customer projection=[c_phone, c_acctbal], partial_filters=[customer.c_acctbal > Decimal128(Some(0),15,2) AS customer.c_acctbal > Decimal128(Some(0),30,15), substr(customer.c_phone, Int64(1), Int64(2)) IN ([Utf8("13"), Utf8("31"), Utf8("23"), Utf8("29"), Utf8("30"), Utf8("18"), Utf8("17")]), customer.c_acctbal > Decimal128(Some(0),15,2)] +----------Inner Join: Filter: CAST(customer.c_acctbal AS Decimal128(19, 6)) > __scalar_sq_11.AVG(customer.c_acctbal) +------------Projection: customer.c_phone, customer.c_acctbal +--------------LeftAnti Join: customer.c_custkey = 
__correlated_sq_13.o_custkey +----------------Filter: substr(customer.c_phone, Int64(1), Int64(2)) IN ([Utf8("13"), Utf8("31"), Utf8("23"), Utf8("29"), Utf8("30"), Utf8("18"), Utf8("17")]) +------------------TableScan: customer projection=[c_custkey, c_phone, c_acctbal], partial_filters=[substr(customer.c_phone, Int64(1), Int64(2)) IN ([Utf8("13"), Utf8("31"), Utf8("23"), Utf8("29"), Utf8("30"), Utf8("18"), Utf8("17")])] +----------------SubqueryAlias: __correlated_sq_13 +------------------TableScan: orders projection=[o_custkey] +------------SubqueryAlias: __scalar_sq_11 +--------------Aggregate: groupBy=[[]], aggr=[[AVG(customer.c_acctbal)]] +----------------Projection: customer.c_acctbal +------------------Filter: customer.c_acctbal > Decimal128(Some(0),15,2) AND substr(customer.c_phone, Int64(1), Int64(2)) IN ([Utf8("13"), Utf8("31"), Utf8("23"), Utf8("29"), Utf8("30"), Utf8("18"), Utf8("17")]) +--------------------TableScan: customer projection=[c_phone, c_acctbal], partial_filters=[customer.c_acctbal > Decimal128(Some(0),15,2) AS customer.c_acctbal > Decimal128(Some(0),30,15), substr(customer.c_phone, Int64(1), Int64(2)) IN ([Utf8("13"), Utf8("31"), Utf8("23"), Utf8("29"), Utf8("30"), Utf8("18"), Utf8("17")]), customer.c_acctbal > Decimal128(Some(0),15,2)] physical_plan SortPreservingMergeExec: [cntrycode@0 ASC NULLS LAST] --SortExec: expr=[cntrycode@0 ASC NULLS LAST] ----ProjectionExec: expr=[cntrycode@0 as cntrycode, COUNT(UInt8(1))@1 as numcust, SUM(custsale.c_acctbal)@2 as totacctbal] ------AggregateExec: mode=FinalPartitioned, gby=[cntrycode@0 as cntrycode], aggr=[COUNT(UInt8(1)), SUM(custsale.c_acctbal)] --------CoalesceBatchesExec: target_batch_size=8192 -----------RepartitionExec: partitioning=Hash([Column { name: "cntrycode", index: 0 }], 4), input_partitions=1 +----------RepartitionExec: partitioning=Hash([Column { name: "cntrycode", index: 0 }], 4), input_partitions=4 ------------AggregateExec: mode=Partial, gby=[cntrycode@0 as cntrycode], 
aggr=[COUNT(UInt8(1)), SUM(custsale.c_acctbal)] --------------ProjectionExec: expr=[substr(c_phone@0, 1, 2) as cntrycode, c_acctbal@1 as c_acctbal] -----------------CoalesceBatchesExec: target_batch_size=8192 -------------------FilterExec: CAST(c_acctbal@1 AS Decimal128(19, 6)) > __value@2 ---------------------CrossJoinExec -----------------------CoalescePartitionsExec -------------------------ProjectionExec: expr=[c_phone@1 as c_phone, c_acctbal@2 as c_acctbal] +----------------NestedLoopJoinExec: join_type=Inner, filter=BinaryExpr { left: CastExpr { expr: Column { name: "c_acctbal", index: 0 }, cast_type: Decimal128(19, 6), cast_options: CastOptions { safe: false, format_options: FormatOptions { safe: true, null: "", date_format: None, datetime_format: None, timestamp_format: None, timestamp_tz_format: None, time_format: None } } }, op: Gt, right: Column { name: "AVG(customer.c_acctbal)", index: 1 } } +------------------ProjectionExec: expr=[c_phone@1 as c_phone, c_acctbal@2 as c_acctbal] +--------------------CoalesceBatchesExec: target_batch_size=8192 +----------------------HashJoinExec: mode=Partitioned, join_type=LeftAnti, on=[(Column { name: "c_custkey", index: 0 }, Column { name: "o_custkey", index: 0 })] +------------------------CoalesceBatchesExec: target_batch_size=8192 +--------------------------RepartitionExec: partitioning=Hash([Column { name: "c_custkey", index: 0 }], 4), input_partitions=4 +----------------------------CoalesceBatchesExec: target_batch_size=8192 +------------------------------FilterExec: Use substr(c_phone@1, 1, 2) IN (SET) ([Literal { value: Utf8("13") }, Literal { value: Utf8("31") }, Literal { value: Utf8("23") }, Literal { value: Utf8("29") }, Literal { value: Utf8("30") }, Literal { value: Utf8("18") }, Literal { value: Utf8("17") }]) +--------------------------------RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1 +----------------------------------CsvExec: file_groups={1 group: 
[[WORKSPACE_ROOT/datafusion/core/tests/sqllogictests/test_files/tpch/data/customer.tbl]]}, projection=[c_custkey, c_phone, c_acctbal], has_header=false +------------------------CoalesceBatchesExec: target_batch_size=8192 +--------------------------RepartitionExec: partitioning=Hash([Column { name: "o_custkey", index: 0 }], 4), input_partitions=4 +----------------------------RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1 +------------------------------CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/core/tests/sqllogictests/test_files/tpch/data/orders.tbl]]}, projection=[o_custkey], has_header=false +------------------AggregateExec: mode=Final, gby=[], aggr=[AVG(customer.c_acctbal)] +--------------------CoalescePartitionsExec +----------------------AggregateExec: mode=Partial, gby=[], aggr=[AVG(customer.c_acctbal)] +------------------------ProjectionExec: expr=[c_acctbal@1 as c_acctbal] --------------------------CoalesceBatchesExec: target_batch_size=8192 -----------------------------HashJoinExec: mode=Partitioned, join_type=LeftAnti, on=[(Column { name: "c_custkey", index: 0 }, Column { name: "o_custkey", index: 0 })] -------------------------------CoalesceBatchesExec: target_batch_size=8192 ---------------------------------RepartitionExec: partitioning=Hash([Column { name: "c_custkey", index: 0 }], 4), input_partitions=4 -----------------------------------CoalesceBatchesExec: target_batch_size=8192 -------------------------------------FilterExec: Use substr(c_phone@1, 1, 2) IN (SET) ([Literal { value: Utf8("13") }, Literal { value: Utf8("31") }, Literal { value: Utf8("23") }, Literal { value: Utf8("29") }, Literal { value: Utf8("30") }, Literal { value: Utf8("18") }, Literal { value: Utf8("17") }]) ---------------------------------------RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1 -----------------------------------------CsvExec: file_groups={1 group: 
[[WORKSPACE_ROOT/datafusion/core/tests/sqllogictests/test_files/tpch/data/customer.tbl]]}, projection=[c_custkey, c_phone, c_acctbal], has_header=false -------------------------------CoalesceBatchesExec: target_batch_size=8192 ---------------------------------RepartitionExec: partitioning=Hash([Column { name: "o_custkey", index: 0 }], 4), input_partitions=4 -----------------------------------RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1 -------------------------------------CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/core/tests/sqllogictests/test_files/tpch/data/orders.tbl]]}, projection=[o_custkey], has_header=false -----------------------ProjectionExec: expr=[AVG(customer.c_acctbal)@0 as __value] -------------------------AggregateExec: mode=Final, gby=[], aggr=[AVG(customer.c_acctbal)] ---------------------------CoalescePartitionsExec -----------------------------AggregateExec: mode=Partial, gby=[], aggr=[AVG(customer.c_acctbal)] -------------------------------ProjectionExec: expr=[c_acctbal@1 as c_acctbal] ---------------------------------CoalesceBatchesExec: target_batch_size=8192 -----------------------------------FilterExec: c_acctbal@1 > Some(0),15,2 AND Use substr(c_phone@0, 1, 2) IN (SET) ([Literal { value: Utf8("13") }, Literal { value: Utf8("31") }, Literal { value: Utf8("23") }, Literal { value: Utf8("29") }, Literal { value: Utf8("30") }, Literal { value: Utf8("18") }, Literal { value: Utf8("17") }]) -------------------------------------RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1 ---------------------------------------CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/core/tests/sqllogictests/test_files/tpch/data/customer.tbl]]}, projection=[c_phone, c_acctbal], has_header=false - +----------------------------FilterExec: c_acctbal@1 > Some(0),15,2 AND Use substr(c_phone@0, 1, 2) IN (SET) ([Literal { value: Utf8("13") }, Literal { value: Utf8("31") }, Literal { value: Utf8("23") }, 
Literal { value: Utf8("29") }, Literal { value: Utf8("30") }, Literal { value: Utf8("18") }, Literal { value: Utf8("17") }]) +------------------------------RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1 +--------------------------------CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/core/tests/sqllogictests/test_files/tpch/data/customer.tbl]]}, projection=[c_phone, c_acctbal], has_header=false query TIR From c549b6a1e5119497c3503baf8dcf916a8e92b303 Mon Sep 17 00:00:00 2001 From: "mingmwang@ebay.com" Date: Thu, 1 Jun 2023 18:58:24 +0800 Subject: [PATCH 11/13] Avoid unnecessary alias in the InSubquery rewriting --- datafusion/core/tests/sql/joins.rs | 48 +++++++++---------- datafusion/expr/src/expr_rewriter/mod.rs | 17 +++++++ datafusion/expr/src/logical_plan/plan.rs | 17 +++---- datafusion/optimizer/src/decorrelate.rs | 23 +-------- .../src/decorrelate_predicate_subquery.rs | 44 ++++++++--------- .../optimizer/src/scalar_subquery_to_join.rs | 17 ++----- 6 files changed, 77 insertions(+), 89 deletions(-) diff --git a/datafusion/core/tests/sql/joins.rs b/datafusion/core/tests/sql/joins.rs index 1ab28d683f45e..3f4da9f658aae 100644 --- a/datafusion/core/tests/sql/joins.rs +++ b/datafusion/core/tests/sql/joins.rs @@ -2027,10 +2027,10 @@ async fn subquery_to_join_with_both_side_expr() -> Result<()> { let expected = vec![ "Explain [plan_type:Utf8, plan:Utf8]", - " LeftSemi Join: CAST(t1.t1_id AS Int64) + Int64(12) = __correlated_sq_1.CAST(t2_id AS Int64) + Int64(1) [t1_id:UInt32;N, t1_name:Utf8;N, t1_int:UInt32;N]", + " LeftSemi Join: CAST(t1.t1_id AS Int64) + Int64(12) = __correlated_sq_1.t2.t2_id + Int64(1) [t1_id:UInt32;N, t1_name:Utf8;N, t1_int:UInt32;N]", " TableScan: t1 projection=[t1_id, t1_name, t1_int] [t1_id:UInt32;N, t1_name:Utf8;N, t1_int:UInt32;N]", - " SubqueryAlias: __correlated_sq_1 [CAST(t2_id AS Int64) + Int64(1):Int64;N]", - " Projection: CAST(t2.t2_id AS Int64) + Int64(1) AS t2.t2_id + Int64(1) AS CAST(t2_id AS Int64) + 
Int64(1) [CAST(t2_id AS Int64) + Int64(1):Int64;N]", + " SubqueryAlias: __correlated_sq_1 [t2.t2_id + Int64(1):Int64;N]", + " Projection: CAST(t2.t2_id AS Int64) + Int64(1) [t2.t2_id + Int64(1):Int64;N]", " TableScan: t2 projection=[t2_id] [t2_id:UInt32;N]", ]; @@ -2071,10 +2071,10 @@ async fn subquery_to_join_with_muti_filter() -> Result<()> { let expected = vec![ "Explain [plan_type:Utf8, plan:Utf8]", - " LeftSemi Join: CAST(t1.t1_id AS Int64) + Int64(12) = __correlated_sq_1.CAST(t2_id AS Int64) + Int64(1) Filter: t1.t1_int <= __correlated_sq_1.t2_int [t1_id:UInt32;N, t1_name:Utf8;N, t1_int:UInt32;N]", + " LeftSemi Join: CAST(t1.t1_id AS Int64) + Int64(12) = __correlated_sq_1.t2.t2_id + Int64(1) Filter: t1.t1_int <= __correlated_sq_1.t2_int [t1_id:UInt32;N, t1_name:Utf8;N, t1_int:UInt32;N]", " TableScan: t1 projection=[t1_id, t1_name, t1_int] [t1_id:UInt32;N, t1_name:Utf8;N, t1_int:UInt32;N]", - " SubqueryAlias: __correlated_sq_1 [CAST(t2_id AS Int64) + Int64(1):Int64;N, t2_int:UInt32;N]", - " Projection: CAST(t2.t2_id AS Int64) + Int64(1) AS t2.t2_id + Int64(1) AS CAST(t2_id AS Int64) + Int64(1), t2.t2_int [CAST(t2_id AS Int64) + Int64(1):Int64;N, t2_int:UInt32;N]", + " SubqueryAlias: __correlated_sq_1 [t2.t2_id + Int64(1):Int64;N, t2_int:UInt32;N]", + " Projection: CAST(t2.t2_id AS Int64) + Int64(1), t2.t2_int [t2.t2_id + Int64(1):Int64;N, t2_int:UInt32;N]", " Filter: t2.t2_int > UInt32(0) [t2_id:UInt32;N, t2_int:UInt32;N]", " TableScan: t2 projection=[t2_id, t2_int] [t2_id:UInt32;N, t2_int:UInt32;N]", ]; @@ -2115,10 +2115,10 @@ async fn three_projection_exprs_subquery_to_join() -> Result<()> { let expected = vec![ "Explain [plan_type:Utf8, plan:Utf8]", - " LeftSemi Join: CAST(t1.t1_id AS Int64) + Int64(12) = __correlated_sq_1.CAST(t2_id AS Int64) + Int64(1) Filter: t1.t1_int <= __correlated_sq_1.t2_int AND t1.t1_name != __correlated_sq_1.t2_name [t1_id:UInt32;N, t1_name:Utf8;N, t1_int:UInt32;N]", + " LeftSemi Join: CAST(t1.t1_id AS Int64) + Int64(12) = 
__correlated_sq_1.t2.t2_id + Int64(1) Filter: t1.t1_int <= __correlated_sq_1.t2_int AND t1.t1_name != __correlated_sq_1.t2_name [t1_id:UInt32;N, t1_name:Utf8;N, t1_int:UInt32;N]", " TableScan: t1 projection=[t1_id, t1_name, t1_int] [t1_id:UInt32;N, t1_name:Utf8;N, t1_int:UInt32;N]", - " SubqueryAlias: __correlated_sq_1 [CAST(t2_id AS Int64) + Int64(1):Int64;N, t2_int:UInt32;N, t2_name:Utf8;N]", - " Projection: CAST(t2.t2_id AS Int64) + Int64(1) AS t2.t2_id + Int64(1) AS CAST(t2_id AS Int64) + Int64(1), t2.t2_int, t2.t2_name [CAST(t2_id AS Int64) + Int64(1):Int64;N, t2_int:UInt32;N, t2_name:Utf8;N]", + " SubqueryAlias: __correlated_sq_1 [t2.t2_id + Int64(1):Int64;N, t2_int:UInt32;N, t2_name:Utf8;N]", + " Projection: CAST(t2.t2_id AS Int64) + Int64(1), t2.t2_int, t2.t2_name [t2.t2_id + Int64(1):Int64;N, t2_int:UInt32;N, t2_name:Utf8;N]", " Filter: t2.t2_int > UInt32(0) [t2_id:UInt32;N, t2_name:Utf8;N, t2_int:UInt32;N]", " TableScan: t2 projection=[t2_id, t2_name, t2_int] [t2_id:UInt32;N, t2_name:Utf8;N, t2_int:UInt32;N]", ]; @@ -2158,11 +2158,11 @@ async fn in_subquery_to_join_with_correlated_outer_filter() -> Result<()> { let plan = dataframe.into_optimized_plan().unwrap(); let expected = vec![ "Explain [plan_type:Utf8, plan:Utf8]", - " LeftSemi Join: CAST(t1.t1_id AS Int64) + Int64(12) = __correlated_sq_1.CAST(t2_id AS Int64) + Int64(1) [t1_id:UInt32;N, t1_name:Utf8;N, t1_int:UInt32;N]", + " LeftSemi Join: CAST(t1.t1_id AS Int64) + Int64(12) = __correlated_sq_1.t2.t2_id + Int64(1) [t1_id:UInt32;N, t1_name:Utf8;N, t1_int:UInt32;N]", " Filter: t1.t1_int > UInt32(0) [t1_id:UInt32;N, t1_name:Utf8;N, t1_int:UInt32;N]", " TableScan: t1 projection=[t1_id, t1_name, t1_int] [t1_id:UInt32;N, t1_name:Utf8;N, t1_int:UInt32;N]", - " SubqueryAlias: __correlated_sq_1 [CAST(t2_id AS Int64) + Int64(1):Int64;N]", - " Projection: CAST(t2.t2_id AS Int64) + Int64(1) AS t2.t2_id + Int64(1) AS CAST(t2_id AS Int64) + Int64(1) [CAST(t2_id AS Int64) + Int64(1):Int64;N]", + " SubqueryAlias: 
__correlated_sq_1 [t2.t2_id + Int64(1):Int64;N]", + " Projection: CAST(t2.t2_id AS Int64) + Int64(1) [t2.t2_id + Int64(1):Int64;N]", " TableScan: t2 projection=[t2_id] [t2_id:UInt32;N]", ]; @@ -2188,10 +2188,10 @@ async fn not_in_subquery_to_join_with_correlated_outer_filter() -> Result<()> { let plan = dataframe.into_optimized_plan().unwrap(); let expected = vec![ "Explain [plan_type:Utf8, plan:Utf8]", - " LeftAnti Join: CAST(t1.t1_id AS Int64) + Int64(12) = __correlated_sq_1.CAST(t2_id AS Int64) + Int64(1) Filter: t1.t1_int > UInt32(0) [t1_id:UInt32;N, t1_name:Utf8;N, t1_int:UInt32;N]", + " LeftAnti Join: CAST(t1.t1_id AS Int64) + Int64(12) = __correlated_sq_1.t2.t2_id + Int64(1) Filter: t1.t1_int > UInt32(0) [t1_id:UInt32;N, t1_name:Utf8;N, t1_int:UInt32;N]", " TableScan: t1 projection=[t1_id, t1_name, t1_int] [t1_id:UInt32;N, t1_name:Utf8;N, t1_int:UInt32;N]", - " SubqueryAlias: __correlated_sq_1 [CAST(t2_id AS Int64) + Int64(1):Int64;N]", - " Projection: CAST(t2.t2_id AS Int64) + Int64(1) AS t2.t2_id + Int64(1) AS CAST(t2_id AS Int64) + Int64(1) [CAST(t2_id AS Int64) + Int64(1):Int64;N]", + " SubqueryAlias: __correlated_sq_1 [t2.t2_id + Int64(1):Int64;N]", + " Projection: CAST(t2.t2_id AS Int64) + Int64(1) [t2.t2_id + Int64(1):Int64;N]", " TableScan: t2 projection=[t2_id] [t2_id:UInt32;N]", ]; @@ -2218,11 +2218,11 @@ async fn in_subquery_to_join_with_outer_filter() -> Result<()> { let expected = vec![ "Explain [plan_type:Utf8, plan:Utf8]", - " LeftSemi Join: CAST(t1.t1_id AS Int64) + Int64(12) = __correlated_sq_1.CAST(t2_id AS Int64) + Int64(1) Filter: t1.t1_int <= __correlated_sq_1.t2_int AND t1.t1_name != __correlated_sq_1.t2_name [t1_id:UInt32;N, t1_name:Utf8;N, t1_int:UInt32;N]", + " LeftSemi Join: CAST(t1.t1_id AS Int64) + Int64(12) = __correlated_sq_1.t2.t2_id + Int64(1) Filter: t1.t1_int <= __correlated_sq_1.t2_int AND t1.t1_name != __correlated_sq_1.t2_name [t1_id:UInt32;N, t1_name:Utf8;N, t1_int:UInt32;N]", " Filter: t1.t1_id > UInt32(0) 
[t1_id:UInt32;N, t1_name:Utf8;N, t1_int:UInt32;N]", " TableScan: t1 projection=[t1_id, t1_name, t1_int] [t1_id:UInt32;N, t1_name:Utf8;N, t1_int:UInt32;N]", - " SubqueryAlias: __correlated_sq_1 [CAST(t2_id AS Int64) + Int64(1):Int64;N, t2_int:UInt32;N, t2_name:Utf8;N]", - " Projection: CAST(t2.t2_id AS Int64) + Int64(1) AS t2.t2_id + Int64(1) AS CAST(t2_id AS Int64) + Int64(1), t2.t2_int, t2.t2_name [CAST(t2_id AS Int64) + Int64(1):Int64;N, t2_int:UInt32;N, t2_name:Utf8;N]", + " SubqueryAlias: __correlated_sq_1 [t2.t2_id + Int64(1):Int64;N, t2_int:UInt32;N, t2_name:Utf8;N]", + " Projection: CAST(t2.t2_id AS Int64) + Int64(1), t2.t2_int, t2.t2_name [t2.t2_id + Int64(1):Int64;N, t2_int:UInt32;N, t2_name:Utf8;N]", " TableScan: t2 projection=[t2_id, t2_name, t2_int] [t2_id:UInt32;N, t2_name:Utf8;N, t2_int:UInt32;N]", ]; @@ -2264,15 +2264,15 @@ async fn two_in_subquery_to_join_with_outer_filter() -> Result<()> { let expected = vec![ "Explain [plan_type:Utf8, plan:Utf8]", - " LeftSemi Join: CAST(t1.t1_int AS Int64) = __correlated_sq_2.CAST(t2_int AS Int64) + Int64(1) [t1_id:UInt32;N, t1_name:Utf8;N, t1_int:UInt32;N]", - " LeftSemi Join: CAST(t1.t1_id AS Int64) + Int64(12) = __correlated_sq_1.CAST(t2_id AS Int64) + Int64(1) [t1_id:UInt32;N, t1_name:Utf8;N, t1_int:UInt32;N]", + " LeftSemi Join: CAST(t1.t1_int AS Int64) = __correlated_sq_2.t2.t2_int + Int64(1) [t1_id:UInt32;N, t1_name:Utf8;N, t1_int:UInt32;N]", + " LeftSemi Join: CAST(t1.t1_id AS Int64) + Int64(12) = __correlated_sq_1.t2.t2_id + Int64(1) [t1_id:UInt32;N, t1_name:Utf8;N, t1_int:UInt32;N]", " Filter: t1.t1_id > UInt32(0) [t1_id:UInt32;N, t1_name:Utf8;N, t1_int:UInt32;N]", " TableScan: t1 projection=[t1_id, t1_name, t1_int] [t1_id:UInt32;N, t1_name:Utf8;N, t1_int:UInt32;N]", - " SubqueryAlias: __correlated_sq_1 [CAST(t2_id AS Int64) + Int64(1):Int64;N]", - " Projection: CAST(t2.t2_id AS Int64) + Int64(1) AS t2.t2_id + Int64(1) AS CAST(t2_id AS Int64) + Int64(1) [CAST(t2_id AS Int64) + Int64(1):Int64;N]", + " 
SubqueryAlias: __correlated_sq_1 [t2.t2_id + Int64(1):Int64;N]", + " Projection: CAST(t2.t2_id AS Int64) + Int64(1) [t2.t2_id + Int64(1):Int64;N]", " TableScan: t2 projection=[t2_id] [t2_id:UInt32;N]", - " SubqueryAlias: __correlated_sq_2 [CAST(t2_int AS Int64) + Int64(1):Int64;N]", - " Projection: CAST(t2.t2_int AS Int64) + Int64(1) AS t2.t2_int + Int64(1) AS CAST(t2_int AS Int64) + Int64(1) [CAST(t2_int AS Int64) + Int64(1):Int64;N]", + " SubqueryAlias: __correlated_sq_2 [t2.t2_int + Int64(1):Int64;N]", + " Projection: CAST(t2.t2_int AS Int64) + Int64(1) [t2.t2_int + Int64(1):Int64;N]", " TableScan: t2 projection=[t2_int] [t2_int:UInt32;N]", ]; diff --git a/datafusion/expr/src/expr_rewriter/mod.rs b/datafusion/expr/src/expr_rewriter/mod.rs index 63b5d3ed67308..6a47d951617c4 100644 --- a/datafusion/expr/src/expr_rewriter/mod.rs +++ b/datafusion/expr/src/expr_rewriter/mod.rs @@ -137,6 +137,23 @@ pub fn unnormalize_col(expr: Expr) -> Expr { .expect("Unnormalize is infallable") } +/// Create a Column from the Scalar Expr +pub fn create_col_from_scalar_expr( + scalar_expr: &Expr, + subqry_alias: String, +) -> Result { + match scalar_expr { + Expr::Alias(_, alias) => Ok(Column::new(Some(subqry_alias), alias)), + Expr::Column(Column { relation: _, name }) => { + Ok(Column::new(Some(subqry_alias), name)) + } + _ => { + let scalar_column = scalar_expr.display_name()?; + Ok(Column::new(Some(subqry_alias), scalar_column)) + } + } +} + /// Recursively un-normalize all [`Column`] expressions in a list of expression trees #[inline] pub fn unnormalize_cols(exprs: impl IntoIterator) -> Vec { diff --git a/datafusion/expr/src/logical_plan/plan.rs b/datafusion/expr/src/logical_plan/plan.rs index 0f00b2bbc990c..e53f5fa250e47 100644 --- a/datafusion/expr/src/logical_plan/plan.rs +++ b/datafusion/expr/src/logical_plan/plan.rs @@ -17,7 +17,7 @@ use crate::expr::InSubquery; use crate::expr::{Exists, Placeholder}; -use crate::expr_rewriter::unnormalize_col; +use 
crate::expr_rewriter::create_col_from_scalar_expr; ///! Logical plan types use crate::logical_plan::display::{GraphvizVisitor, IndentVisitor}; use crate::logical_plan::extension::UserDefinedLogicalNode; @@ -453,13 +453,14 @@ impl LogicalPlan { ))), LogicalPlan::SubqueryAlias(subquery_alias) => { let expr_opt = subquery_alias.input.head_output_expr()?; - Ok(expr_opt.map(|expr| { - let col_name = format!("{:?}", unnormalize_col(expr)); - Expr::Column(Column::new( - Some(subquery_alias.alias.clone()), - col_name, - )) - })) + expr_opt + .map(|expr| { + Ok(Expr::Column(create_col_from_scalar_expr( + &expr, + subquery_alias.alias.to_string(), + )?)) + }) + .map_or(Ok(None), |v| v.map(Some)) } LogicalPlan::Subquery(_) => Ok(None), LogicalPlan::EmptyRelation(_) diff --git a/datafusion/optimizer/src/decorrelate.rs b/datafusion/optimizer/src/decorrelate.rs index db4a1f2c409a2..fa96ede3c8e47 100644 --- a/datafusion/optimizer/src/decorrelate.rs +++ b/datafusion/optimizer/src/decorrelate.rs @@ -24,10 +24,7 @@ use datafusion_common::tree_node::{ }; use datafusion_common::Result; use datafusion_common::{Column, DFSchemaRef, DataFusionError, ScalarValue}; -use datafusion_expr::expr_rewriter::unnormalize_col; -use datafusion_expr::{ - expr, BinaryExpr, EmptyRelation, Expr, LogicalPlan, LogicalPlanBuilder, Operator, -}; +use datafusion_expr::{expr, EmptyRelation, Expr, LogicalPlan, LogicalPlanBuilder}; use datafusion_physical_expr::execution_props::ExecutionProps; use std::collections::{BTreeSet, HashMap}; use std::ops::Deref; @@ -309,23 +306,6 @@ impl PullUpCorrelatedExpr { correlated_subquery_cols: &BTreeSet, ) -> Result> { let mut missing_exprs = vec![]; - if let Some(Expr::BinaryExpr(BinaryExpr { - left: _, - op: Operator::Eq, - right, - })) = &self.in_predicate_opt - { - if !matches!(right.deref(), Expr::Column(_)) - && !matches!(right.deref(), Expr::Literal(_)) - && !matches!(right.deref(), Expr::Alias(_, _)) - { - let alias_expr = right - .deref() - .clone() - 
.alias(format!("{:?}", unnormalize_col(right.deref().clone()))); - missing_exprs.push(alias_expr) - } - } for expr in exprs { if !missing_exprs.contains(expr) { missing_exprs.push(expr.clone()) @@ -346,7 +326,6 @@ impl PullUpCorrelatedExpr { } } } - Ok(missing_exprs) } } diff --git a/datafusion/optimizer/src/decorrelate_predicate_subquery.rs b/datafusion/optimizer/src/decorrelate_predicate_subquery.rs index 926e671926f87..80ceeb11e269c 100644 --- a/datafusion/optimizer/src/decorrelate_predicate_subquery.rs +++ b/datafusion/optimizer/src/decorrelate_predicate_subquery.rs @@ -23,7 +23,7 @@ use crate::{OptimizerConfig, OptimizerRule}; use datafusion_common::tree_node::TreeNode; use datafusion_common::{Column, DataFusionError, Result}; use datafusion_expr::expr::{Exists, InSubquery}; -use datafusion_expr::expr_rewriter::unnormalize_col; +use datafusion_expr::expr_rewriter::create_col_from_scalar_expr; use datafusion_expr::logical_plan::{JoinType, Subquery}; use datafusion_expr::{ exists, in_subquery, not_exists, not_in_subquery, BinaryExpr, Expr, Filter, @@ -256,8 +256,7 @@ fn build_join( right, })), ) => { - let right_expr_name = format!("{:?}", unnormalize_col(right.deref().clone())); - let right_col = Column::new(Some(subquery_alias), right_expr_name); + let right_col = create_col_from_scalar_expr(right.deref(), subquery_alias)?; let in_predicate = Expr::eq(left.deref().clone(), Expr::Column(right_col)); Some(in_predicate.and(join_filter)) } @@ -270,8 +269,7 @@ fn build_join( right, })), ) => { - let right_expr_name = format!("{:?}", unnormalize_col(right.deref().clone())); - let right_col = Column::new(Some(subquery_alias), right_expr_name); + let right_col = create_col_from_scalar_expr(right.deref(), subquery_alias)?; let in_predicate = Expr::eq(left.deref().clone(), Expr::Column(right_col)); Some(in_predicate) } @@ -887,10 +885,10 @@ mod tests { .build()?; let expected = "Projection: customer.c_custkey [c_custkey:Int64]\ - \n LeftSemi Join: Filter: 
customer.c_custkey = __correlated_sq_1.o_custkey + Int32(1) AND customer.c_custkey = __correlated_sq_1.o_custkey [c_custkey:Int64, c_name:Utf8]\ + \n LeftSemi Join: Filter: customer.c_custkey = __correlated_sq_1.orders.o_custkey + Int32(1) AND customer.c_custkey = __correlated_sq_1.o_custkey [c_custkey:Int64, c_name:Utf8]\ \n TableScan: customer [c_custkey:Int64, c_name:Utf8]\ - \n SubqueryAlias: __correlated_sq_1 [o_custkey + Int32(1):Int64, orders.o_custkey + Int32(1):Int64, o_custkey:Int64]\ - \n Projection: orders.o_custkey + Int32(1) AS o_custkey + Int32(1), orders.o_custkey + Int32(1), orders.o_custkey [o_custkey + Int32(1):Int64, orders.o_custkey + Int32(1):Int64, o_custkey:Int64]\ + \n SubqueryAlias: __correlated_sq_1 [orders.o_custkey + Int32(1):Int64, o_custkey:Int64]\ + \n Projection: orders.o_custkey + Int32(1), orders.o_custkey [orders.o_custkey + Int32(1):Int64, o_custkey:Int64]\ \n TableScan: orders [o_orderkey:Int64, o_custkey:Int64, o_orderstatus:Utf8, o_totalprice:Float64;N]"; assert_optimized_plan_eq_display_indent( @@ -1098,10 +1096,10 @@ mod tests { .build()?; let expected = "Projection: test.b [b:UInt32]\ - \n LeftSemi Join: Filter: test.c + UInt32(1) = __correlated_sq_1.c * UInt32(2) [a:UInt32, b:UInt32, c:UInt32]\ + \n LeftSemi Join: Filter: test.c + UInt32(1) = __correlated_sq_1.sq.c * UInt32(2) [a:UInt32, b:UInt32, c:UInt32]\ \n TableScan: test [a:UInt32, b:UInt32, c:UInt32]\ - \n SubqueryAlias: __correlated_sq_1 [c * UInt32(2):UInt32, sq.c * UInt32(2):UInt32]\ - \n Projection: sq.c * UInt32(2) AS c * UInt32(2), sq.c * UInt32(2) [c * UInt32(2):UInt32, sq.c * UInt32(2):UInt32]\ + \n SubqueryAlias: __correlated_sq_1 [sq.c * UInt32(2):UInt32]\ + \n Projection: sq.c * UInt32(2) [sq.c * UInt32(2):UInt32]\ \n TableScan: sq [a:UInt32, b:UInt32, c:UInt32]"; assert_optimized_plan_eq_display_indent( @@ -1132,10 +1130,10 @@ mod tests { .build()?; let expected = "Projection: test.b [b:UInt32]\ - \n LeftSemi Join: Filter: test.c + UInt32(1) = 
__correlated_sq_1.c * UInt32(2) AND test.a = __correlated_sq_1.a [a:UInt32, b:UInt32, c:UInt32]\ + \n LeftSemi Join: Filter: test.c + UInt32(1) = __correlated_sq_1.sq.c * UInt32(2) AND test.a = __correlated_sq_1.a [a:UInt32, b:UInt32, c:UInt32]\ \n TableScan: test [a:UInt32, b:UInt32, c:UInt32]\ - \n SubqueryAlias: __correlated_sq_1 [c * UInt32(2):UInt32, sq.c * UInt32(2):UInt32, a:UInt32]\ - \n Projection: sq.c * UInt32(2) AS c * UInt32(2), sq.c * UInt32(2), sq.a [c * UInt32(2):UInt32, sq.c * UInt32(2):UInt32, a:UInt32]\ + \n SubqueryAlias: __correlated_sq_1 [sq.c * UInt32(2):UInt32, a:UInt32]\ + \n Projection: sq.c * UInt32(2), sq.a [sq.c * UInt32(2):UInt32, a:UInt32]\ \n Filter: sq.a + UInt32(1) = sq.b [a:UInt32, b:UInt32, c:UInt32]\ \n TableScan: sq [a:UInt32, b:UInt32, c:UInt32]"; @@ -1168,10 +1166,10 @@ mod tests { .build()?; let expected = "Projection: test.b [b:UInt32]\ - \n LeftSemi Join: Filter: test.c + UInt32(1) = __correlated_sq_1.c * UInt32(2) AND test.a + test.b = __correlated_sq_1.a + __correlated_sq_1.b [a:UInt32, b:UInt32, c:UInt32]\ + \n LeftSemi Join: Filter: test.c + UInt32(1) = __correlated_sq_1.sq.c * UInt32(2) AND test.a + test.b = __correlated_sq_1.a + __correlated_sq_1.b [a:UInt32, b:UInt32, c:UInt32]\ \n TableScan: test [a:UInt32, b:UInt32, c:UInt32]\ - \n SubqueryAlias: __correlated_sq_1 [c * UInt32(2):UInt32, sq.c * UInt32(2):UInt32, a:UInt32, b:UInt32]\ - \n Projection: sq.c * UInt32(2) AS c * UInt32(2), sq.c * UInt32(2), sq.a, sq.b [c * UInt32(2):UInt32, sq.c * UInt32(2):UInt32, a:UInt32, b:UInt32]\ + \n SubqueryAlias: __correlated_sq_1 [sq.c * UInt32(2):UInt32, a:UInt32, b:UInt32]\ + \n Projection: sq.c * UInt32(2), sq.a, sq.b [sq.c * UInt32(2):UInt32, a:UInt32, b:UInt32]\ \n Filter: sq.a + UInt32(1) = sq.b [a:UInt32, b:UInt32, c:UInt32]\ \n TableScan: sq [a:UInt32, b:UInt32, c:UInt32]"; @@ -1211,14 +1209,14 @@ mod tests { let expected = "Projection: test.b [b:UInt32]\ \n Filter: test.c > UInt32(1) [a:UInt32, b:UInt32, c:UInt32]\ - 
\n LeftSemi Join: Filter: test.c * UInt32(2) = __correlated_sq_2.c * UInt32(2) AND test.a > __correlated_sq_2.a [a:UInt32, b:UInt32, c:UInt32]\ - \n LeftSemi Join: Filter: test.c + UInt32(1) = __correlated_sq_1.c * UInt32(2) AND test.a > __correlated_sq_1.a [a:UInt32, b:UInt32, c:UInt32]\ + \n LeftSemi Join: Filter: test.c * UInt32(2) = __correlated_sq_2.sq2.c * UInt32(2) AND test.a > __correlated_sq_2.a [a:UInt32, b:UInt32, c:UInt32]\ + \n LeftSemi Join: Filter: test.c + UInt32(1) = __correlated_sq_1.sq1.c * UInt32(2) AND test.a > __correlated_sq_1.a [a:UInt32, b:UInt32, c:UInt32]\ \n TableScan: test [a:UInt32, b:UInt32, c:UInt32]\ - \n SubqueryAlias: __correlated_sq_1 [c * UInt32(2):UInt32, sq1.c * UInt32(2):UInt32, a:UInt32]\ - \n Projection: sq1.c * UInt32(2) AS c * UInt32(2), sq1.c * UInt32(2), sq1.a [c * UInt32(2):UInt32, sq1.c * UInt32(2):UInt32, a:UInt32]\ + \n SubqueryAlias: __correlated_sq_1 [sq1.c * UInt32(2):UInt32, a:UInt32]\ + \n Projection: sq1.c * UInt32(2), sq1.a [sq1.c * UInt32(2):UInt32, a:UInt32]\ \n TableScan: sq1 [a:UInt32, b:UInt32, c:UInt32]\ - \n SubqueryAlias: __correlated_sq_2 [c * UInt32(2):UInt32, sq2.c * UInt32(2):UInt32, a:UInt32]\ - \n Projection: sq2.c * UInt32(2) AS c * UInt32(2), sq2.c * UInt32(2), sq2.a [c * UInt32(2):UInt32, sq2.c * UInt32(2):UInt32, a:UInt32]\ + \n SubqueryAlias: __correlated_sq_2 [sq2.c * UInt32(2):UInt32, a:UInt32]\ + \n Projection: sq2.c * UInt32(2), sq2.a [sq2.c * UInt32(2):UInt32, a:UInt32]\ \n TableScan: sq2 [a:UInt32, b:UInt32, c:UInt32]"; assert_optimized_plan_eq_display_indent( diff --git a/datafusion/optimizer/src/scalar_subquery_to_join.rs b/datafusion/optimizer/src/scalar_subquery_to_join.rs index 21e217249e444..fa06e1ddbfafb 100644 --- a/datafusion/optimizer/src/scalar_subquery_to_join.rs +++ b/datafusion/optimizer/src/scalar_subquery_to_join.rs @@ -24,6 +24,7 @@ use datafusion_common::tree_node::{ RewriteRecursion, Transformed, TreeNode, TreeNodeRewriter, }; use datafusion_common::{Column, 
DataFusionError, Result, ScalarValue}; +use datafusion_expr::expr_rewriter::create_col_from_scalar_expr; use datafusion_expr::logical_plan::{JoinType, Subquery}; use datafusion_expr::{expr, EmptyRelation, Expr, LogicalPlan, LogicalPlanBuilder}; use std::collections::{BTreeSet, HashMap}; @@ -221,18 +222,10 @@ impl TreeNodeRewriter for ExtractScalarSubQuery { )), Ok, )?; - match scalar_expr { - Expr::Alias(_, alias) => { - Ok(Expr::Column(Column::new(Some(subqry_alias), alias))) - } - Expr::Column(Column { relation: _, name }) => { - Ok(Expr::Column(Column::new(Some(subqry_alias), name))) - } - _ => { - let scalar_column = scalar_expr.display_name()?; - Ok(Expr::Column(Column::new(Some(subqry_alias), scalar_column))) - } - } + Ok(Expr::Column(create_col_from_scalar_expr( + &scalar_expr, + subqry_alias, + )?)) } _ => Ok(expr), } From 7662a3af4ed7e2eca1f17ac90faa55c0f5eaefcf Mon Sep 17 00:00:00 2001 From: "mingmwang@ebay.com" Date: Tue, 13 Jun 2023 16:49:03 +0800 Subject: [PATCH 12/13] fix joins.slt --- .../tests/sqllogictests/test_files/joins.slt | 57 +++++++++++-------- 1 file changed, 32 insertions(+), 25 deletions(-) diff --git a/datafusion/core/tests/sqllogictests/test_files/joins.slt b/datafusion/core/tests/sqllogictests/test_files/joins.slt index 4486d7c47b491..c4b4e9ab68a12 100644 --- a/datafusion/core/tests/sqllogictests/test_files/joins.slt +++ b/datafusion/core/tests/sqllogictests/test_files/joins.slt @@ -1700,10 +1700,10 @@ from join_t1 where join_t1.t1_id + 12 in (select join_t2.t2_id + 1 from join_t2) ---- logical_plan -LeftSemi Join: CAST(join_t1.t1_id AS Int64) + Int64(12) = __correlated_sq_5.CAST(t2_id AS Int64) + Int64(1) +LeftSemi Join: CAST(join_t1.t1_id AS Int64) + Int64(12) = __correlated_sq_5.join_t2.t2_id + Int64(1) --TableScan: join_t1 projection=[t1_id, t1_name, t1_int] --SubqueryAlias: __correlated_sq_5 -----Projection: CAST(join_t2.t2_id AS Int64) + Int64(1) AS CAST(t2_id AS Int64) + Int64(1) +----Projection: CAST(join_t2.t2_id AS Int64) + 
Int64(1) ------TableScan: join_t2 projection=[t2_id] query ITI rowsort @@ -1729,10 +1729,10 @@ where join_t1.t1_id + 12 in ) ---- logical_plan -LeftSemi Join: CAST(join_t1.t1_id AS Int64) + Int64(12) = __correlated_sq_7.CAST(t2_id AS Int64) + Int64(1) Filter: join_t1.t1_int <= __correlated_sq_7.t2_int +LeftSemi Join: CAST(join_t1.t1_id AS Int64) + Int64(12) = __correlated_sq_7.join_t2.t2_id + Int64(1) Filter: join_t1.t1_int <= __correlated_sq_7.t2_int --TableScan: join_t1 projection=[t1_id, t1_name, t1_int] --SubqueryAlias: __correlated_sq_7 -----Projection: CAST(join_t2.t2_id AS Int64) + Int64(1) AS CAST(t2_id AS Int64) + Int64(1), join_t2.t2_int +----Projection: CAST(join_t2.t2_id AS Int64) + Int64(1), join_t2.t2_int ------Filter: join_t2.t2_int > UInt32(0) --------TableScan: join_t2 projection=[t2_id, t2_int] @@ -1765,10 +1765,10 @@ where join_t1.t1_id + 12 in ) ---- logical_plan -LeftSemi Join: CAST(join_t1.t1_id AS Int64) + Int64(12) = __correlated_sq_9.CAST(t2_id AS Int64) + Int64(1) Filter: join_t1.t1_int <= __correlated_sq_9.t2_int AND join_t1.t1_name != __correlated_sq_9.t2_name +LeftSemi Join: CAST(join_t1.t1_id AS Int64) + Int64(12) = __correlated_sq_9.join_t2.t2_id + Int64(1) Filter: join_t1.t1_int <= __correlated_sq_9.t2_int AND join_t1.t1_name != __correlated_sq_9.t2_name --TableScan: join_t1 projection=[t1_id, t1_name, t1_int] --SubqueryAlias: __correlated_sq_9 -----Projection: CAST(join_t2.t2_id AS Int64) + Int64(1) AS CAST(t2_id AS Int64) + Int64(1), join_t2.t2_int, join_t2.t2_name +----Projection: CAST(join_t2.t2_id AS Int64) + Int64(1), join_t2.t2_int, join_t2.t2_name ------Filter: join_t2.t2_int > UInt32(0) --------TableScan: join_t2 projection=[t2_id, t2_name, t2_int] @@ -1797,11 +1797,11 @@ where join_t1.t1_id + 12 in (select join_t2.t2_id + 1 from join_t2 where join_t1.t1_int > 0) ---- logical_plan -LeftSemi Join: CAST(join_t1.t1_id AS Int64) + Int64(12) = __correlated_sq_11.CAST(t2_id AS Int64) + Int64(1) +LeftSemi Join: CAST(join_t1.t1_id 
AS Int64) + Int64(12) = __correlated_sq_11.join_t2.t2_id + Int64(1) --Filter: join_t1.t1_int > UInt32(0) ----TableScan: join_t1 projection=[t1_id, t1_name, t1_int] --SubqueryAlias: __correlated_sq_11 -----Projection: CAST(join_t2.t2_id AS Int64) + Int64(1) AS CAST(t2_id AS Int64) + Int64(1) +----Projection: CAST(join_t2.t2_id AS Int64) + Int64(1) ------TableScan: join_t2 projection=[t2_id] # Not in subquery to join with correlated outer filter @@ -1814,10 +1814,10 @@ where join_t1.t1_id + 12 not in (select join_t2.t2_id + 1 from join_t2 where join_t1.t1_int > 0) ---- logical_plan -LeftAnti Join: CAST(join_t1.t1_id AS Int64) + Int64(12) = __correlated_sq_12.CAST(t2_id AS Int64) + Int64(1) Filter: join_t1.t1_int > UInt32(0) +LeftAnti Join: CAST(join_t1.t1_id AS Int64) + Int64(12) = __correlated_sq_12.join_t2.t2_id + Int64(1) Filter: join_t1.t1_int > UInt32(0) --TableScan: join_t1 projection=[t1_id, t1_name, t1_int] --SubqueryAlias: __correlated_sq_12 -----Projection: CAST(join_t2.t2_id AS Int64) + Int64(1) AS CAST(t2_id AS Int64) + Int64(1) +----Projection: CAST(join_t2.t2_id AS Int64) + Int64(1) ------TableScan: join_t2 projection=[t2_id] # In subquery to join with outer filter @@ -1836,11 +1836,11 @@ where join_t1.t1_id + 12 in and join_t1.t1_id > 0 ---- logical_plan -LeftSemi Join: CAST(join_t1.t1_id AS Int64) + Int64(12) = __correlated_sq_13.CAST(t2_id AS Int64) + Int64(1) Filter: join_t1.t1_int <= __correlated_sq_13.t2_int AND join_t1.t1_name != __correlated_sq_13.t2_name +LeftSemi Join: CAST(join_t1.t1_id AS Int64) + Int64(12) = __correlated_sq_13.join_t2.t2_id + Int64(1) Filter: join_t1.t1_int <= __correlated_sq_13.t2_int AND join_t1.t1_name != __correlated_sq_13.t2_name --Filter: join_t1.t1_id > UInt32(0) ----TableScan: join_t1 projection=[t1_id, t1_name, t1_int] --SubqueryAlias: __correlated_sq_13 -----Projection: CAST(join_t2.t2_id AS Int64) + Int64(1) AS CAST(t2_id AS Int64) + Int64(1), join_t2.t2_int, join_t2.t2_name +----Projection: CAST(join_t2.t2_id AS 
Int64) + Int64(1), join_t2.t2_int, join_t2.t2_name ------TableScan: join_t2 projection=[t2_id, t2_name, t2_int] query ITI rowsort @@ -1869,15 +1869,15 @@ where join_t1.t1_id + 12 in (select join_t2.t2_id + 1 from join_t2) and join_t1.t1_id > 0 ---- logical_plan -LeftSemi Join: CAST(join_t1.t1_int AS Int64) = __correlated_sq_16.CAST(t2_int AS Int64) + Int64(1) ---LeftSemi Join: CAST(join_t1.t1_id AS Int64) + Int64(12) = __correlated_sq_15.CAST(t2_id AS Int64) + Int64(1) +LeftSemi Join: CAST(join_t1.t1_int AS Int64) = __correlated_sq_16.join_t2.t2_int + Int64(1) +--LeftSemi Join: CAST(join_t1.t1_id AS Int64) + Int64(12) = __correlated_sq_15.join_t2.t2_id + Int64(1) ----Filter: join_t1.t1_id > UInt32(0) ------TableScan: join_t1 projection=[t1_id, t1_name, t1_int] ----SubqueryAlias: __correlated_sq_15 -------Projection: CAST(join_t2.t2_id AS Int64) + Int64(1) AS CAST(t2_id AS Int64) + Int64(1) +------Projection: CAST(join_t2.t2_id AS Int64) + Int64(1) --------TableScan: join_t2 projection=[t2_id] --SubqueryAlias: __correlated_sq_16 -----Projection: CAST(join_t2.t2_int AS Int64) + Int64(1) AS CAST(t2_int AS Int64) + Int64(1) +----Projection: CAST(join_t2.t2_int AS Int64) + Int64(1) ------TableScan: join_t2 projection=[t2_int] query ITI @@ -2197,8 +2197,10 @@ logical_plan LeftAnti Join: Filter: CAST(join_t1.t1_id AS Int64) + Int64(1) > CAST(__correlated_sq_31.t2_id AS Int64) * Int64(2) --TableScan: join_t1 projection=[t1_id, t1_name, t1_int] --SubqueryAlias: __correlated_sq_31 -----Aggregate: groupBy=[[join_t2.t2_id]], aggr=[[]] -------TableScan: join_t2 projection=[t2_id] +----Projection: join_t2.t2_id +------Aggregate: groupBy=[[join_t2.t2_int, join_t2.t2_id]], aggr=[[]] +--------Projection: join_t2.t2_int, join_t2.t2_id +----------TableScan: join_t2 projection=[t2_id, t2_int] statement ok set datafusion.optimizer.repartition_joins = false; @@ -2244,8 +2246,10 @@ logical_plan LeftAnti Join: Filter: CAST(join_t1.t1_id AS Int64) + Int64(1) > CAST(__correlated_sq_34.t2_id 
AS Int64) * Int64(2) --TableScan: join_t1 projection=[t1_id, t1_name, t1_int] --SubqueryAlias: __correlated_sq_34 -----Aggregate: groupBy=[[join_t2.t2_id]], aggr=[[]] -------TableScan: join_t2 projection=[t2_id] +----Projection: join_t2.t2_id +------Aggregate: groupBy=[[join_t2.t2_id + join_t2.t2_int, join_t2.t2_int, join_t2.t2_id]], aggr=[[]] +--------Projection: join_t2.t2_id + join_t2.t2_int, join_t2.t2_int, join_t2.t2_id +----------TableScan: join_t2 projection=[t2_id, t2_int] statement ok set datafusion.optimizer.repartition_joins = false; @@ -2293,8 +2297,10 @@ logical_plan LeftAnti Join: Filter: CAST(join_t1.t1_id AS Int64) + Int64(1) > CAST(__correlated_sq_37.t2_id AS Int64) * Int64(2) --TableScan: join_t1 projection=[t1_id, t1_name, t1_int] --SubqueryAlias: __correlated_sq_37 -----Aggregate: groupBy=[[join_t2.t2_id]], aggr=[[]] -------TableScan: join_t2 projection=[t2_id] +----Projection: join_t2.t2_id +------Aggregate: groupBy=[[Int64(1), join_t2.t2_int, join_t2.t2_id]], aggr=[[]] +--------Projection: Int64(1), join_t2.t2_int, join_t2.t2_id +----------TableScan: join_t2 projection=[t2_id, t2_int] query ITI SELECT * FROM join_t1 @@ -2322,8 +2328,10 @@ logical_plan LeftAnti Join: Filter: CAST(join_t1.t1_id AS Int64) + Int64(1) > CAST(__correlated_sq_39.t2_id AS Int64) * Int64(2) --TableScan: join_t1 projection=[t1_id, t1_name, t1_int] --SubqueryAlias: __correlated_sq_39 -----Aggregate: groupBy=[[join_t2.t2_id]], aggr=[[]] -------TableScan: join_t2 projection=[t2_id] +----Projection: join_t2.t2_id +------Aggregate: groupBy=[[Int64(1), join_t2.t2_int, join_t2.t2_id]], aggr=[[]] +--------Projection: Int64(1), join_t2.t2_int, join_t2.t2_id +----------TableScan: join_t2 projection=[t2_id, t2_int] query ITI SELECT * FROM join_t1 @@ -2936,4 +2944,3 @@ set datafusion.execution.target_partitions = 2; statement ok set datafusion.execution.batch_size = 4096; - From 2112d2bd8dfcb2e476f5aa99fabf3db4812efb5f Mon Sep 17 00:00:00 2001 From: "mingmwang@ebay.com" Date: Tue, 
13 Jun 2023 18:10:06 +0800 Subject: [PATCH 13/13] fix fmt --- datafusion/expr/src/logical_plan/plan.rs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/datafusion/expr/src/logical_plan/plan.rs b/datafusion/expr/src/logical_plan/plan.rs index 4d1f2382ded6f..4f21ad38c4d2f 100644 --- a/datafusion/expr/src/logical_plan/plan.rs +++ b/datafusion/expr/src/logical_plan/plan.rs @@ -15,7 +15,8 @@ // specific language governing permissions and limitations // under the License. -///! Logical plan types +//! Logical plan types + use crate::expr::InSubquery; use crate::expr::{Exists, Placeholder}; use crate::expr_rewriter::create_col_from_scalar_expr;