From aecc83ef182dff76ff9bc84d84873e51cafe62df Mon Sep 17 00:00:00 2001
From: Andrew Lamb
Date: Mon, 28 Aug 2023 07:01:55 -0400
Subject: [PATCH 01/15] Split out `datafusion-physical-plan` crate

---
 Cargo.toml                                     |  2 +-
 datafusion/physical-plan/Cargo.toml            | 69 +++++++++++++++++++
 .../src}/aggregates/group_values/mod.rs        |  0
 .../src}/aggregates/group_values/primitive.rs  |  0
 .../src}/aggregates/group_values/row.rs        |  0
 .../src}/aggregates/mod.rs                     |  0
 .../src}/aggregates/no_grouping.rs             |  0
 .../src}/aggregates/order/full.rs              |  0
 .../src}/aggregates/order/mod.rs               |  0
 .../src}/aggregates/order/partial.rs           |  0
 .../src}/aggregates/row_hash.rs                |  0
 .../src}/analyze.rs                            |  0
 .../src}/coalesce_batches.rs                   |  0
 .../src}/coalesce_partitions.rs                |  0
 .../src}/common.rs                             |  0
 .../src}/display.rs                            |  0
 .../src}/empty.rs                              |  0
 .../src}/explain.rs                            |  0
 .../src}/filter.rs                             |  0
 .../src}/insert.rs                             |  0
 .../src}/joins/cross_join.rs                   |  0
 .../src}/joins/hash_join.rs                    |  0
 .../src}/joins/hash_join_utils.rs              |  0
 .../src}/joins/mod.rs                          |  0
 .../src}/joins/nested_loop_join.rs             |  0
 .../src}/joins/sort_merge_join.rs              |  0
 .../src}/joins/symmetric_hash_join.rs          |  0
 .../src}/joins/test_utils.rs                   |  0
 .../src}/joins/utils.rs                        |  0
 .../mod.rs => physical-plan/src/lib.rs}        |  0
 .../src}/limit.rs                              |  0
 .../src}/memory.rs                             |  0
 .../src}/metrics/baseline.rs                   |  0
 .../src}/metrics/builder.rs                    |  0
 .../src}/metrics/mod.rs                        |  0
 .../src}/metrics/value.rs                      |  0
 .../src}/projection.rs                         |  0
 .../src}/repartition/distributor_channels.rs   |  0
 .../src}/repartition/mod.rs                    |  0
 .../src}/sorts/builder.rs                      |  0
 .../src}/sorts/cursor.rs                       |  0
 .../src}/sorts/index.rs                        |  0
 .../src}/sorts/merge.rs                        |  0
 .../src}/sorts/mod.rs                          |  0
 .../src}/sorts/sort.rs                         |  0
 .../src}/sorts/sort_preserving_merge.rs        |  0
 .../src}/sorts/stream.rs                       |  0
 .../src}/stream.rs                             |  0
 .../src}/streaming.rs                          |  0
 .../src}/tree_node.rs                          |  0
 .../src}/udaf.rs                               |  0
 .../src}/union.rs                              |  0
 .../src}/unnest.rs                             |  0
 .../src}/values.rs                             |  0
 .../src}/visitor.rs                            |  0
 .../src}/windows/bounded_window_agg_exec.rs    |  0
 .../src}/windows/mod.rs                        |  0
 .../src}/windows/window_agg_exec.rs            |  0
 58 files changed, 70 insertions(+), 1 deletion(-)
 create mode 100644 datafusion/physical-plan/Cargo.toml
 rename datafusion/{core/src/physical_plan => physical-plan/src}/aggregates/group_values/mod.rs (100%)
 rename datafusion/{core/src/physical_plan => physical-plan/src}/aggregates/group_values/primitive.rs (100%)
 rename datafusion/{core/src/physical_plan => physical-plan/src}/aggregates/group_values/row.rs (100%)
 rename datafusion/{core/src/physical_plan => physical-plan/src}/aggregates/mod.rs (100%)
 rename datafusion/{core/src/physical_plan => physical-plan/src}/aggregates/no_grouping.rs (100%)
 rename datafusion/{core/src/physical_plan => physical-plan/src}/aggregates/order/full.rs (100%)
 rename datafusion/{core/src/physical_plan => physical-plan/src}/aggregates/order/mod.rs (100%)
 rename datafusion/{core/src/physical_plan => physical-plan/src}/aggregates/order/partial.rs (100%)
 rename datafusion/{core/src/physical_plan => physical-plan/src}/aggregates/row_hash.rs (100%)
 rename datafusion/{core/src/physical_plan => physical-plan/src}/analyze.rs (100%)
 rename datafusion/{core/src/physical_plan => physical-plan/src}/coalesce_batches.rs (100%)
 rename datafusion/{core/src/physical_plan => physical-plan/src}/coalesce_partitions.rs (100%)
 rename datafusion/{core/src/physical_plan => physical-plan/src}/common.rs (100%)
 rename datafusion/{core/src/physical_plan => physical-plan/src}/display.rs (100%)
 rename datafusion/{core/src/physical_plan => physical-plan/src}/empty.rs (100%)
 rename datafusion/{core/src/physical_plan => physical-plan/src}/explain.rs (100%)
 rename datafusion/{core/src/physical_plan => physical-plan/src}/filter.rs (100%)
 rename datafusion/{core/src/physical_plan => physical-plan/src}/insert.rs (100%)
 rename datafusion/{core/src/physical_plan => physical-plan/src}/joins/cross_join.rs (100%)
 rename datafusion/{core/src/physical_plan => physical-plan/src}/joins/hash_join.rs (100%)
 rename datafusion/{core/src/physical_plan => physical-plan/src}/joins/hash_join_utils.rs (100%)
 rename datafusion/{core/src/physical_plan => physical-plan/src}/joins/mod.rs (100%)
 rename datafusion/{core/src/physical_plan => physical-plan/src}/joins/nested_loop_join.rs (100%)
 rename datafusion/{core/src/physical_plan => physical-plan/src}/joins/sort_merge_join.rs (100%)
 rename datafusion/{core/src/physical_plan => physical-plan/src}/joins/symmetric_hash_join.rs (100%)
 rename datafusion/{core/src/physical_plan => physical-plan/src}/joins/test_utils.rs (100%)
 rename datafusion/{core/src/physical_plan => physical-plan/src}/joins/utils.rs (100%)
 rename datafusion/{core/src/physical_plan/mod.rs => physical-plan/src/lib.rs} (100%)
 rename datafusion/{core/src/physical_plan => physical-plan/src}/limit.rs (100%)
 rename datafusion/{core/src/physical_plan => physical-plan/src}/memory.rs (100%)
 rename datafusion/{core/src/physical_plan => physical-plan/src}/metrics/baseline.rs (100%)
 rename datafusion/{core/src/physical_plan => physical-plan/src}/metrics/builder.rs (100%)
 rename datafusion/{core/src/physical_plan => physical-plan/src}/metrics/mod.rs (100%)
 rename datafusion/{core/src/physical_plan => physical-plan/src}/metrics/value.rs (100%)
 rename datafusion/{core/src/physical_plan => physical-plan/src}/projection.rs (100%)
 rename datafusion/{core/src/physical_plan => physical-plan/src}/repartition/distributor_channels.rs (100%)
 rename datafusion/{core/src/physical_plan => physical-plan/src}/repartition/mod.rs (100%)
 rename datafusion/{core/src/physical_plan => physical-plan/src}/sorts/builder.rs (100%)
 rename datafusion/{core/src/physical_plan => physical-plan/src}/sorts/cursor.rs (100%)
 rename datafusion/{core/src/physical_plan => physical-plan/src}/sorts/index.rs (100%)
 rename datafusion/{core/src/physical_plan => physical-plan/src}/sorts/merge.rs (100%)
 rename datafusion/{core/src/physical_plan => physical-plan/src}/sorts/mod.rs (100%)
 rename datafusion/{core/src/physical_plan => physical-plan/src}/sorts/sort.rs (100%)
 rename datafusion/{core/src/physical_plan => physical-plan/src}/sorts/sort_preserving_merge.rs (100%)
 rename datafusion/{core/src/physical_plan => physical-plan/src}/sorts/stream.rs (100%)
 rename datafusion/{core/src/physical_plan => physical-plan/src}/stream.rs (100%)
 rename datafusion/{core/src/physical_plan => physical-plan/src}/streaming.rs (100%)
 rename datafusion/{core/src/physical_plan => physical-plan/src}/tree_node.rs (100%)
 rename datafusion/{core/src/physical_plan => physical-plan/src}/udaf.rs (100%)
 rename datafusion/{core/src/physical_plan => physical-plan/src}/union.rs (100%)
 rename datafusion/{core/src/physical_plan => physical-plan/src}/unnest.rs (100%)
 rename datafusion/{core/src/physical_plan => physical-plan/src}/values.rs (100%)
 rename datafusion/{core/src/physical_plan => physical-plan/src}/visitor.rs (100%)
 rename datafusion/{core/src/physical_plan => physical-plan/src}/windows/bounded_window_agg_exec.rs (100%)
 rename datafusion/{core/src/physical_plan => physical-plan/src}/windows/mod.rs (100%)
 rename datafusion/{core/src/physical_plan => physical-plan/src}/windows/window_agg_exec.rs (100%)

diff --git a/Cargo.toml b/Cargo.toml
index 1dae101d2f8fc..ae3ce0bf6cbf6 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -17,7 +17,7 @@
 [workspace]
 exclude = ["datafusion-cli"]
-members = ["datafusion/common", "datafusion/core", "datafusion/expr", "datafusion/execution", "datafusion/optimizer", "datafusion/physical-expr", "datafusion/proto", "datafusion/proto/gen", "datafusion/sql", "datafusion/sqllogictest", "datafusion/substrait", "datafusion-examples", "test-utils", "benchmarks",
+members = ["datafusion/common", "datafusion/core", "datafusion/expr", "datafusion/execution", "datafusion/optimizer", "datafusion/physical-expr", "datafusion/physical-plan", "datafusion/proto", "datafusion/proto/gen", "datafusion/sql", "datafusion/sqllogictest", "datafusion/substrait", "datafusion-examples", "test-utils", "benchmarks",
 ]
 resolver = "2"
diff --git a/datafusion/physical-plan/Cargo.toml b/datafusion/physical-plan/Cargo.toml
new file mode 100644
index 0000000000000..ebf4781d223f6
--- /dev/null
+++ b/datafusion/physical-plan/Cargo.toml
@@ -0,0 +1,69 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+[package]
+name = "datafusion-physical-plan"
+description = "Physical (ExecutionPlan) implementations for DataFusion query engine"
+keywords = ["arrow", "query", "sql"]
+version = { workspace = true }
+edition = { workspace = true }
+readme = { workspace = true }
+homepage = { workspace = true }
+repository = { workspace = true }
+license = { workspace = true }
+authors = { workspace = true }
+rust-version = { workspace = true }
+
+[lib]
+name = "datafusion_physical_plan"
+path = "src/lib.rs"
+
+[features]
+
+[dependencies]
+#ahash = { version = "0.8", default-features = false, features = ["runtime-rng"] }
+arrow = { workspace = true }
+arrow-array = { workspace = true }
+arrow-buffer = { workspace = true }
+arrow-schema = { workspace = true }
+#base64 = { version = "0.21", optional = true }
+#blake2 = { version = "^0.10.2", optional = true }
+#blake3 = { version = "1.0", optional = true }
+#chrono = { version = "0.4.23", default-features = false }
+datafusion-common = { path = "../common", version = "30.0.0" }
+datafusion-expr = { path = "../expr", version = "30.0.0" }
+datafusion-physical-expr = { path = "../physical-expr", version = "30.0.0" }
+#half = { version = "2.1", default-features = false }
+#hashbrown = { version = "0.14", features = ["raw"] }
+#hex = { version = "0.4", optional = true }
+#indexmap = "2.0.0"
+#itertools = { version = "0.11", features = ["use_std"] }
+#libc = "0.2.140"
+#log = "^0.4"
+#md-5 = { version = "^0.10.0", optional = true }
+#paste = "^1.0"
+#petgraph = "0.6.2"
+#rand = "0.8"
+#regex = { version = "1.8", optional = true }
+#sha2 = { version = "^0.10.1", optional = true }
+#unicode-segmentation = { version = "^1.7.1", optional = true }
+#uuid = { version = "^1.2", features = ["v4"] }
+
+#[dev-dependencies]
+#criterion = "0.5"
+#rand = "0.8"
+#rstest = "0.18.0"
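The new manifest compiles the moved files as a standalone library, but nothing in this patch touches the core crate's public API. For existing `use datafusion::physical_plan::...` imports to keep resolving, core can re-export the new crate under the old module path. Such a shim is not part of this patch; the following is a minimal sketch, assuming the split otherwise leaves the module tree unchanged:

    // Hypothetical addition to datafusion/core/src/lib.rs (not in this diff):
    // expose the extracted crate under the legacy path so that
    // `use datafusion::physical_plan::ExecutionPlan;` still compiles.
    pub mod physical_plan {
        pub use datafusion_physical_plan::*;
    }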
diff --git a/datafusion/core/src/physical_plan/aggregates/group_values/mod.rs b/datafusion/physical-plan/src/aggregates/group_values/mod.rs
similarity index 100%
rename from datafusion/core/src/physical_plan/aggregates/group_values/mod.rs
rename to datafusion/physical-plan/src/aggregates/group_values/mod.rs
diff --git a/datafusion/core/src/physical_plan/aggregates/group_values/primitive.rs b/datafusion/physical-plan/src/aggregates/group_values/primitive.rs
similarity index 100%
rename from datafusion/core/src/physical_plan/aggregates/group_values/primitive.rs
rename to datafusion/physical-plan/src/aggregates/group_values/primitive.rs
diff --git a/datafusion/core/src/physical_plan/aggregates/group_values/row.rs b/datafusion/physical-plan/src/aggregates/group_values/row.rs
similarity index 100%
rename from datafusion/core/src/physical_plan/aggregates/group_values/row.rs
rename to datafusion/physical-plan/src/aggregates/group_values/row.rs
diff --git a/datafusion/core/src/physical_plan/aggregates/mod.rs b/datafusion/physical-plan/src/aggregates/mod.rs
similarity index 100%
rename from datafusion/core/src/physical_plan/aggregates/mod.rs
rename to datafusion/physical-plan/src/aggregates/mod.rs
diff --git a/datafusion/core/src/physical_plan/aggregates/no_grouping.rs b/datafusion/physical-plan/src/aggregates/no_grouping.rs
similarity index 100%
rename from datafusion/core/src/physical_plan/aggregates/no_grouping.rs
rename to datafusion/physical-plan/src/aggregates/no_grouping.rs
diff --git a/datafusion/core/src/physical_plan/aggregates/order/full.rs b/datafusion/physical-plan/src/aggregates/order/full.rs
similarity index 100%
rename from datafusion/core/src/physical_plan/aggregates/order/full.rs
rename to datafusion/physical-plan/src/aggregates/order/full.rs
diff --git a/datafusion/core/src/physical_plan/aggregates/order/mod.rs b/datafusion/physical-plan/src/aggregates/order/mod.rs
similarity index 100%
rename from datafusion/core/src/physical_plan/aggregates/order/mod.rs
rename to datafusion/physical-plan/src/aggregates/order/mod.rs
diff --git a/datafusion/core/src/physical_plan/aggregates/order/partial.rs b/datafusion/physical-plan/src/aggregates/order/partial.rs
similarity index 100%
rename from datafusion/core/src/physical_plan/aggregates/order/partial.rs
rename to datafusion/physical-plan/src/aggregates/order/partial.rs
diff --git a/datafusion/core/src/physical_plan/aggregates/row_hash.rs b/datafusion/physical-plan/src/aggregates/row_hash.rs
similarity index 100%
rename from datafusion/core/src/physical_plan/aggregates/row_hash.rs
rename to datafusion/physical-plan/src/aggregates/row_hash.rs
diff --git a/datafusion/core/src/physical_plan/analyze.rs b/datafusion/physical-plan/src/analyze.rs
similarity index 100%
rename from datafusion/core/src/physical_plan/analyze.rs
rename to datafusion/physical-plan/src/analyze.rs
diff --git a/datafusion/core/src/physical_plan/coalesce_batches.rs b/datafusion/physical-plan/src/coalesce_batches.rs
similarity index 100%
rename from datafusion/core/src/physical_plan/coalesce_batches.rs
rename to datafusion/physical-plan/src/coalesce_batches.rs
diff --git a/datafusion/core/src/physical_plan/coalesce_partitions.rs b/datafusion/physical-plan/src/coalesce_partitions.rs
similarity index 100%
rename from datafusion/core/src/physical_plan/coalesce_partitions.rs
rename to datafusion/physical-plan/src/coalesce_partitions.rs
diff --git a/datafusion/core/src/physical_plan/common.rs b/datafusion/physical-plan/src/common.rs
similarity index 100%
rename from datafusion/core/src/physical_plan/common.rs
rename to datafusion/physical-plan/src/common.rs
diff --git a/datafusion/core/src/physical_plan/display.rs b/datafusion/physical-plan/src/display.rs
similarity index 100%
rename from datafusion/core/src/physical_plan/display.rs
rename to datafusion/physical-plan/src/display.rs
diff --git a/datafusion/core/src/physical_plan/empty.rs b/datafusion/physical-plan/src/empty.rs
similarity index 100%
rename from datafusion/core/src/physical_plan/empty.rs
rename to datafusion/physical-plan/src/empty.rs
diff --git a/datafusion/core/src/physical_plan/explain.rs b/datafusion/physical-plan/src/explain.rs
similarity index 100%
rename from datafusion/core/src/physical_plan/explain.rs
rename to datafusion/physical-plan/src/explain.rs
diff --git a/datafusion/core/src/physical_plan/filter.rs b/datafusion/physical-plan/src/filter.rs
similarity index 100%
rename from datafusion/core/src/physical_plan/filter.rs
rename to datafusion/physical-plan/src/filter.rs
diff --git a/datafusion/core/src/physical_plan/insert.rs b/datafusion/physical-plan/src/insert.rs
similarity index 100%
rename from datafusion/core/src/physical_plan/insert.rs
rename to datafusion/physical-plan/src/insert.rs
diff --git a/datafusion/core/src/physical_plan/joins/cross_join.rs b/datafusion/physical-plan/src/joins/cross_join.rs
similarity index 100%
rename from datafusion/core/src/physical_plan/joins/cross_join.rs
rename to datafusion/physical-plan/src/joins/cross_join.rs
diff --git a/datafusion/core/src/physical_plan/joins/hash_join.rs b/datafusion/physical-plan/src/joins/hash_join.rs
similarity index 100%
rename from datafusion/core/src/physical_plan/joins/hash_join.rs
rename to datafusion/physical-plan/src/joins/hash_join.rs
diff --git a/datafusion/core/src/physical_plan/joins/hash_join_utils.rs b/datafusion/physical-plan/src/joins/hash_join_utils.rs
similarity index 100%
rename from datafusion/core/src/physical_plan/joins/hash_join_utils.rs
rename to datafusion/physical-plan/src/joins/hash_join_utils.rs
diff --git a/datafusion/core/src/physical_plan/joins/mod.rs b/datafusion/physical-plan/src/joins/mod.rs
similarity index 100%
rename from datafusion/core/src/physical_plan/joins/mod.rs
rename to datafusion/physical-plan/src/joins/mod.rs
diff --git a/datafusion/core/src/physical_plan/joins/nested_loop_join.rs b/datafusion/physical-plan/src/joins/nested_loop_join.rs
similarity index 100%
rename from datafusion/core/src/physical_plan/joins/nested_loop_join.rs
rename to datafusion/physical-plan/src/joins/nested_loop_join.rs
diff --git a/datafusion/core/src/physical_plan/joins/sort_merge_join.rs b/datafusion/physical-plan/src/joins/sort_merge_join.rs
similarity index 100%
rename from datafusion/core/src/physical_plan/joins/sort_merge_join.rs
rename to datafusion/physical-plan/src/joins/sort_merge_join.rs
diff --git a/datafusion/core/src/physical_plan/joins/symmetric_hash_join.rs b/datafusion/physical-plan/src/joins/symmetric_hash_join.rs
similarity index 100%
rename from datafusion/core/src/physical_plan/joins/symmetric_hash_join.rs
rename to datafusion/physical-plan/src/joins/symmetric_hash_join.rs
diff --git a/datafusion/core/src/physical_plan/joins/test_utils.rs b/datafusion/physical-plan/src/joins/test_utils.rs
similarity index 100%
rename from datafusion/core/src/physical_plan/joins/test_utils.rs
rename to datafusion/physical-plan/src/joins/test_utils.rs
diff --git a/datafusion/core/src/physical_plan/joins/utils.rs b/datafusion/physical-plan/src/joins/utils.rs
similarity index 100%
rename from datafusion/core/src/physical_plan/joins/utils.rs
rename to datafusion/physical-plan/src/joins/utils.rs
diff --git a/datafusion/core/src/physical_plan/mod.rs b/datafusion/physical-plan/src/lib.rs
similarity index 100%
rename from datafusion/core/src/physical_plan/mod.rs
rename to datafusion/physical-plan/src/lib.rs
diff --git a/datafusion/core/src/physical_plan/limit.rs b/datafusion/physical-plan/src/limit.rs
similarity index 100%
rename from datafusion/core/src/physical_plan/limit.rs
rename to datafusion/physical-plan/src/limit.rs
diff --git a/datafusion/core/src/physical_plan/memory.rs b/datafusion/physical-plan/src/memory.rs
similarity index 100%
rename from datafusion/core/src/physical_plan/memory.rs
rename to datafusion/physical-plan/src/memory.rs
diff --git a/datafusion/core/src/physical_plan/metrics/baseline.rs b/datafusion/physical-plan/src/metrics/baseline.rs
similarity index 100%
rename from datafusion/core/src/physical_plan/metrics/baseline.rs
rename to datafusion/physical-plan/src/metrics/baseline.rs
diff --git a/datafusion/core/src/physical_plan/metrics/builder.rs b/datafusion/physical-plan/src/metrics/builder.rs
similarity index 100%
rename from datafusion/core/src/physical_plan/metrics/builder.rs
rename to datafusion/physical-plan/src/metrics/builder.rs
diff --git a/datafusion/core/src/physical_plan/metrics/mod.rs b/datafusion/physical-plan/src/metrics/mod.rs
similarity index 100%
rename from datafusion/core/src/physical_plan/metrics/mod.rs
rename to datafusion/physical-plan/src/metrics/mod.rs
diff --git a/datafusion/core/src/physical_plan/metrics/value.rs b/datafusion/physical-plan/src/metrics/value.rs
similarity index 100%
rename from datafusion/core/src/physical_plan/metrics/value.rs
rename to datafusion/physical-plan/src/metrics/value.rs
diff --git a/datafusion/core/src/physical_plan/projection.rs b/datafusion/physical-plan/src/projection.rs
similarity index 100%
rename from datafusion/core/src/physical_plan/projection.rs
rename to datafusion/physical-plan/src/projection.rs
diff --git a/datafusion/core/src/physical_plan/repartition/distributor_channels.rs b/datafusion/physical-plan/src/repartition/distributor_channels.rs
similarity index 100%
rename from datafusion/core/src/physical_plan/repartition/distributor_channels.rs
rename to datafusion/physical-plan/src/repartition/distributor_channels.rs
diff --git a/datafusion/core/src/physical_plan/repartition/mod.rs b/datafusion/physical-plan/src/repartition/mod.rs
similarity index 100%
rename from datafusion/core/src/physical_plan/repartition/mod.rs
rename to datafusion/physical-plan/src/repartition/mod.rs
diff --git a/datafusion/core/src/physical_plan/sorts/builder.rs b/datafusion/physical-plan/src/sorts/builder.rs
similarity index 100%
rename from datafusion/core/src/physical_plan/sorts/builder.rs
rename to datafusion/physical-plan/src/sorts/builder.rs
diff --git a/datafusion/core/src/physical_plan/sorts/cursor.rs b/datafusion/physical-plan/src/sorts/cursor.rs
similarity index 100%
rename from datafusion/core/src/physical_plan/sorts/cursor.rs
rename to datafusion/physical-plan/src/sorts/cursor.rs
diff --git a/datafusion/core/src/physical_plan/sorts/index.rs b/datafusion/physical-plan/src/sorts/index.rs
similarity index 100%
rename from datafusion/core/src/physical_plan/sorts/index.rs
rename to datafusion/physical-plan/src/sorts/index.rs
diff --git a/datafusion/core/src/physical_plan/sorts/merge.rs b/datafusion/physical-plan/src/sorts/merge.rs
similarity index 100%
rename from datafusion/core/src/physical_plan/sorts/merge.rs
rename to datafusion/physical-plan/src/sorts/merge.rs
diff --git a/datafusion/core/src/physical_plan/sorts/mod.rs b/datafusion/physical-plan/src/sorts/mod.rs
similarity index 100%
rename from datafusion/core/src/physical_plan/sorts/mod.rs
rename to datafusion/physical-plan/src/sorts/mod.rs
diff --git a/datafusion/core/src/physical_plan/sorts/sort.rs b/datafusion/physical-plan/src/sorts/sort.rs
similarity index 100%
rename from datafusion/core/src/physical_plan/sorts/sort.rs
rename to datafusion/physical-plan/src/sorts/sort.rs
diff --git a/datafusion/core/src/physical_plan/sorts/sort_preserving_merge.rs b/datafusion/physical-plan/src/sorts/sort_preserving_merge.rs
similarity index 100%
rename from datafusion/core/src/physical_plan/sorts/sort_preserving_merge.rs
rename to datafusion/physical-plan/src/sorts/sort_preserving_merge.rs
diff --git a/datafusion/core/src/physical_plan/sorts/stream.rs b/datafusion/physical-plan/src/sorts/stream.rs
similarity index 100%
rename from datafusion/core/src/physical_plan/sorts/stream.rs
rename to datafusion/physical-plan/src/sorts/stream.rs
diff --git a/datafusion/core/src/physical_plan/stream.rs b/datafusion/physical-plan/src/stream.rs
similarity index 100%
rename from datafusion/core/src/physical_plan/stream.rs
rename to datafusion/physical-plan/src/stream.rs
diff --git a/datafusion/core/src/physical_plan/streaming.rs b/datafusion/physical-plan/src/streaming.rs
similarity index 100%
rename from datafusion/core/src/physical_plan/streaming.rs
rename to datafusion/physical-plan/src/streaming.rs
diff --git a/datafusion/core/src/physical_plan/tree_node.rs b/datafusion/physical-plan/src/tree_node.rs
similarity index 100%
rename from datafusion/core/src/physical_plan/tree_node.rs
rename to datafusion/physical-plan/src/tree_node.rs
diff --git a/datafusion/core/src/physical_plan/udaf.rs b/datafusion/physical-plan/src/udaf.rs
similarity index 100%
rename from datafusion/core/src/physical_plan/udaf.rs
rename to datafusion/physical-plan/src/udaf.rs
diff --git a/datafusion/core/src/physical_plan/union.rs b/datafusion/physical-plan/src/union.rs
similarity index 100%
rename from datafusion/core/src/physical_plan/union.rs
rename to datafusion/physical-plan/src/union.rs
diff --git a/datafusion/core/src/physical_plan/unnest.rs b/datafusion/physical-plan/src/unnest.rs
similarity index 100%
rename from datafusion/core/src/physical_plan/unnest.rs
rename to datafusion/physical-plan/src/unnest.rs
diff --git a/datafusion/core/src/physical_plan/values.rs b/datafusion/physical-plan/src/values.rs
similarity index 100%
rename from datafusion/core/src/physical_plan/values.rs
rename to datafusion/physical-plan/src/values.rs
diff --git a/datafusion/core/src/physical_plan/visitor.rs b/datafusion/physical-plan/src/visitor.rs
similarity index 100%
rename from datafusion/core/src/physical_plan/visitor.rs
rename to datafusion/physical-plan/src/visitor.rs
diff --git a/datafusion/core/src/physical_plan/windows/bounded_window_agg_exec.rs b/datafusion/physical-plan/src/windows/bounded_window_agg_exec.rs
similarity index 100%
rename from datafusion/core/src/physical_plan/windows/bounded_window_agg_exec.rs
rename to datafusion/physical-plan/src/windows/bounded_window_agg_exec.rs
diff --git a/datafusion/core/src/physical_plan/windows/mod.rs b/datafusion/physical-plan/src/windows/mod.rs
similarity index 100%
rename from datafusion/core/src/physical_plan/windows/mod.rs
rename to datafusion/physical-plan/src/windows/mod.rs
diff --git a/datafusion/core/src/physical_plan/windows/window_agg_exec.rs b/datafusion/physical-plan/src/windows/window_agg_exec.rs
similarity index 100%
rename from datafusion/core/src/physical_plan/windows/window_agg_exec.rs
rename to datafusion/physical-plan/src/windows/window_agg_exec.rs
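Patch 01 is purely file moves: every rename reports 100% similarity, so no file contents changed, and the new crate cannot yet compile on its own. Because `core/src/physical_plan/mod.rs` became the crate root `physical-plan/src/lib.rs`, each module moved one level up in the tree, and imports written against the old layout no longer resolve; the next patch rewrites them. The pattern, shown on a hypothetical import rather than a line taken from the diff:

    // Old location: module `physical_plan` inside the `datafusion` (core) crate.
    use crate::physical_plan::metrics::ExecutionPlanMetricsSet;

    // New location: `metrics` sits at the root of `datafusion-physical-plan`.
    use crate::metrics::ExecutionPlanMetricsSet;

    // From any other crate, the same item is reached through the new package.
    use datafusion_physical_plan::metrics::ExecutionPlanMetricsSet;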
From 70f3fb5487671efddaa1b4abca4479a786fe8438 Mon Sep 17 00:00:00 2001
From: Andrew Lamb
Date: Mon, 28 Aug 2023 07:13:00 -0400
Subject: [PATCH 02/15] Update paths

---
 datafusion-examples/examples/csv_opener.rs     |  2 +-
 datafusion/physical-plan/Cargo.toml            | 21 +++++---
 .../src/aggregates/group_values/primitive.rs   |  2 +-
 .../src/aggregates/group_values/row.rs         |  2 +-
 .../physical-plan/src/aggregates/mod.rs        |  6 +--
 .../src/aggregates/no_grouping.rs              |  8 +--
 .../src/aggregates/order/full.rs               |  2 +-
 .../src/aggregates/order/partial.rs            |  2 +-
 .../physical-plan/src/aggregates/row_hash.rs   | 10 ++--
 datafusion/physical-plan/src/analyze.rs        |  2 +-
 .../physical-plan/src/coalesce_batches.rs      |  4 +-
 .../physical-plan/src/coalesce_partitions.rs   |  6 +--
 datafusion/physical-plan/src/common.rs         | 10 ++--
 datafusion/physical-plan/src/display.rs        |  8 +--
 datafusion/physical-plan/src/empty.rs          |  6 +--
 datafusion/physical-plan/src/explain.rs        |  4 +-
 datafusion/physical-plan/src/filter.rs         |  8 +--
 datafusion/physical-plan/src/insert.rs         |  2 +-
 .../physical-plan/src/joins/cross_join.rs      |  8 +--
 .../physical-plan/src/joins/hash_join.rs       |  8 +--
 .../src/joins/hash_join_utils.rs               |  6 +--
 .../src/joins/nested_loop_join.rs              | 10 ++--
 .../src/joins/sort_merge_join.rs               | 20 ++++----
 .../src/joins/symmetric_hash_join.rs           | 18 +++----
 datafusion/physical-plan/src/joins/utils.rs    |  6 +--
 datafusion/physical-plan/src/lib.rs            |  6 +--
 datafusion/physical-plan/src/limit.rs          |  6 +--
 datafusion/physical-plan/src/memory.rs         |  6 +--
 datafusion/physical-plan/src/projection.rs     |  6 +--
 .../physical-plan/src/repartition/mod.rs       | 50 +++++++------------
 datafusion/physical-plan/src/sorts/cursor.rs   |  2 +-
 datafusion/physical-plan/src/sorts/merge.rs    | 14 ++----
 datafusion/physical-plan/src/sorts/sort.rs     | 20 ++++----
 .../src/sorts/sort_preserving_merge.rs         | 26 +++++-----
 datafusion/physical-plan/src/sorts/stream.rs   |  6 +--
 datafusion/physical-plan/src/stream.rs         |  2 +-
 datafusion/physical-plan/src/streaming.rs      |  6 +--
 datafusion/physical-plan/src/tree_node.rs      |  2 +-
 datafusion/physical-plan/src/udaf.rs           |  2 +-
 datafusion/physical-plan/src/union.rs          |  6 +--
 datafusion/physical-plan/src/unnest.rs         |  8 +--
 datafusion/physical-plan/src/values.rs         |  2 +-
 .../src/windows/bounded_window_agg_exec.rs     | 10 ++--
 datafusion/physical-plan/src/windows/mod.rs    |  8 +--
 .../src/windows/window_agg_exec.rs             | 12 ++---
 45 files changed, 176 insertions(+), 205 deletions(-)

diff --git a/datafusion-examples/examples/csv_opener.rs b/datafusion-examples/examples/csv_opener.rs
index 0587b515b2d8d..6366f16b7aa3f 100644
--- a/datafusion-examples/examples/csv_opener.rs
+++ b/datafusion-examples/examples/csv_opener.rs
@@ -17,6 +17,7 @@
 use std::{sync::Arc, vec};
 
+use crate::metrics::ExecutionPlanMetricsSet;
 use datafusion::{
     assert_batches_eq,
     datasource::{
@@ -25,7 +26,6 @@ use datafusion::{
         physical_plan::{CsvConfig, CsvOpener, FileScanConfig, FileStream},
     },
     error::Result,
-    physical_plan::metrics::ExecutionPlanMetricsSet,
     test_util::aggr_test_schema,
 };
 use datafusion_common::FileCompressionType;
diff --git a/datafusion/physical-plan/Cargo.toml b/datafusion/physical-plan/Cargo.toml
index ebf4781d223f6..161569513a783 100644
--- a/datafusion/physical-plan/Cargo.toml
+++ b/datafusion/physical-plan/Cargo.toml
@@ -35,31 +35,38 @@ path = "src/lib.rs"
 [features]
 
 [dependencies]
-#ahash = { version = "0.8", default-features = false, features = ["runtime-rng"] }
+ahash = { version = "0.8", default-features = false, features = ["runtime-rng"] }
 arrow = { workspace = true }
 arrow-array = { workspace = true }
 arrow-buffer = { workspace = true }
 arrow-schema = { workspace = true }
+async-trait = "0.1.41"
 #base64 = { version = "0.21", optional = true }
 #blake2 = { version = "^0.10.2", optional = true }
 #blake3 = { version = "1.0", optional = true }
-#chrono = { version = "0.4.23", default-features = false }
+chrono = { version = "0.4.23", default-features = false }
 datafusion-common = { path = "../common", version = "30.0.0" }
 datafusion-expr = { path = "../expr", version = "30.0.0" }
+datafusion-execution = { path = "../execution", version = "30.0.0" }
 datafusion-physical-expr = { path = "../physical-expr", version = "30.0.0" }
-#half = { version = "2.1", default-features = false }
-#hashbrown = { version = "0.14", features = ["raw"] }
+futures = "0.3"
+half = { version = "2.1", default-features = false }
+hashbrown = { version = "0.14", features = ["raw"] }
 #hex = { version = "0.4", optional = true }
-#indexmap = "2.0.0"
-#itertools = { version = "0.11", features = ["use_std"] }
+indexmap = "2.0.0"
+itertools = { version = "0.11", features = ["use_std"] }
 #libc = "0.2.140"
-#log = "^0.4"
+log = "^0.4"
 #md-5 = { version = "^0.10.0", optional = true }
+parking_lot = "0.12"
+pin-project-lite = "^0.2.7"
 #paste = "^1.0"
 #petgraph = "0.6.2"
 #rand = "0.8"
 #regex = { version = "1.8", optional = true }
 #sha2 = { version = "^0.10.1", optional = true }
+tempfile = "3"
+tokio = { version = "1.28", features = ["macros", "rt", "rt-multi-thread", "sync", "fs", "parking_lot"] }
 #unicode-segmentation = { version = "^1.7.1", optional = true }
 #uuid = { version = "^1.2", features = ["v4"] }
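Two things are worth noting before the per-file hunks. First, the csv_opener.rs change above is a byproduct of the same mechanical rewrite: `crate::metrics` cannot resolve inside an example binary, which the remaining patches in the series presumably revisit. Second, un-commenting `futures`, `tokio`, and friends reflects that the moved operators now drive async record-batch streams directly rather than through core. A minimal sketch of that pattern, assuming `RecordBatchStreamAdapter` keeps the constructor it had inside core:

    use std::sync::Arc;

    use arrow::datatypes::{DataType, Field, Schema};
    use arrow::record_batch::RecordBatch;
    use datafusion_common::Result;
    use datafusion_physical_plan::stream::RecordBatchStreamAdapter;
    use datafusion_physical_plan::SendableRecordBatchStream;

    // Wrap an ordinary futures stream of Result<RecordBatch> so it can be
    // returned from ExecutionPlan::execute.
    fn empty_stream() -> SendableRecordBatchStream {
        let schema = Arc::new(Schema::new(vec![Field::new("a", DataType::Int32, false)]));
        let batches = futures::stream::empty::<Result<RecordBatch>>();
        Box::pin(RecordBatchStreamAdapter::new(schema, batches))
    }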
"rt-multi-thread", "sync", "fs", "parking_lot"] } #unicode-segmentation = { version = "^1.7.1", optional = true } #uuid = { version = "^1.2", features = ["v4"] } diff --git a/datafusion/physical-plan/src/aggregates/group_values/primitive.rs b/datafusion/physical-plan/src/aggregates/group_values/primitive.rs index 7b8691c67fdd0..b2cb5a8bcaf38 100644 --- a/datafusion/physical-plan/src/aggregates/group_values/primitive.rs +++ b/datafusion/physical-plan/src/aggregates/group_values/primitive.rs @@ -15,7 +15,7 @@ // specific language governing permissions and limitations // under the License. -use crate::physical_plan::aggregates::group_values::GroupValues; +use crate::aggregates::group_values::GroupValues; use ahash::RandomState; use arrow::array::BooleanBufferBuilder; use arrow::buffer::NullBuffer; diff --git a/datafusion/physical-plan/src/aggregates/group_values/row.rs b/datafusion/physical-plan/src/aggregates/group_values/row.rs index 4eb660d52590f..f215f29dc42ba 100644 --- a/datafusion/physical-plan/src/aggregates/group_values/row.rs +++ b/datafusion/physical-plan/src/aggregates/group_values/row.rs @@ -15,7 +15,7 @@ // specific language governing permissions and limitations // under the License. -use crate::physical_plan::aggregates::group_values::GroupValues; +use crate::aggregates::group_values::GroupValues; use ahash::RandomState; use arrow::row::{RowConverter, Rows, SortField}; use arrow_array::ArrayRef; diff --git a/datafusion/physical-plan/src/aggregates/mod.rs b/datafusion/physical-plan/src/aggregates/mod.rs index 78ef5e37b239d..cb88d132d0d8a 100644 --- a/datafusion/physical-plan/src/aggregates/mod.rs +++ b/datafusion/physical-plan/src/aggregates/mod.rs @@ -17,11 +17,11 @@ //! Aggregates functionalities -use crate::physical_plan::aggregates::{ +use crate::aggregates::{ no_grouping::AggregateStream, row_hash::GroupedHashAggregateStream, }; -use crate::physical_plan::metrics::{ExecutionPlanMetricsSet, MetricsSet}; -use crate::physical_plan::{ +use crate::metrics::{ExecutionPlanMetricsSet, MetricsSet}; +use crate::{ DisplayFormatType, Distribution, EquivalenceProperties, ExecutionPlan, Partitioning, SendableRecordBatchStream, Statistics, }; diff --git a/datafusion/physical-plan/src/aggregates/no_grouping.rs b/datafusion/physical-plan/src/aggregates/no_grouping.rs index 610c24faab1c7..32c0bbc78a5de 100644 --- a/datafusion/physical-plan/src/aggregates/no_grouping.rs +++ b/datafusion/physical-plan/src/aggregates/no_grouping.rs @@ -17,12 +17,12 @@ //! 
Aggregate without grouping columns -use crate::physical_plan::aggregates::{ +use crate::aggregates::{ aggregate_expressions, create_accumulators, finalize_aggregation, AccumulatorItem, AggregateMode, }; -use crate::physical_plan::metrics::{BaselineMetrics, RecordOutput}; -use crate::physical_plan::{RecordBatchStream, SendableRecordBatchStream}; +use crate::metrics::{BaselineMetrics, RecordOutput}; +use crate::{RecordBatchStream, SendableRecordBatchStream}; use arrow::datatypes::SchemaRef; use arrow::record_batch::RecordBatch; use datafusion_common::Result; @@ -33,7 +33,7 @@ use std::borrow::Cow; use std::sync::Arc; use std::task::{Context, Poll}; -use crate::physical_plan::filter::batch_filter; +use crate::filter::batch_filter; use datafusion_execution::memory_pool::{MemoryConsumer, MemoryReservation}; use futures::stream::{Stream, StreamExt}; diff --git a/datafusion/physical-plan/src/aggregates/order/full.rs b/datafusion/physical-plan/src/aggregates/order/full.rs index 69b308da7c8ca..f46ee687faf16 100644 --- a/datafusion/physical-plan/src/aggregates/order/full.rs +++ b/datafusion/physical-plan/src/aggregates/order/full.rs @@ -15,7 +15,7 @@ // specific language governing permissions and limitations // under the License. -use crate::physical_expr::EmitTo; +use datafusion_physical_expr::EmitTo; /// Tracks grouping state when the data is ordered entirely by its /// group keys diff --git a/datafusion/physical-plan/src/aggregates/order/partial.rs b/datafusion/physical-plan/src/aggregates/order/partial.rs index 019e61ef26885..8c72d834e729b 100644 --- a/datafusion/physical-plan/src/aggregates/order/partial.rs +++ b/datafusion/physical-plan/src/aggregates/order/partial.rs @@ -15,12 +15,12 @@ // specific language governing permissions and limitations // under the License. 
-use crate::physical_expr::EmitTo; use arrow::row::{OwnedRow, RowConverter, Rows, SortField}; use arrow_array::ArrayRef; use arrow_schema::Schema; use datafusion_common::Result; use datafusion_execution::memory_pool::proxy::VecAllocExt; +use datafusion_physical_expr::EmitTo; use datafusion_physical_expr::PhysicalSortExpr; /// Tracks grouping state when the data is ordered by some subset of diff --git a/datafusion/physical-plan/src/aggregates/row_hash.rs b/datafusion/physical-plan/src/aggregates/row_hash.rs index 4613a2e46443e..e80497ac59a5e 100644 --- a/datafusion/physical-plan/src/aggregates/row_hash.rs +++ b/datafusion/physical-plan/src/aggregates/row_hash.rs @@ -28,14 +28,14 @@ use std::vec; use futures::ready; use futures::stream::{Stream, StreamExt}; -use crate::physical_plan::aggregates::group_values::{new_group_values, GroupValues}; -use crate::physical_plan::aggregates::{ +use crate::aggregates::group_values::{new_group_values, GroupValues}; +use crate::aggregates::{ evaluate_group_by, evaluate_many, evaluate_optional, group_schema, AggregateMode, PhysicalGroupBy, }; -use crate::physical_plan::metrics::{BaselineMetrics, RecordOutput}; -use crate::physical_plan::{aggregates, PhysicalExpr}; -use crate::physical_plan::{RecordBatchStream, SendableRecordBatchStream}; +use crate::metrics::{BaselineMetrics, RecordOutput}; +use crate::{aggregates, PhysicalExpr}; +use crate::{RecordBatchStream, SendableRecordBatchStream}; use arrow::array::*; use arrow::{datatypes::SchemaRef, record_batch::RecordBatch}; use datafusion_common::Result; diff --git a/datafusion/physical-plan/src/analyze.rs b/datafusion/physical-plan/src/analyze.rs index 98fce19a1dd7d..e055400c18696 100644 --- a/datafusion/physical-plan/src/analyze.rs +++ b/datafusion/physical-plan/src/analyze.rs @@ -20,7 +20,7 @@ use std::sync::Arc; use std::{any::Any, time::Instant}; -use crate::physical_plan::{ +use crate::{ display::DisplayableExecutionPlan, DisplayFormatType, ExecutionPlan, Partitioning, Statistics, }; diff --git a/datafusion/physical-plan/src/coalesce_batches.rs b/datafusion/physical-plan/src/coalesce_batches.rs index cc32d4163b19b..7e6e129934168 100644 --- a/datafusion/physical-plan/src/coalesce_batches.rs +++ b/datafusion/physical-plan/src/coalesce_batches.rs @@ -23,7 +23,7 @@ use std::pin::Pin; use std::sync::Arc; use std::task::{Context, Poll}; -use crate::physical_plan::{ +use crate::{ DisplayFormatType, EquivalenceProperties, ExecutionPlan, Partitioning, RecordBatchStream, SendableRecordBatchStream, }; @@ -309,8 +309,8 @@ pub fn concat_batches( #[cfg(test)] mod tests { use super::*; - use crate::physical_plan::{memory::MemoryExec, repartition::RepartitionExec}; use crate::test::create_vec_batches; + use crate::{memory::MemoryExec, repartition::RepartitionExec}; use arrow::datatypes::{DataType, Field, Schema}; #[tokio::test(flavor = "multi_thread")] diff --git a/datafusion/physical-plan/src/coalesce_partitions.rs b/datafusion/physical-plan/src/coalesce_partitions.rs index 78cb7b201f263..296743ba63145 100644 --- a/datafusion/physical-plan/src/coalesce_partitions.rs +++ b/datafusion/physical-plan/src/coalesce_partitions.rs @@ -26,9 +26,7 @@ use super::metrics::{BaselineMetrics, ExecutionPlanMetricsSet, MetricsSet}; use super::stream::{ObservedStream, RecordBatchReceiverStream}; use super::{DisplayAs, SendableRecordBatchStream, Statistics}; -use crate::physical_plan::{ - DisplayFormatType, EquivalenceProperties, ExecutionPlan, Partitioning, -}; +use crate::{DisplayFormatType, EquivalenceProperties, ExecutionPlan, 
Partitioning}; use arrow::datatypes::SchemaRef; use datafusion_common::{internal_err, DataFusionError, Result}; @@ -174,11 +172,11 @@ mod tests { use tempfile::TempDir; use super::*; - use crate::physical_plan::{collect, common}; use crate::test::exec::{ assert_strong_count_converges_to_zero, BlockingExec, PanicExec, }; use crate::test::{self, assert_is_pending}; + use crate::{collect, common}; #[tokio::test] async fn merge() -> Result<()> { diff --git a/datafusion/physical-plan/src/common.rs b/datafusion/physical-plan/src/common.rs index 787f3eed2673e..c6cfbbfbbac74 100644 --- a/datafusion/physical-plan/src/common.rs +++ b/datafusion/physical-plan/src/common.rs @@ -18,8 +18,8 @@ //! Defines common code used in execution plans use super::SendableRecordBatchStream; -use crate::physical_plan::stream::RecordBatchReceiverStream; -use crate::physical_plan::{ColumnStatistics, ExecutionPlan, Statistics}; +use crate::stream::RecordBatchReceiverStream; +use crate::{ColumnStatistics, ExecutionPlan, Statistics}; use arrow::datatypes::Schema; use arrow::ipc::writer::{FileWriter, IpcWriteOptions}; use arrow::record_batch::RecordBatch; @@ -375,9 +375,9 @@ mod tests { use std::ops::Not; use super::*; - use crate::physical_plan::memory::MemoryExec; - use crate::physical_plan::sorts::sort::SortExec; - use crate::physical_plan::union::UnionExec; + use crate::memory::MemoryExec; + use crate::sorts::sort::SortExec; + use crate::union::UnionExec; use arrow::compute::SortOptions; use arrow::{ array::{Float32Array, Float64Array}, diff --git a/datafusion/physical-plan/src/display.rs b/datafusion/physical-plan/src/display.rs index 3b345bdf9e3ad..0b4379fe96e58 100644 --- a/datafusion/physical-plan/src/display.rs +++ b/datafusion/physical-plan/src/display.rs @@ -26,7 +26,7 @@ use datafusion_common::display::StringifiedPlan; use datafusion_physical_expr::PhysicalSortExpr; use super::{accept, ExecutionPlan, ExecutionPlanVisitor}; -use datafusion_common::display::GraphvizBuilder; +use datafusion_common::display::{GraphvizBuilder, PlanType}; /// Options for controlling how each [`ExecutionPlan`] should format itself #[derive(Debug, Clone, Copy)] @@ -204,11 +204,7 @@ impl<'a> DisplayableExecutionPlan<'a> { } /// format as a `StringifiedPlan` - pub fn to_stringified( - &self, - verbose: bool, - plan_type: crate::logical_expr::PlanType, - ) -> StringifiedPlan { + pub fn to_stringified(&self, verbose: bool, plan_type: PlanType) -> StringifiedPlan { StringifiedPlan::new(plan_type, self.indent(verbose).to_string()) } } diff --git a/datafusion/physical-plan/src/empty.rs b/datafusion/physical-plan/src/empty.rs index b38486991e82f..2f03e0750e7b0 100644 --- a/datafusion/physical-plan/src/empty.rs +++ b/datafusion/physical-plan/src/empty.rs @@ -20,9 +20,7 @@ use std::any::Any; use std::sync::Arc; -use crate::physical_plan::{ - memory::MemoryStream, DisplayFormatType, ExecutionPlan, Partitioning, -}; +use crate::{memory::MemoryStream, DisplayFormatType, ExecutionPlan, Partitioning}; use arrow::array::{ArrayRef, NullArray}; use arrow::datatypes::{DataType, Field, Fields, Schema, SchemaRef}; use arrow::record_batch::RecordBatch; @@ -174,7 +172,7 @@ impl ExecutionPlan for EmptyExec { #[cfg(test)] mod tests { use super::*; - use crate::physical_plan::with_new_children_if_necessary; + use crate::with_new_children_if_necessary; use crate::{physical_plan::common, test_util}; #[tokio::test] diff --git a/datafusion/physical-plan/src/explain.rs b/datafusion/physical-plan/src/explain.rs index 7ee6f268d8151..8d6bf4105f6a1 100644 --- 
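The display.rs hunks above remove the last dependency on core's `logical_expr` re-export: `to_stringified` now takes the `PlanType` that the diff confirms lives in `datafusion-common` alongside `StringifiedPlan` and `GraphvizBuilder`. A sketch of calling the slimmed-down signature; the variant name is an assumption for illustration, not taken from this diff:

    use datafusion_common::display::{PlanType, StringifiedPlan};

    // Render any displayable plan into a StringifiedPlan, tagging it with
    // the phase it belongs to (variant name assumed).
    fn stringify(plan: &dyn std::fmt::Display) -> StringifiedPlan {
        StringifiedPlan::new(PlanType::FinalPhysicalPlan, plan.to_string())
    }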
diff --git a/datafusion/physical-plan/src/explain.rs b/datafusion/physical-plan/src/explain.rs
index 7ee6f268d8151..8d6bf4105f6a1 100644
--- a/datafusion/physical-plan/src/explain.rs
+++ b/datafusion/physical-plan/src/explain.rs
@@ -24,13 +24,13 @@
 use datafusion_common::display::StringifiedPlan;
 use datafusion_common::{internal_err, DataFusionError, Result};
 
-use crate::physical_plan::{DisplayFormatType, ExecutionPlan, Partitioning, Statistics};
+use crate::{DisplayFormatType, ExecutionPlan, Partitioning, Statistics};
 use arrow::{array::StringBuilder, datatypes::SchemaRef, record_batch::RecordBatch};
 use log::trace;
 
 use super::DisplayAs;
 use super::{expressions::PhysicalSortExpr, SendableRecordBatchStream};
-use crate::physical_plan::stream::RecordBatchStreamAdapter;
+use crate::stream::RecordBatchStreamAdapter;
 use datafusion_execution::TaskContext;
 
 /// Explain execution plan operator. This operator contains the string
diff --git a/datafusion/physical-plan/src/filter.rs b/datafusion/physical-plan/src/filter.rs
index 0b878814e305a..1030eeba42508 100644
--- a/datafusion/physical-plan/src/filter.rs
+++ b/datafusion/physical-plan/src/filter.rs
@@ -28,7 +28,7 @@ use super::{
     ColumnStatistics, DisplayAs, RecordBatchStream, SendableRecordBatchStream,
     Statistics,
 };
-use crate::physical_plan::{
+use crate::{
     metrics::{BaselineMetrics, ExecutionPlanMetricsSet, MetricsSet},
     Column, DisplayFormatType, EquivalenceProperties, ExecutionPlan, Partitioning,
 };
@@ -378,12 +378,12 @@ pub type EqualAndNonEqual<'a> =
 mod tests {
     use super::*;
-    use crate::physical_plan::expressions::*;
-    use crate::physical_plan::ExecutionPlan;
-    use crate::physical_plan::{collect, with_new_children_if_necessary};
+    use crate::expressions::*;
     use crate::test;
     use crate::test::exec::StatisticsExec;
     use crate::test_util;
+    use crate::ExecutionPlan;
+    use crate::{collect, with_new_children_if_necessary};
     use arrow::datatypes::{DataType, Field, Schema};
     use datafusion_common::utils::DataPtr;
     use datafusion_common::ColumnStatistics;
diff --git a/datafusion/physical-plan/src/insert.rs b/datafusion/physical-plan/src/insert.rs
index 8c03fb543f5b1..e60afcbcb0412 100644
--- a/datafusion/physical-plan/src/insert.rs
+++ b/datafusion/physical-plan/src/insert.rs
@@ -35,7 +35,7 @@ use std::any::Any;
 use std::fmt::Debug;
 use std::sync::Arc;
 
-use crate::physical_plan::stream::RecordBatchStreamAdapter;
+use crate::stream::RecordBatchStreamAdapter;
 use datafusion_common::{exec_err, internal_err, DataFusionError};
 use datafusion_execution::TaskContext;
 
diff --git a/datafusion/physical-plan/src/joins/cross_join.rs b/datafusion/physical-plan/src/joins/cross_join.rs
index 256942754350e..e5302420c27a7 100644
--- a/datafusion/physical-plan/src/joins/cross_join.rs
+++ b/datafusion/physical-plan/src/joins/cross_join.rs
@@ -25,9 +25,9 @@ use std::{any::Any, sync::Arc, task::Poll};
 use arrow::datatypes::{Fields, Schema, SchemaRef};
 use arrow::record_batch::RecordBatch;
 
-use crate::physical_plan::metrics::{ExecutionPlanMetricsSet, MetricsSet};
-use crate::physical_plan::DisplayAs;
-use crate::physical_plan::{
+use crate::metrics::{ExecutionPlanMetricsSet, MetricsSet};
+use crate::DisplayAs;
+use crate::{
     coalesce_batches::concat_batches, coalesce_partitions::CoalescePartitionsExec,
     ColumnStatistics, DisplayFormatType, Distribution, EquivalenceProperties,
     ExecutionPlan, Partitioning, PhysicalSortExpr, RecordBatchStream,
@@ -458,8 +458,8 @@ impl CrossJoinStream {
 mod tests {
     use super::*;
     use crate::assert_batches_sorted_eq;
+    use crate::common;
     use crate::common::assert_contains;
-    use crate::physical_plan::common;
     use crate::test::{build_table_scan_i32, columns};
     use datafusion_execution::runtime_env::{RuntimeConfig, RuntimeEnv};
diff --git a/datafusion/physical-plan/src/joins/hash_join.rs b/datafusion/physical-plan/src/joins/hash_join.rs
index e0e522b2150af..75807863198ce 100644
--- a/datafusion/physical-plan/src/joins/hash_join.rs
+++ b/datafusion/physical-plan/src/joins/hash_join.rs
@@ -24,13 +24,13 @@ use std::sync::Arc;
 use std::task::Poll;
 use std::{any::Any, usize, vec};
 
-use crate::physical_plan::joins::utils::{
+use crate::joins::utils::{
     adjust_indices_by_join_type, apply_join_filter_to_indices, build_batch_from_indices,
     calculate_join_output_ordering, combine_join_ordering_equivalence_properties,
     get_final_indices_from_bit_map, need_produce_result_in_final, JoinSide,
 };
-use crate::physical_plan::DisplayAs;
-use crate::physical_plan::{
+use crate::DisplayAs;
+use crate::{
     coalesce_batches::concat_batches,
     coalesce_partitions::CoalescePartitionsExec,
     expressions::Column,
@@ -2765,7 +2765,7 @@ mod tests {
         let stream = join.execute(0, task_ctx).unwrap();
 
         // Expect that an error is returned
-        let result_string = crate::physical_plan::common::collect(stream)
+        let result_string = crate::common::collect(stream)
             .await
             .unwrap_err()
             .to_string();
diff --git a/datafusion/physical-plan/src/joins/hash_join_utils.rs b/datafusion/physical-plan/src/joins/hash_join_utils.rs
index ac0b183818147..bb7976345800d 100644
--- a/datafusion/physical-plan/src/joins/hash_join_utils.rs
+++ b/datafusion/physical-plan/src/joins/hash_join_utils.rs
@@ -24,8 +24,8 @@ use std::ops::IndexMut;
 use std::sync::Arc;
 use std::{fmt, usize};
 
-use crate::physical_plan::joins::utils::{JoinFilter, JoinSide};
-use crate::physical_plan::ExecutionPlan;
+use crate::joins::utils::{JoinFilter, JoinSide};
+use crate::ExecutionPlan;
 
 use arrow::compute::concat_batches;
 use arrow::datatypes::{ArrowNativeType, SchemaRef};
@@ -830,7 +830,7 @@ pub fn record_visited_indices(
 #[cfg(test)]
 pub mod tests {
     use super::*;
-    use crate::physical_plan::{
+    use crate::{
         expressions::Column,
         expressions::PhysicalSortExpr,
         joins::utils::{ColumnIndex, JoinFilter, JoinSide},
diff --git a/datafusion/physical-plan/src/joins/nested_loop_join.rs b/datafusion/physical-plan/src/joins/nested_loop_join.rs
index 618dd66e69a09..b66454c732262 100644
--- a/datafusion/physical-plan/src/joins/nested_loop_join.rs
+++ b/datafusion/physical-plan/src/joins/nested_loop_join.rs
@@ -19,7 +19,7 @@
 //! The nested loop join can execute in parallel by partitions and it is
 //! determined by the [`JoinType`].
 
-use crate::physical_plan::joins::utils::{
+use crate::joins::utils::{
     append_right_indices, apply_join_filter_to_indices, build_batch_from_indices,
     build_join_schema, check_join_is_valid, combine_join_equivalence_properties,
     estimate_join_statistics, get_anti_indices, get_anti_u64_indices,
@@ -27,8 +27,8 @@ use crate::physical_plan::joins::utils::{
     partitioned_join_output_partitioning, BuildProbeJoinMetrics, ColumnIndex,
     JoinFilter, JoinSide, OnceAsync, OnceFut,
 };
-use crate::physical_plan::metrics::{ExecutionPlanMetricsSet, MetricsSet};
-use crate::physical_plan::{
+use crate::metrics::{ExecutionPlanMetricsSet, MetricsSet};
+use crate::{
     DisplayAs, DisplayFormatType, Distribution, ExecutionPlan, Partitioning,
     RecordBatchStream, SendableRecordBatchStream,
 };
@@ -48,7 +48,7 @@ use std::fmt::Formatter;
 use std::sync::Arc;
 use std::task::Poll;
 
-use crate::physical_plan::coalesce_batches::concat_batches;
+use crate::coalesce_batches::concat_batches;
 use datafusion_common::Result;
 use datafusion_execution::memory_pool::MemoryConsumer;
 use datafusion_execution::TaskContext;
@@ -753,7 +753,7 @@ mod tests {
     use arrow::datatypes::{DataType, Field};
     use datafusion_expr::Operator;
 
-    use crate::physical_plan::joins::utils::JoinSide;
+    use crate::joins::utils::JoinSide;
     use datafusion_common::ScalarValue;
     use datafusion_physical_expr::expressions::Literal;
     use datafusion_physical_expr::PhysicalExpr;
diff --git a/datafusion/physical-plan/src/joins/sort_merge_join.rs b/datafusion/physical-plan/src/joins/sort_merge_join.rs
index 3de98f5452c57..e85f4bcb2ecc3 100644
--- a/datafusion/physical-plan/src/joins/sort_merge_join.rs
+++ b/datafusion/physical-plan/src/joins/sort_merge_join.rs
@@ -30,15 +30,15 @@ use std::pin::Pin;
 use std::sync::Arc;
 use std::task::{Context, Poll};
 
-use crate::physical_plan::expressions::Column;
-use crate::physical_plan::expressions::PhysicalSortExpr;
-use crate::physical_plan::joins::utils::{
+use crate::expressions::Column;
+use crate::expressions::PhysicalSortExpr;
+use crate::joins::utils::{
     build_join_schema, calculate_join_output_ordering, check_join_is_valid,
     combine_join_equivalence_properties, combine_join_ordering_equivalence_properties,
     estimate_join_statistics, partitioned_join_output_partitioning, JoinOn, JoinSide,
 };
-use crate::physical_plan::metrics::{ExecutionPlanMetricsSet, MetricBuilder, MetricsSet};
-use crate::physical_plan::{
+use crate::metrics::{ExecutionPlanMetricsSet, MetricBuilder, MetricsSet};
+use crate::{
     metrics, DisplayAs, DisplayFormatType, Distribution, EquivalenceProperties,
     ExecutionPlan, Partitioning, PhysicalExpr, RecordBatchStream,
     SendableRecordBatchStream, Statistics,
@@ -1393,13 +1393,13 @@ mod tests {
     use datafusion_execution::TaskContext;
 
     use crate::common::assert_contains;
-    use crate::physical_plan::expressions::Column;
-    use crate::physical_plan::joins::utils::JoinOn;
-    use crate::physical_plan::joins::SortMergeJoinExec;
-    use crate::physical_plan::memory::MemoryExec;
-    use crate::physical_plan::{common, ExecutionPlan};
+    use crate::expressions::Column;
+    use crate::joins::utils::JoinOn;
+    use crate::joins::SortMergeJoinExec;
+    use crate::memory::MemoryExec;
     use crate::test::{build_table_i32, columns};
     use crate::{assert_batches_eq, assert_batches_sorted_eq};
+    use crate::{common, ExecutionPlan};
     use datafusion_common::JoinType;
     use datafusion_common::Result;
     use datafusion_execution::runtime_env::{RuntimeConfig, RuntimeEnv};
diff --git a/datafusion/physical-plan/src/joins/symmetric_hash_join.rs b/datafusion/physical-plan/src/joins/symmetric_hash_join.rs
index 1c664adfbb715..69f7aaf840dfe 100644
--- a/datafusion/physical-plan/src/joins/symmetric_hash_join.rs
+++ b/datafusion/physical-plan/src/joins/symmetric_hash_join.rs
@@ -32,19 +32,17 @@ use std::task::Poll;
 use std::vec;
 use std::{any::Any, usize};
 
-use crate::physical_plan::common::SharedMemoryReservation;
-use crate::physical_plan::joins::hash_join::{
-    build_equal_condition_join_indices, update_hash,
-};
-use crate::physical_plan::joins::hash_join_utils::{
+use crate::common::SharedMemoryReservation;
+use crate::joins::hash_join::{build_equal_condition_join_indices, update_hash};
+use crate::joins::hash_join_utils::{
     build_filter_expression_graph, calculate_filter_expr_intervals, combine_two_batches,
     convert_sort_expr_with_filter_schema, get_pruning_anti_indices,
     get_pruning_semi_indices, record_visited_indices, IntervalCalculatorInnerState,
     PruningJoinHashMap,
 };
-use crate::physical_plan::joins::StreamJoinPartitionMode;
-use crate::physical_plan::DisplayAs;
-use crate::physical_plan::{
+use crate::joins::StreamJoinPartitionMode;
+use crate::DisplayAs;
+use crate::{
     expressions::Column,
     expressions::PhysicalSortExpr,
     joins::{
@@ -1220,9 +1218,9 @@ mod tests {
     use datafusion_physical_expr::expressions::{binary, col, Column};
     use datafusion_physical_expr::intervals::test_utils::gen_conjunctive_numerical_expr;
 
-    use crate::physical_plan::joins::hash_join_utils::tests::complicated_filter;
+    use crate::joins::hash_join_utils::tests::complicated_filter;
 
-    use crate::physical_plan::joins::test_utils::{
+    use crate::joins::test_utils::{
         build_sides_record_batches, compare_batches, create_memory_table,
         join_expr_tests_fixture_f64, join_expr_tests_fixture_i32,
         join_expr_tests_fixture_temporal, partitioned_hash_join_with_filter,
diff --git a/datafusion/physical-plan/src/joins/utils.rs b/datafusion/physical-plan/src/joins/utils.rs
index bd3de1acbf0fd..e33de001df304 100644
--- a/datafusion/physical-plan/src/joins/utils.rs
+++ b/datafusion/physical-plan/src/joins/utils.rs
@@ -25,9 +25,9 @@ use std::sync::Arc;
 use std::task::{Context, Poll};
 use std::usize;
 
-use crate::physical_plan::metrics::{self, ExecutionPlanMetricsSet, MetricBuilder};
-use crate::physical_plan::SchemaRef;
-use crate::physical_plan::{
+use crate::metrics::{self, ExecutionPlanMetricsSet, MetricBuilder};
+use crate::SchemaRef;
+use crate::{
     ColumnStatistics, EquivalenceProperties, ExecutionPlan, Partitioning, Statistics,
 };
 
diff --git a/datafusion/physical-plan/src/lib.rs b/datafusion/physical-plan/src/lib.rs
index 89e3852a3e972..a34e8e651b3bd 100644
--- a/datafusion/physical-plan/src/lib.rs
+++ b/datafusion/physical-plan/src/lib.rs
@@ -23,9 +23,9 @@ use self::metrics::MetricsSet;
 use self::{
     coalesce_partitions::CoalescePartitionsExec, display::DisplayableExecutionPlan,
 };
-use crate::physical_plan::expressions::PhysicalSortExpr;
 use datafusion_common::Result;
 pub use datafusion_common::{internal_err, ColumnStatistics, Statistics};
+use datafusion_physical_expr::PhysicalSortExpr;
 pub use visitor::{accept, visit_execution_plan, ExecutionPlanVisitor};
 
 use arrow::datatypes::SchemaRef;
@@ -397,8 +397,8 @@ pub mod unnest;
 pub mod values;
 pub mod windows;
 
-use crate::physical_plan::repartition::RepartitionExec;
-use crate::physical_plan::sorts::sort_preserving_merge::SortPreservingMergeExec;
+use crate::repartition::RepartitionExec;
+use crate::sorts::sort_preserving_merge::SortPreservingMergeExec;
 pub use datafusion_common::utils::project_schema;
 use datafusion_execution::TaskContext;
 pub use datafusion_physical_expr::{
diff --git a/datafusion/physical-plan/src/limit.rs b/datafusion/physical-plan/src/limit.rs
index 87a07f8d46fec..360b0d6179ed0 100644
--- a/datafusion/physical-plan/src/limit.rs
+++ b/datafusion/physical-plan/src/limit.rs
@@ -22,7 +22,7 @@ use std::pin::Pin;
 use std::sync::Arc;
 use std::task::{Context, Poll};
 
-use crate::physical_plan::{
+use crate::{
     DisplayFormatType, Distribution, EquivalenceProperties, ExecutionPlan, Partitioning,
 };
 
@@ -525,8 +525,8 @@ mod tests {
     use tempfile::TempDir;
 
     use super::*;
-    use crate::physical_plan::coalesce_partitions::CoalescePartitionsExec;
-    use crate::physical_plan::common;
+    use crate::coalesce_partitions::CoalescePartitionsExec;
+    use crate::common;
     use crate::test;
 
     #[tokio::test]
diff --git a/datafusion/physical-plan/src/memory.rs b/datafusion/physical-plan/src/memory.rs
index 877410c97ca54..d36d93d29edd0 100644
--- a/datafusion/physical-plan/src/memory.rs
+++ b/datafusion/physical-plan/src/memory.rs
@@ -30,7 +30,7 @@ use std::any::Any;
 use std::sync::Arc;
 use std::task::{Context, Poll};
 
-use crate::physical_plan::ordering_equivalence_properties_helper;
+use crate::ordering_equivalence_properties_helper;
 use datafusion_common::DataFusionError;
 use datafusion_execution::TaskContext;
 use datafusion_physical_expr::{LexOrdering, OrderingEquivalenceProperties};
@@ -260,8 +260,8 @@ impl RecordBatchStream for MemoryStream {
 #[cfg(test)]
 mod tests {
-    use crate::physical_plan::memory::MemoryExec;
-    use crate::physical_plan::ExecutionPlan;
+    use crate::memory::MemoryExec;
+    use crate::ExecutionPlan;
     use arrow_schema::{DataType, Field, Schema, SortOptions};
     use datafusion_physical_expr::expressions::col;
     use datafusion_physical_expr::PhysicalSortExpr;
diff --git a/datafusion/physical-plan/src/projection.rs b/datafusion/physical-plan/src/projection.rs
index 12c89eee19312..1de23d4b89267 100644
--- a/datafusion/physical-plan/src/projection.rs
+++ b/datafusion/physical-plan/src/projection.rs
@@ -29,7 +29,7 @@ use std::task::{Context, Poll};
 use super::expressions::{Column, PhysicalSortExpr};
 use super::metrics::{BaselineMetrics, ExecutionPlanMetricsSet, MetricsSet};
 use super::{DisplayAs, RecordBatchStream, SendableRecordBatchStream, Statistics};
-use crate::physical_plan::{
+use crate::{
     ColumnStatistics, DisplayFormatType, EquivalenceProperties, ExecutionPlan,
     Partitioning, PhysicalExpr,
 };
@@ -508,8 +508,8 @@ impl RecordBatchStream for ProjectionStream {
 #[cfg(test)]
 mod tests {
     use super::*;
-    use crate::physical_plan::common::collect;
-    use crate::physical_plan::expressions::{self, col};
+    use crate::common::collect;
+    use crate::expressions::{self, col};
     use crate::test::{self};
     use crate::test_util;
     use arrow_schema::DataType;
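The repartition hunks that follow repeatedly shorten `crate::physical_plan::common::collect` to `crate::common::collect` in tests; the helper itself is unchanged. A sketch of the same call shape from outside the crate; the one-column MemoryExec setup is illustrative, not taken from the diff:

    use std::sync::Arc;

    use arrow::array::Int32Array;
    use arrow::datatypes::{DataType, Field, Schema};
    use arrow::record_batch::RecordBatch;
    use datafusion_common::Result;
    use datafusion_execution::TaskContext;
    use datafusion_physical_plan::{common, memory::MemoryExec, ExecutionPlan};

    async fn roundtrip() -> Result<()> {
        let schema = Arc::new(Schema::new(vec![Field::new("a", DataType::Int32, false)]));
        let batch = RecordBatch::try_new(
            schema.clone(),
            vec![Arc::new(Int32Array::from(vec![1, 2, 3]))],
        )?;
        let exec = MemoryExec::try_new(&[vec![batch]], schema, None)?;
        // Execute partition 0 and drain the stream into a Vec<RecordBatch>.
        let stream = exec.execute(0, Arc::new(TaskContext::default()))?;
        let batches = common::collect(stream).await?;
        assert_eq!(batches.len(), 1);
        Ok(())
    }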
crate::metrics::BaselineMetrics; +use crate::repartition::distributor_channels::{channels, partition_aware_channels}; +use crate::sorts::streaming_merge; +use crate::{ DisplayFormatType, EquivalenceProperties, ExecutionPlan, Partitioning, Statistics, }; @@ -1075,7 +1073,7 @@ mod tests { let output_stream = exec.execute(0, task_ctx).unwrap(); // Expect that an error is returned - let result_string = crate::physical_plan::common::collect(output_stream) + let result_string = crate::common::collect(output_stream) .await .unwrap_err() .to_string(); @@ -1101,7 +1099,7 @@ mod tests { let output_stream = exec.execute(0, task_ctx).unwrap(); // Expect that an error is returned - let result_string = crate::physical_plan::common::collect(output_stream) + let result_string = crate::common::collect(output_stream) .await .unwrap_err() .to_string(); @@ -1134,7 +1132,7 @@ mod tests { let output_stream = exec.execute(0, task_ctx).unwrap(); // Expect that an error is returned - let result_string = crate::physical_plan::common::collect(output_stream) + let result_string = crate::common::collect(output_stream) .await .unwrap_err() .to_string(); @@ -1182,9 +1180,7 @@ mod tests { assert_batches_sorted_eq!(&expected, &expected_batches); let output_stream = exec.execute(0, task_ctx).unwrap(); - let batches = crate::physical_plan::common::collect(output_stream) - .await - .unwrap(); + let batches = crate::common::collect(output_stream).await.unwrap(); assert_batches_sorted_eq!(&expected, &batches); } @@ -1211,9 +1207,7 @@ mod tests { input.wait().await; // output stream 1 should *not* error and have one of the input batches - let batches = crate::physical_plan::common::collect(output_stream1) - .await - .unwrap(); + let batches = crate::common::collect(output_stream1).await.unwrap(); let expected = vec![ "+------------------+", @@ -1236,7 +1230,7 @@ mod tests { async fn hash_repartition_with_dropping_output_stream() { let task_ctx = Arc::new(TaskContext::default()); let partitioning = Partitioning::Hash( - vec![Arc::new(crate::physical_plan::expressions::Column::new( + vec![Arc::new(crate::expressions::Column::new( "my_awesome_field", 0, ))], @@ -1248,9 +1242,7 @@ mod tests { let exec = RepartitionExec::try_new(input.clone(), partitioning.clone()).unwrap(); let output_stream1 = exec.execute(1, task_ctx.clone()).unwrap(); input.wait().await; - let batches_without_drop = crate::physical_plan::common::collect(output_stream1) - .await - .unwrap(); + let batches_without_drop = crate::common::collect(output_stream1).await.unwrap(); // run some checks on the result let items_vec = str_batches_to_vec(&batches_without_drop); @@ -1272,9 +1264,7 @@ mod tests { // *before* any outputs are produced std::mem::drop(output_stream0); input.wait().await; - let batches_with_drop = crate::physical_plan::common::collect(output_stream1) - .await - .unwrap(); + let batches_with_drop = crate::common::collect(output_stream1).await.unwrap(); assert_eq!(batches_without_drop, batches_with_drop); } @@ -1359,22 +1349,16 @@ mod tests { )]) .unwrap(); let partitioning = Partitioning::Hash( - vec![Arc::new(crate::physical_plan::expressions::Column::new( - "a", 0, - ))], + vec![Arc::new(crate::expressions::Column::new("a", 0))], 2, ); let schema = batch.schema(); let input = MockExec::new(vec![Ok(batch)], schema); let exec = RepartitionExec::try_new(Arc::new(input), partitioning).unwrap(); let output_stream0 = exec.execute(0, task_ctx.clone()).unwrap(); - let batch0 = crate::physical_plan::common::collect(output_stream0) - .await - .unwrap(); + let 
batch0 = crate::common::collect(output_stream0).await.unwrap(); let output_stream1 = exec.execute(1, task_ctx.clone()).unwrap(); - let batch1 = crate::physical_plan::common::collect(output_stream1) - .await - .unwrap(); + let batch1 = crate::common::collect(output_stream1).await.unwrap(); assert!(batch0.is_empty() || batch1.is_empty()); Ok(()) } diff --git a/datafusion/physical-plan/src/sorts/cursor.rs b/datafusion/physical-plan/src/sorts/cursor.rs index c0c791288644b..baa417649fb08 100644 --- a/datafusion/physical-plan/src/sorts/cursor.rs +++ b/datafusion/physical-plan/src/sorts/cursor.rs @@ -15,7 +15,7 @@ // specific language governing permissions and limitations // under the License. -use crate::physical_plan::sorts::sort::SortOptions; +use crate::sorts::sort::SortOptions; use arrow::buffer::ScalarBuffer; use arrow::datatypes::ArrowNativeTypeOp; use arrow::row::{Row, Rows}; diff --git a/datafusion/physical-plan/src/sorts/merge.rs b/datafusion/physical-plan/src/sorts/merge.rs index f8a1457dd62a1..67685509abe5b 100644 --- a/datafusion/physical-plan/src/sorts/merge.rs +++ b/datafusion/physical-plan/src/sorts/merge.rs @@ -18,15 +18,11 @@ //! Merge that deals with an arbitrary size of streaming inputs. //! This is an order-preserving merge. -use crate::physical_plan::metrics::BaselineMetrics; -use crate::physical_plan::sorts::builder::BatchBuilder; -use crate::physical_plan::sorts::cursor::Cursor; -use crate::physical_plan::sorts::stream::{ - FieldCursorStream, PartitionedStream, RowCursorStream, -}; -use crate::physical_plan::{ - PhysicalSortExpr, RecordBatchStream, SendableRecordBatchStream, -}; +use crate::metrics::BaselineMetrics; +use crate::sorts::builder::BatchBuilder; +use crate::sorts::cursor::Cursor; +use crate::sorts::stream::{FieldCursorStream, PartitionedStream, RowCursorStream}; +use crate::{PhysicalSortExpr, RecordBatchStream, SendableRecordBatchStream}; use arrow::datatypes::{DataType, SchemaRef}; use arrow::record_batch::RecordBatch; use arrow_array::*; diff --git a/datafusion/physical-plan/src/sorts/sort.rs b/datafusion/physical-plan/src/sorts/sort.rs index 82badb7d879c9..695272767696d 100644 --- a/datafusion/physical-plan/src/sorts/sort.rs +++ b/datafusion/physical-plan/src/sorts/sort.rs @@ -19,14 +19,14 @@ //! It will do in-memory sorting if it has enough memory budget //! but spills to disk if needed. 
-use crate::physical_plan::common::{spawn_buffered, IPCWriter}; -use crate::physical_plan::expressions::PhysicalSortExpr; -use crate::physical_plan::metrics::{ +use crate::common::{spawn_buffered, IPCWriter}; +use crate::expressions::PhysicalSortExpr; +use crate::metrics::{ BaselineMetrics, Count, ExecutionPlanMetricsSet, MetricBuilder, MetricsSet, }; -use crate::physical_plan::sorts::merge::streaming_merge; -use crate::physical_plan::stream::{RecordBatchReceiverStream, RecordBatchStreamAdapter}; -use crate::physical_plan::{ +use crate::sorts::merge::streaming_merge; +use crate::stream::{RecordBatchReceiverStream, RecordBatchStreamAdapter}; +use crate::{ DisplayAs, DisplayFormatType, Distribution, EmptyRecordBatchStream, ExecutionPlan, Partitioning, SendableRecordBatchStream, Statistics, }; @@ -890,10 +890,10 @@ impl ExecutionPlan for SortExec { #[cfg(test)] mod tests { use super::*; - use crate::physical_plan::coalesce_partitions::CoalescePartitionsExec; - use crate::physical_plan::collect; - use crate::physical_plan::expressions::col; - use crate::physical_plan::memory::MemoryExec; + use crate::coalesce_partitions::CoalescePartitionsExec; + use crate::collect; + use crate::expressions::col; + use crate::memory::MemoryExec; use crate::test; use crate::test::assert_is_pending; use crate::test::exec::{assert_strong_count_converges_to_zero, BlockingExec}; diff --git a/datafusion/physical-plan/src/sorts/sort_preserving_merge.rs b/datafusion/physical-plan/src/sorts/sort_preserving_merge.rs index c9f693a8a24e4..507d66c920fb5 100644 --- a/datafusion/physical-plan/src/sorts/sort_preserving_merge.rs +++ b/datafusion/physical-plan/src/sorts/sort_preserving_merge.rs @@ -20,13 +20,11 @@ use std::any::Any; use std::sync::Arc; -use crate::physical_plan::common::spawn_buffered; -use crate::physical_plan::expressions::PhysicalSortExpr; -use crate::physical_plan::metrics::{ - BaselineMetrics, ExecutionPlanMetricsSet, MetricsSet, -}; -use crate::physical_plan::sorts::streaming_merge; -use crate::physical_plan::{ +use crate::common::spawn_buffered; +use crate::expressions::PhysicalSortExpr; +use crate::metrics::{BaselineMetrics, ExecutionPlanMetricsSet, MetricsSet}; +use crate::sorts::streaming_merge; +use crate::{ DisplayAs, DisplayFormatType, Distribution, ExecutionPlan, Partitioning, SendableRecordBatchStream, Statistics, }; @@ -280,16 +278,16 @@ mod tests { use futures::{FutureExt, StreamExt}; use tempfile::TempDir; - use crate::physical_plan::coalesce_partitions::CoalescePartitionsExec; - use crate::physical_plan::expressions::col; - use crate::physical_plan::memory::MemoryExec; - use crate::physical_plan::metrics::MetricValue; - use crate::physical_plan::sorts::sort::SortExec; - use crate::physical_plan::stream::RecordBatchReceiverStream; - use crate::physical_plan::{collect, common}; + use crate::coalesce_partitions::CoalescePartitionsExec; + use crate::expressions::col; + use crate::memory::MemoryExec; + use crate::metrics::MetricValue; + use crate::sorts::sort::SortExec; + use crate::stream::RecordBatchReceiverStream; use crate::test::exec::{assert_strong_count_converges_to_zero, BlockingExec}; use crate::test::{self, assert_is_pending}; use crate::{assert_batches_eq, test_util}; + use crate::{collect, common}; use arrow::array::{Int32Array, StringArray, TimestampNanosecondArray}; use super::*; diff --git a/datafusion/physical-plan/src/sorts/stream.rs b/datafusion/physical-plan/src/sorts/stream.rs index 9ef13b7eb25e4..a7f9e7380c473 100644 --- a/datafusion/physical-plan/src/sorts/stream.rs +++ 
b/datafusion/physical-plan/src/sorts/stream.rs @@ -15,9 +15,9 @@ // specific language governing permissions and limitations // under the License. -use crate::physical_plan::sorts::cursor::{FieldArray, FieldCursor, RowCursor}; -use crate::physical_plan::SendableRecordBatchStream; -use crate::physical_plan::{PhysicalExpr, PhysicalSortExpr}; +use crate::sorts::cursor::{FieldArray, FieldCursor, RowCursor}; +use crate::SendableRecordBatchStream; +use crate::{PhysicalExpr, PhysicalSortExpr}; use arrow::array::Array; use arrow::datatypes::Schema; use arrow::record_batch::RecordBatch; diff --git a/datafusion/physical-plan/src/stream.rs b/datafusion/physical-plan/src/stream.rs index 55683a5df3d7b..a3fb856c326d0 100644 --- a/datafusion/physical-plan/src/stream.rs +++ b/datafusion/physical-plan/src/stream.rs @@ -22,7 +22,7 @@ use std::sync::Arc; use std::task::Context; use std::task::Poll; -use crate::physical_plan::displayable; +use crate::displayable; use arrow::{datatypes::SchemaRef, record_batch::RecordBatch}; use datafusion_common::DataFusionError; use datafusion_common::Result; diff --git a/datafusion/physical-plan/src/streaming.rs b/datafusion/physical-plan/src/streaming.rs index 6c33f88a3991e..00809b71e4431 100644 --- a/datafusion/physical-plan/src/streaming.rs +++ b/datafusion/physical-plan/src/streaming.rs @@ -28,9 +28,9 @@ use datafusion_common::{internal_err, plan_err, DataFusionError, Result, Statist use datafusion_physical_expr::{LexOrdering, PhysicalSortExpr}; use log::debug; -use crate::physical_plan::display::{OutputOrderingDisplay, ProjectSchemaDisplay}; -use crate::physical_plan::stream::RecordBatchStreamAdapter; -use crate::physical_plan::{ExecutionPlan, Partitioning, SendableRecordBatchStream}; +use crate::display::{OutputOrderingDisplay, ProjectSchemaDisplay}; +use crate::stream::RecordBatchStreamAdapter; +use crate::{ExecutionPlan, Partitioning, SendableRecordBatchStream}; use datafusion_execution::TaskContext; use super::{DisplayAs, DisplayFormatType}; diff --git a/datafusion/physical-plan/src/tree_node.rs b/datafusion/physical-plan/src/tree_node.rs index fad6508fdabef..bce906a00c4d8 100644 --- a/datafusion/physical-plan/src/tree_node.rs +++ b/datafusion/physical-plan/src/tree_node.rs @@ -17,7 +17,7 @@ //! This module provides common traits for visiting or rewriting tree nodes easily. 
-use crate::physical_plan::{with_new_children_if_necessary, ExecutionPlan}; +use crate::{with_new_children_if_necessary, ExecutionPlan}; use datafusion_common::tree_node::{DynTreeNode, Transformed}; use datafusion_common::Result; use std::sync::Arc; diff --git a/datafusion/physical-plan/src/udaf.rs b/datafusion/physical-plan/src/udaf.rs index 70a43bb397f30..7cc3cc7d59fed 100644 --- a/datafusion/physical-plan/src/udaf.rs +++ b/datafusion/physical-plan/src/udaf.rs @@ -27,9 +27,9 @@ use arrow::{ }; use super::{expressions::format_state_name, Accumulator, AggregateExpr}; -use crate::physical_plan::PhysicalExpr; use datafusion_common::{not_impl_err, DataFusionError, Result}; pub use datafusion_expr::AggregateUDF; +use datafusion_physical_expr::PhysicalExpr; use datafusion_physical_expr::aggregate::utils::down_cast_any_ref; use std::sync::Arc; diff --git a/datafusion/physical-plan/src/union.rs b/datafusion/physical-plan/src/union.rs index 491d24c2897b8..8e0d871e0e34a 100644 --- a/datafusion/physical-plan/src/union.rs +++ b/datafusion/physical-plan/src/union.rs @@ -41,9 +41,9 @@ use super::{ ColumnStatistics, DisplayFormatType, ExecutionPlan, Partitioning, RecordBatchStream, SendableRecordBatchStream, Statistics, }; -use crate::physical_plan::common::get_meet_of_orderings; -use crate::physical_plan::stream::ObservedStream; -use crate::physical_plan::{expressions, metrics::BaselineMetrics}; +use crate::common::get_meet_of_orderings; +use crate::stream::ObservedStream; +use crate::{expressions, metrics::BaselineMetrics}; use datafusion_common::Result; use datafusion_execution::TaskContext; use tokio::macros::support::thread_rng_n; diff --git a/datafusion/physical-plan/src/unnest.rs b/datafusion/physical-plan/src/unnest.rs index 40c4edc953309..410ea97887e0c 100644 --- a/datafusion/physical-plan/src/unnest.rs +++ b/datafusion/physical-plan/src/unnest.rs @@ -37,7 +37,7 @@ use log::trace; use std::time::Instant; use std::{any::Any, sync::Arc}; -use crate::physical_plan::{ +use crate::{ expressions::Column, DisplayFormatType, Distribution, EquivalenceProperties, ExecutionPlan, Partitioning, PhysicalExpr, PhysicalSortExpr, RecordBatchStream, SendableRecordBatchStream, Statistics, @@ -309,7 +309,7 @@ fn build_batch_generic_list Date: Mon, 28 Aug 2023 08:14:00 -0400 Subject: [PATCH 03/15] Update tests --- datafusion-cli/Cargo.lock | 121 ++++-- datafusion-examples/examples/csv_opener.rs | 2 +- datafusion/core/Cargo.toml | 1 + .../physical_plan/file_scan_config.rs | 30 +- datafusion/core/src/lib.rs | 6 +- .../enforce_distribution.rs | 21 +- .../src/physical_optimizer/join_selection.rs | 4 +- .../src/physical_optimizer/sort_pushdown.rs | 20 +- datafusion/core/src/physical_planner.rs | 4 +- datafusion/core/src/test/mod.rs | 161 ++++---- datafusion/core/tests/sql/displayable.rs | 57 +++ datafusion/core/tests/sql/mod.rs | 1 + datafusion/physical-plan/Cargo.toml | 31 +- .../physical-plan/src/aggregates/mod.rs | 49 +-- datafusion/physical-plan/src/analyze.rs | 2 +- .../physical-plan/src/coalesce_batches.rs | 21 +- .../physical-plan/src/coalesce_partitions.rs | 8 +- datafusion/physical-plan/src/display.rs | 3 +- datafusion/physical-plan/src/empty.rs | 12 +- datafusion/physical-plan/src/filter.rs | 59 +-- .../physical-plan/src/joins/cross_join.rs | 13 +- .../physical-plan/src/joins/hash_join.rs | 41 ++- .../src/joins/nested_loop_join.rs | 17 +- .../src/joins/sort_merge_join.rs | 32 +- .../physical-plan/src/joins/test_utils.rs | 10 +- datafusion/physical-plan/src/lib.rs | 42 +-- datafusion/physical-plan/src/limit.rs 
| 52 +-- .../physical-plan/src/metrics/baseline.rs | 2 +- .../physical-plan/src/metrics/builder.rs | 2 +- datafusion/physical-plan/src/metrics/mod.rs | 2 +- datafusion/physical-plan/src/projection.rs | 100 +---- .../physical-plan/src/repartition/mod.rs | 23 +- datafusion/physical-plan/src/sorts/sort.rs | 118 ++---- .../src/sorts/sort_preserving_merge.rs | 98 ++--- datafusion/physical-plan/src/test.rs | 343 ++++++++++++++++++ .../{core => physical-plan}/src/test/exec.rs | 17 +- datafusion/physical-plan/src/union.rs | 12 +- datafusion/physical-plan/src/values.rs | 31 +- datafusion/physical-plan/src/windows/mod.rs | 208 +---------- 39 files changed, 909 insertions(+), 867 deletions(-) create mode 100644 datafusion/core/tests/sql/displayable.rs create mode 100644 datafusion/physical-plan/src/test.rs rename datafusion/{core => physical-plan}/src/test/exec.rs (98%) diff --git a/datafusion-cli/Cargo.lock b/datafusion-cli/Cargo.lock index 3e16b756a01ba..0c6c6846a89a8 100644 --- a/datafusion-cli/Cargo.lock +++ b/datafusion-cli/Cargo.lock @@ -77,9 +77,9 @@ dependencies = [ [[package]] name = "anstyle" -version = "1.0.2" +version = "1.0.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "15c4c2c83f81532e5845a733998b6971faca23490340a418e9b72a3ec9de12ea" +checksum = "b84bf0a05bbb2a83e5eb6fa36bb6e87baa08193c35ff52bbf6b38d8af2890e46" [[package]] name = "arrayref" @@ -347,7 +347,7 @@ checksum = "bc00ceb34980c03614e35a3a4e218276a0a824e911d07651cd0d858a51e8c0f0" dependencies = [ "proc-macro2", "quote", - "syn 2.0.31", + "syn 2.0.32", ] [[package]] @@ -674,9 +674,9 @@ dependencies = [ [[package]] name = "base64" -version = "0.21.3" +version = "0.21.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "414dcefbc63d77c526a76b3afcf6fbb9b5e2791c19c3aa2297733208750c6e53" +checksum = "9ba43ea6f343b788c8764558649e08df62f86c6ef251fdaeb1ffd010a9ae50a2" [[package]] name = "base64-simd" @@ -1031,7 +1031,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1f34ba9a9bcb8645379e9de8cb3ecfcf4d1c85ba66d90deb3259206fa5aa193b" dependencies = [ "quote", - "syn 2.0.31", + "syn 2.0.32", ] [[package]] @@ -1066,6 +1066,7 @@ dependencies = [ "datafusion-expr", "datafusion-optimizer", "datafusion-physical-expr", + "datafusion-physical-plan", "datafusion-sql", "flate2", "futures", @@ -1111,7 +1112,7 @@ dependencies = [ "parking_lot", "predicates", "regex", - "rstest", + "rstest 0.17.0", "rustyline", "tokio", "url", @@ -1217,6 +1218,35 @@ dependencies = [ "uuid", ] +[[package]] +name = "datafusion-physical-plan" +version = "31.0.0" +dependencies = [ + "ahash", + "arrow", + "arrow-array", + "arrow-buffer", + "arrow-schema", + "async-trait", + "chrono", + "datafusion-common", + "datafusion-execution", + "datafusion-expr", + "datafusion-physical-expr", + "futures", + "half", + "hashbrown 0.14.0", + "indexmap 2.0.0", + "itertools 0.11.0", + "log", + "parking_lot", + "pin-project-lite", + "rand", + "rstest 0.18.2", + "tempfile", + "tokio", +] + [[package]] name = "datafusion-sql" version = "31.0.0" @@ -1502,7 +1532,7 @@ checksum = "89ca545a94061b6365f2c7355b4b32bd20df3ff95f02da9329b34ccc3bd6ee72" dependencies = [ "proc-macro2", "quote", - "syn 2.0.31", + "syn 2.0.32", ] [[package]] @@ -1959,9 +1989,9 @@ dependencies = [ [[package]] name = "linux-raw-sys" -version = "0.4.5" +version = "0.4.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "57bcfdad1b858c2db7c38303a6d2ad4dfaf5eb53dfeb0910128b2c26d6158503" +checksum = 
"1a9bad9f94746442c783ca431b22403b519cd7fbeed0533fdd6328b2f2212128" [[package]] name = "lock_api" @@ -2388,7 +2418,7 @@ checksum = "4359fd9c9171ec6e8c62926d6faaf553a8dc3f64e1507e76da7911b4f6a04405" dependencies = [ "proc-macro2", "quote", - "syn 2.0.31", + "syn 2.0.32", ] [[package]] @@ -2602,6 +2632,12 @@ version = "0.7.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "dbb5fb1acd8a1a18b3dd5be62d25485eb770e05afb408a9627d14d451bae12da" +[[package]] +name = "relative-path" +version = "1.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c707298afce11da2efef2f600116fa93ffa7a032b5d7b628aa17711ec81383ca" + [[package]] name = "reqwest" version = "0.11.20" @@ -2666,7 +2702,19 @@ checksum = "de1bb486a691878cd320c2f0d319ba91eeaa2e894066d8b5f8f117c000e9d962" dependencies = [ "futures", "futures-timer", - "rstest_macros", + "rstest_macros 0.17.0", + "rustc_version", +] + +[[package]] +name = "rstest" +version = "0.18.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "97eeab2f3c0a199bc4be135c36c924b6590b88c377d416494288c14f2db30199" +dependencies = [ + "futures", + "futures-timer", + "rstest_macros 0.18.2", "rustc_version", ] @@ -2684,6 +2732,23 @@ dependencies = [ "unicode-ident", ] +[[package]] +name = "rstest_macros" +version = "0.18.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d428f8247852f894ee1be110b375111b586d4fa431f6c46e64ba5a0dcccbe605" +dependencies = [ + "cfg-if", + "glob", + "proc-macro2", + "quote", + "regex", + "relative-path", + "rustc_version", + "syn 2.0.32", + "unicode-ident", +] + [[package]] name = "rustc-demangle" version = "0.1.23" @@ -2701,9 +2766,9 @@ dependencies = [ [[package]] name = "rustix" -version = "0.38.11" +version = "0.38.13" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c0c3dde1fc030af041adc40e79c0e7fbcf431dd24870053d187d7c66e4b87453" +checksum = "d7db8590df6dfcd144d22afd1b83b36c21a18d7cbc1dc4bb5295a8712e9eb662" dependencies = [ "bitflags 2.4.0", "errno", @@ -2888,14 +2953,14 @@ checksum = "4eca7ac642d82aa35b60049a6eccb4be6be75e599bd2e9adb5f875a737654af2" dependencies = [ "proc-macro2", "quote", - "syn 2.0.31", + "syn 2.0.32", ] [[package]] name = "serde_json" -version = "1.0.105" +version = "1.0.106" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "693151e1ac27563d6dbcec9dee9fbd5da8539b20fa14ad3752b2e6d363ace360" +checksum = "2cc66a619ed80bf7a0f6b17dd063a84b88f6dea1813737cf469aef1d081142c2" dependencies = [ "itoa", "ryu", @@ -2986,9 +3051,9 @@ dependencies = [ [[package]] name = "socket2" -version = "0.5.3" +version = "0.5.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2538b18701741680e0322a2302176d3253a35388e2e62f172f64f4f16605f877" +checksum = "4031e820eb552adee9295814c0ced9e5cf38ddf1e8b7d566d6de8e2538ea989e" dependencies = [ "libc", "windows-sys", @@ -3077,7 +3142,7 @@ dependencies = [ "proc-macro2", "quote", "rustversion", - "syn 2.0.31", + "syn 2.0.32", ] [[package]] @@ -3099,9 +3164,9 @@ dependencies = [ [[package]] name = "syn" -version = "2.0.31" +version = "2.0.32" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "718fa2415bcb8d8bd775917a1bf12a7931b6dfa890753378538118181e0cb398" +checksum = "239814284fd6f1a4ffe4ca893952cdd93c224b6a1571c9a9eadd670295c0c9e2" dependencies = [ "proc-macro2", "quote", @@ -3159,7 +3224,7 @@ checksum = "49922ecae66cc8a249b77e68d1d0623c1b2c514f0060c27cdc68bd62a1219d35" dependencies 
= [ "proc-macro2", "quote", - "syn 2.0.31", + "syn 2.0.32", ] [[package]] @@ -3237,7 +3302,7 @@ dependencies = [ "num_cpus", "parking_lot", "pin-project-lite", - "socket2 0.5.3", + "socket2 0.5.4", "tokio-macros", "windows-sys", ] @@ -3250,7 +3315,7 @@ checksum = "630bdcf245f78637c13ec01ffae6187cca34625e8c63150d424b59e55af2675e" dependencies = [ "proc-macro2", "quote", - "syn 2.0.31", + "syn 2.0.32", ] [[package]] @@ -3348,7 +3413,7 @@ checksum = "5f4f31f56159e98206da9efd823404b79b6ef3143b4a7ab76e67b1751b25a4ab" dependencies = [ "proc-macro2", "quote", - "syn 2.0.31", + "syn 2.0.32", ] [[package]] @@ -3520,7 +3585,7 @@ dependencies = [ "once_cell", "proc-macro2", "quote", - "syn 2.0.31", + "syn 2.0.32", "wasm-bindgen-shared", ] @@ -3554,7 +3619,7 @@ checksum = "54681b18a46765f095758388f2d0cf16eb8d4169b639ab575a8f5693af210c7b" dependencies = [ "proc-macro2", "quote", - "syn 2.0.31", + "syn 2.0.32", "wasm-bindgen-backend", "wasm-bindgen-shared", ] diff --git a/datafusion-examples/examples/csv_opener.rs b/datafusion-examples/examples/csv_opener.rs index 6366f16b7aa3f..0587b515b2d8d 100644 --- a/datafusion-examples/examples/csv_opener.rs +++ b/datafusion-examples/examples/csv_opener.rs @@ -17,7 +17,6 @@ use std::{sync::Arc, vec}; -use crate::metrics::ExecutionPlanMetricsSet; use datafusion::{ assert_batches_eq, datasource::{ @@ -26,6 +25,7 @@ use datafusion::{ physical_plan::{CsvConfig, CsvOpener, FileScanConfig, FileStream}, }, error::Result, + physical_plan::metrics::ExecutionPlanMetricsSet, test_util::aggr_test_schema, }; use datafusion_common::FileCompressionType; diff --git a/datafusion/core/Cargo.toml b/datafusion/core/Cargo.toml index d5738cb1fe13e..ab2821547d4e6 100644 --- a/datafusion/core/Cargo.toml +++ b/datafusion/core/Cargo.toml @@ -67,6 +67,7 @@ datafusion-execution = { path = "../execution", version = "31.0.0" } datafusion-expr = { path = "../expr", version = "31.0.0" } datafusion-optimizer = { path = "../optimizer", version = "31.0.0", default-features = false } datafusion-physical-expr = { path = "../physical-expr", version = "31.0.0", default-features = false } +datafusion-physical-plan = { path = "../physical-plan", version = "31.0.0", default-features = false } datafusion-sql = { path = "../sql", version = "31.0.0" } flate2 = { version = "1.0.24", optional = true } futures = "0.3" diff --git a/datafusion/core/src/datasource/physical_plan/file_scan_config.rs b/datafusion/core/src/datasource/physical_plan/file_scan_config.rs index e9ce5238c5962..819bfabae2902 100644 --- a/datafusion/core/src/datasource/physical_plan/file_scan_config.rs +++ b/datafusion/core/src/datasource/physical_plan/file_scan_config.rs @@ -496,11 +496,10 @@ fn create_output_array( #[cfg(test)] mod tests { + use arrow_array::Int32Array; + use super::*; - use crate::{ - test::{build_table_i32, columns}, - test_util::aggr_test_schema, - }; + use crate::{test::columns, test_util::aggr_test_schema}; #[test] fn physical_plan_config_no_projection() { @@ -776,4 +775,27 @@ mod tests { infinite_source: false, } } + + /// returns record batch with 3 columns of i32 in memory + pub fn build_table_i32( + a: (&str, &Vec), + b: (&str, &Vec), + c: (&str, &Vec), + ) -> RecordBatch { + let schema = Schema::new(vec![ + Field::new(a.0, DataType::Int32, false), + Field::new(b.0, DataType::Int32, false), + Field::new(c.0, DataType::Int32, false), + ]); + + RecordBatch::try_new( + Arc::new(schema), + vec![ + Arc::new(Int32Array::from(a.1.clone())), + Arc::new(Int32Array::from(b.1.clone())), + 
Arc::new(Int32Array::from(c.1.clone())), + ], + ) + .unwrap() + } } diff --git a/datafusion/core/src/lib.rs b/datafusion/core/src/lib.rs index c0ed13c8e063d..4f74888c840b1 100644 --- a/datafusion/core/src/lib.rs +++ b/datafusion/core/src/lib.rs @@ -428,7 +428,6 @@ pub mod datasource; pub mod error; pub mod execution; pub mod physical_optimizer; -pub mod physical_plan; pub mod physical_planner; pub mod prelude; pub mod scalar; @@ -467,6 +466,11 @@ pub mod physical_expr { pub use datafusion_physical_expr::*; } +/// re-export of [`datafusion_physical_plan`] crate +pub mod physical_plan { + pub use datafusion_physical_plan::*; +} + /// re-export of [`datafusion_sql`] crate pub mod sql { pub use datafusion_sql::*; diff --git a/datafusion/core/src/physical_optimizer/enforce_distribution.rs b/datafusion/core/src/physical_optimizer/enforce_distribution.rs index 5be53cf81b606..2fbe5a9b39be3 100644 --- a/datafusion/core/src/physical_optimizer/enforce_distribution.rs +++ b/datafusion/core/src/physical_optimizer/enforce_distribution.rs @@ -378,7 +378,7 @@ fn adjust_input_keys_ordering( )?) } else if let Some(aggregate_exec) = plan_any.downcast_ref::<AggregateExec>() { if !parent_required.is_empty() { - match aggregate_exec.mode { + match aggregate_exec.mode() { AggregateMode::FinalPartitioned => Some(reorder_aggregate_keys( requirements.plan.clone(), &parent_required, @@ -390,9 +390,8 @@ // Keep everything unchanged None } - } else if let Some(ProjectionExec { expr, .. }) = - plan_any.downcast_ref::<ProjectionExec>() - { + } else if let Some(proj) = plan_any.downcast_ref::<ProjectionExec>() { + let expr = proj.expr(); // For Projection, we need to transform the requirements to the columns before the Projection // And then to push down the requirements // Construct a mapping from new name to the original Column @@ -487,7 +486,7 @@ fn reorder_aggregate_keys( agg_exec: &AggregateExec, ) -> Result<PlanWithKeyRequirements> { let out_put_columns = agg_exec - .group_by + .group_by() .expr() .iter() .enumerate() @@ -500,7 +499,7 @@ .collect::<Vec<_>>(); if parent_required.len() != out_put_exprs.len() - || !agg_exec.group_by.null_expr().is_empty() + || !agg_exec.group_by().null_expr().is_empty() || expr_list_eq_strict_order(&out_put_exprs, parent_required) { Ok(PlanWithKeyRequirements::new(agg_plan)) @@ -519,7 +518,7 @@ input_schema, ..
}) = - agg_exec.input.as_any().downcast_ref::() + agg_exec.input().as_any().downcast_ref::() { if matches!(mode, AggregateMode::Partial) { let mut new_group_exprs = vec![]; @@ -564,11 +563,11 @@ fn reorder_aggregate_keys( let new_final_agg = Arc::new(AggregateExec::try_new( AggregateMode::FinalPartitioned, new_group_by, - agg_exec.aggr_expr.to_vec(), - agg_exec.filter_expr.to_vec(), - agg_exec.order_by_expr.to_vec(), + agg_exec.aggr_expr().to_vec(), + agg_exec.filter_expr().to_vec(), + agg_exec.order_by_expr().to_vec(), partial_agg, - agg_exec.input_schema.clone(), + agg_exec.input_schema().clone(), )?); // Need to create a new projection to change the expr ordering back diff --git a/datafusion/core/src/physical_optimizer/join_selection.rs b/datafusion/core/src/physical_optimizer/join_selection.rs index 628cc1da3b1eb..4cff4a8f6c555 100644 --- a/datafusion/core/src/physical_optimizer/join_selection.rs +++ b/datafusion/core/src/physical_optimizer/join_selection.rs @@ -579,14 +579,14 @@ fn apply_subrules( #[cfg(test)] mod tests_statistical { + use super::*; use crate::{ physical_plan::{ displayable, joins::PartitionMode, ColumnStatistics, Statistics, }, - test::exec::StatisticsExec, + test::StatisticsExec, }; - use super::*; use std::sync::Arc; use arrow::datatypes::{DataType, Field, Schema}; diff --git a/datafusion/core/src/physical_optimizer/sort_pushdown.rs b/datafusion/core/src/physical_optimizer/sort_pushdown.rs index 81c2e76b74c61..629011cb0faa5 100644 --- a/datafusion/core/src/physical_optimizer/sort_pushdown.rs +++ b/datafusion/core/src/physical_optimizer/sort_pushdown.rs @@ -140,7 +140,7 @@ pub(crate) fn pushdown_sorts( let parent_required_expr = PhysicalSortRequirement::to_sort_exprs( parent_required.ok_or_else(err)?.iter().cloned(), ); - new_plan = sort_exec.input.clone(); + new_plan = sort_exec.input().clone(); add_sort_above(&mut new_plan, parent_required_expr, sort_exec.fetch())?; }; let required_ordering = new_plan @@ -221,12 +221,12 @@ fn pushdown_requirement_to_children( ])) } else if let Some(smj) = plan.as_any().downcast_ref::() { // If the current plan is SortMergeJoinExec - let left_columns_len = smj.left.schema().fields().len(); + let left_columns_len = smj.left().schema().fields().len(); let parent_required_expr = PhysicalSortRequirement::to_sort_exprs( parent_required.ok_or_else(err)?.iter().cloned(), ); let expr_source_side = - expr_source_sides(&parent_required_expr, smj.join_type, left_columns_len); + expr_source_sides(&parent_required_expr, smj.join_type(), left_columns_len); match expr_source_side { Some(JoinSide::Left) => try_pushdown_requirements_to_join( smj, @@ -236,7 +236,7 @@ fn pushdown_requirement_to_children( ), Some(JoinSide::Right) => { let right_offset = - smj.schema().fields.len() - smj.right.schema().fields.len(); + smj.schema().fields.len() - smj.right().schema().fields.len(); let new_right_required = shift_right_required(parent_required.ok_or_else(err)?, right_offset)?; let new_right_required_expr = PhysicalSortRequirement::to_sort_exprs( @@ -331,8 +331,8 @@ fn try_pushdown_requirements_to_join( sort_expr: Vec, push_side: JoinSide, ) -> Result>>>> { - let left_ordering = smj.left.output_ordering().unwrap_or(&[]); - let right_ordering = smj.right.output_ordering().unwrap_or(&[]); + let left_ordering = smj.left().output_ordering().unwrap_or(&[]); + let right_ordering = smj.right().output_ordering().unwrap_or(&[]); let (new_left_ordering, new_right_ordering) = match push_side { JoinSide::Left => (sort_expr.as_slice(), right_ordering), JoinSide::Right 
=> (left_ordering, sort_expr.as_slice()), @@ -340,11 +340,11 @@ fn try_pushdown_requirements_to_join( let new_output_ordering = calculate_join_output_ordering( new_left_ordering, new_right_ordering, - smj.join_type, - &smj.on, - smj.left.schema().fields.len(), + smj.join_type(), + smj.on(), + smj.left().schema().fields.len(), &smj.maintains_input_order(), - Some(SortMergeJoinExec::probe_side(&smj.join_type)), + Some(SortMergeJoinExec::probe_side(&smj.join_type())), )?; Ok(ordering_satisfy_requirement( new_output_ordering.as_deref(), diff --git a/datafusion/core/src/physical_planner.rs b/datafusion/core/src/physical_planner.rs index def4d59873df5..485ec777e4a5c 100644 --- a/datafusion/core/src/physical_planner.rs +++ b/datafusion/core/src/physical_planner.rs @@ -814,8 +814,8 @@ impl DefaultPhysicalPlanner { // into a LAST_VALUE with the reverse ordering requirement. // To reflect such changes to subsequent stages, use the updated // `AggregateExpr`/`PhysicalSortExpr` objects. - let updated_aggregates = initial_aggr.aggr_expr.clone(); - let updated_order_bys = initial_aggr.order_by_expr.clone(); + let updated_aggregates = initial_aggr.aggr_expr().to_vec(); + let updated_order_bys = initial_aggr.order_by_expr().to_vec(); let (initial_aggr, next_partition_mode): ( Arc, diff --git a/datafusion/core/src/test/mod.rs b/datafusion/core/src/test/mod.rs index 011d2e64281ad..a26be4857d4c2 100644 --- a/datafusion/core/src/test/mod.rs +++ b/datafusion/core/src/test/mod.rs @@ -17,38 +17,37 @@ //! Common unit test utility methods -use crate::arrow::array::UInt32Array; use crate::datasource::listing::PartitionedFile; use crate::datasource::object_store::ObjectStoreUrl; use crate::datasource::physical_plan::{CsvExec, FileScanConfig}; use crate::datasource::{MemTable, TableProvider}; use crate::error::Result; use crate::logical_expr::LogicalPlan; -use crate::physical_plan::memory::MemoryExec; use crate::physical_plan::ExecutionPlan; use crate::test::object_store::local_unpartitioned_file; use crate::test_util::{aggr_test_schema, arrow_test_data}; use array::ArrayRef; use arrow::array::{self, Array, Decimal128Builder, Int32Array}; use arrow::datatypes::{DataType, Field, Schema, SchemaRef}; -use arrow::record_batch::{RecordBatch, RecordBatchOptions}; +use arrow::record_batch::RecordBatch; #[cfg(feature = "compression")] use bzip2::write::BzEncoder; #[cfg(feature = "compression")] use bzip2::Compression as BzCompression; use datafusion_common::{DataFusionError, Statistics}; use datafusion_common::{FileCompressionType, FileType}; -use datafusion_physical_expr::PhysicalSortExpr; +use datafusion_execution::{SendableRecordBatchStream, TaskContext}; +use datafusion_physical_expr::{Partitioning, PhysicalSortExpr}; +use datafusion_physical_plan::{DisplayAs, DisplayFormatType}; #[cfg(feature = "compression")] use flate2::write::GzEncoder; #[cfg(feature = "compression")] use flate2::Compression as GzCompression; -use futures::{Future, FutureExt}; +use std::any::Any; use std::fs::File; use std::io::prelude::*; use std::io::{BufReader, BufWriter}; use std::path::Path; -use std::pin::Pin; use std::sync::Arc; #[cfg(feature = "compression")] use xz2::write::XzEncoder; @@ -214,40 +213,6 @@ pub fn assert_fields_eq(plan: &LogicalPlan, expected: Vec<&str>) { assert_eq!(actual, expected); } -/// returns record batch with 3 columns of i32 in memory -pub fn build_table_i32( - a: (&str, &Vec), - b: (&str, &Vec), - c: (&str, &Vec), -) -> RecordBatch { - let schema = Schema::new(vec![ - Field::new(a.0, DataType::Int32, false), - 
Field::new(b.0, DataType::Int32, false), - Field::new(c.0, DataType::Int32, false), - ]); - - RecordBatch::try_new( - Arc::new(schema), - vec![ - Arc::new(Int32Array::from(a.1.clone())), - Arc::new(Int32Array::from(b.1.clone())), - Arc::new(Int32Array::from(c.1.clone())), - ], - ) - .unwrap() -} - -/// returns memory table scan wrapped around record batch with 3 columns of i32 -pub fn build_table_scan_i32( - a: (&str, &Vec), - b: (&str, &Vec), - c: (&str, &Vec), -) -> Arc { - let batch = build_table_i32(a, b, c); - let schema = batch.schema(); - Arc::new(MemoryExec::try_new(&[vec![batch]], schema, None).unwrap()) -} - /// Returns the column names on the schema pub fn columns(schema: &Schema) -> Vec { schema.fields().iter().map(|f| f.name().clone()).collect() @@ -280,14 +245,6 @@ pub fn make_partition(sz: i32) -> RecordBatch { RecordBatch::try_new(schema, vec![arr]).unwrap() } -/// Return a RecordBatch with a single array with row_count sz -pub fn make_batch_no_column(sz: usize) -> RecordBatch { - let schema = Arc::new(Schema::empty()); - - let options = RecordBatchOptions::new().with_row_count(Option::from(sz)); - RecordBatch::try_new_with_options(schema, vec![], &options).unwrap() -} - /// Return a new table which provide this decimal column pub fn table_with_decimal() -> Arc { let batch_decimal = make_decimal(); @@ -312,25 +269,6 @@ fn make_decimal() -> RecordBatch { RecordBatch::try_new(Arc::new(schema), vec![Arc::new(array)]).unwrap() } -/// Asserts that given future is pending. -pub fn assert_is_pending<'a, T>(fut: &mut Pin + Send + 'a>>) { - let waker = futures::task::noop_waker(); - let mut cx = futures::task::Context::from_waker(&waker); - let poll = fut.poll_unpin(&mut cx); - - assert!(poll.is_pending()); -} - -/// Create vector batches -pub fn create_vec_batches(schema: &Schema, n: usize) -> Vec { - let batch = create_batch(schema); - let mut vec = Vec::with_capacity(n); - for _ in 0..n { - vec.push(batch.clone()); - } - vec -} - /// Created a sorted Csv exec pub fn csv_exec_sorted( schema: &SchemaRef, @@ -359,15 +297,88 @@ pub fn csv_exec_sorted( )) } -/// Create batch -fn create_batch(schema: &Schema) -> RecordBatch { - RecordBatch::try_new( - Arc::new(schema.clone()), - vec![Arc::new(UInt32Array::from(vec![1, 2, 3, 4, 5, 6, 7, 8]))], - ) - .unwrap() +/// A mock execution plan that simply returns the provided statistics +#[derive(Debug, Clone)] +pub struct StatisticsExec { + stats: Statistics, + schema: Arc, +} +impl StatisticsExec { + pub fn new(stats: Statistics, schema: Schema) -> Self { + assert!( + stats + .column_statistics + .as_ref() + .map(|cols| cols.len() == schema.fields().len()) + .unwrap_or(true), + "if defined, the column statistics vector length should be the number of fields" + ); + Self { + stats, + schema: Arc::new(schema), + } + } +} + +impl DisplayAs for StatisticsExec { + fn fmt_as( + &self, + t: DisplayFormatType, + f: &mut std::fmt::Formatter, + ) -> std::fmt::Result { + match t { + DisplayFormatType::Default | DisplayFormatType::Verbose => { + write!( + f, + "StatisticsExec: col_count={}, row_count={:?}", + self.schema.fields().len(), + self.stats.num_rows, + ) + } + } + } +} + +impl ExecutionPlan for StatisticsExec { + fn as_any(&self) -> &dyn Any { + self + } + + fn schema(&self) -> SchemaRef { + Arc::clone(&self.schema) + } + + fn output_partitioning(&self) -> Partitioning { + Partitioning::UnknownPartitioning(2) + } + + fn output_ordering(&self) -> Option<&[PhysicalSortExpr]> { + None + } + + fn children(&self) -> Vec> { + vec![] + } + + fn 
with_new_children( + self: Arc<Self>, + _: Vec<Arc<dyn ExecutionPlan>>, + ) -> Result<Arc<dyn ExecutionPlan>> { + Ok(self) + } + + fn execute( + &self, + _partition: usize, + _context: Arc<TaskContext>, + ) -> Result<SendableRecordBatchStream> { + unimplemented!("This plan only serves for testing statistics") + } + + fn statistics(&self) -> Statistics { + self.stats.clone() + } } -pub mod exec; pub mod object_store; pub mod variable; diff --git a/datafusion/core/tests/sql/displayable.rs b/datafusion/core/tests/sql/displayable.rs new file mode 100644 index 0000000000000..b736820009cc9 --- /dev/null +++ b/datafusion/core/tests/sql/displayable.rs @@ -0,0 +1,57 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +use object_store::path::Path; + +use datafusion::prelude::*; +use datafusion_physical_plan::displayable; + +#[tokio::test] +async fn test_displayable() { + // Hard code target_partitions as it appears in the RepartitionExec output + let config = SessionConfig::new().with_target_partitions(3); + let ctx = SessionContext::with_config(config); + + // register a table + ctx.register_csv("example", "tests/data/example.csv", CsvReadOptions::new()) + .await + .unwrap(); + + // create a plan to run a SQL query + let dataframe = ctx.sql("SELECT a FROM example WHERE a < 5").await.unwrap(); + let physical_plan = dataframe.create_physical_plan().await.unwrap(); + + // Format using display string in verbose mode + let displayable_plan = displayable(physical_plan.as_ref()); + let plan_string = format!("{}", displayable_plan.indent(true)); + + let working_directory = std::env::current_dir().unwrap(); + let normalized = Path::from_filesystem_path(working_directory).unwrap(); + let plan_string = plan_string.replace(normalized.as_ref(), "WORKING_DIR"); + + assert_eq!("CoalesceBatchesExec: target_batch_size=8192\ + \n FilterExec: a@0 < 5\ + \n RepartitionExec: partitioning=RoundRobinBatch(3), input_partitions=1\ + \n CsvExec: file_groups={1 group: [[WORKING_DIR/tests/data/example.csv]]}, projection=[a], has_header=true", + plan_string.trim()); + + let one_line = format!("{}", displayable_plan.one_line()); + assert_eq!( + "CoalesceBatchesExec: target_batch_size=8192", + one_line.trim() + ); +} diff --git a/datafusion/core/tests/sql/mod.rs b/datafusion/core/tests/sql/mod.rs index 35423234db88b..1e29f791c560a 100644 --- a/datafusion/core/tests/sql/mod.rs +++ b/datafusion/core/tests/sql/mod.rs @@ -82,6 +82,7 @@ pub mod arrow_files; #[cfg(feature = "avro")] pub mod create_drop; pub mod csv_files; +pub mod displayable; pub mod explain_analyze; pub mod expr; pub mod group_by; diff --git a/datafusion/physical-plan/Cargo.toml b/datafusion/physical-plan/Cargo.toml index 161569513a783..60422269c9a70 100644 --- a/datafusion/physical-plan/Cargo.toml +++ b/datafusion/physical-plan/Cargo.toml @@ -32,8 +32,6 @@ rust-version = { workspace = true } [lib] name = 
"datafusion_physical_plan" path = "src/lib.rs" -[features] - [dependencies] ahash = { version = "0.8", default-features = false, features = ["runtime-rng"] } arrow = { workspace = true } @@ -41,36 +39,23 @@ arrow-array = { workspace = true } arrow-buffer = { workspace = true } arrow-schema = { workspace = true } async-trait = "0.1.41" -#base64 = { version = "0.21", optional = true } -#blake2 = { version = "^0.10.2", optional = true } -#blake3 = { version = "1.0", optional = true } chrono = { version = "0.4.23", default-features = false } -datafusion-common = { path = "../common", version = "30.0.0" } -datafusion-expr = { path = "../expr", version = "30.0.0" } -datafusion-execution = { path = "../execution", version = "30.0.0" } -datafusion-physical-expr = { path = "../physical-expr", version = "30.0.0" } +datafusion-common = { path = "../common", version = "31.0.0" } +datafusion-execution = { path = "../execution", version = "31.0.0" } +datafusion-expr = { path = "../expr", version = "31.0.0" } +datafusion-physical-expr = { path = "../physical-expr", version = "31.0.0" } futures = "0.3" half = { version = "2.1", default-features = false } hashbrown = { version = "0.14", features = ["raw"] } -#hex = { version = "0.4", optional = true } indexmap = "2.0.0" itertools = { version = "0.11", features = ["use_std"] } -#libc = "0.2.140" log = "^0.4" -#md-5 = { version = "^0.10.0", optional = true } parking_lot = "0.12" pin-project-lite = "^0.2.7" -#paste = "^1.0" -#petgraph = "0.6.2" -#rand = "0.8" -#regex = { version = "1.8", optional = true } -#sha2 = { version = "^0.10.1", optional = true } -tempfile = "3" -tokio = { version = "1.28", features = ["macros", "rt", "rt-multi-thread", "sync", "fs", "parking_lot"] } -#unicode-segmentation = { version = "^1.7.1", optional = true } -#uuid = { version = "^1.2", features = ["v4"] } #[dev-dependencies] #criterion = "0.5" -#rand = "0.8" -#rstest = "0.18.0" +rand = "0.8" +rstest = "0.18.0" +tempfile = "3" +tokio = { version = "1.28", features = ["macros", "rt", "rt-multi-thread", "sync", "fs", "parking_lot"] } diff --git a/datafusion/physical-plan/src/aggregates/mod.rs b/datafusion/physical-plan/src/aggregates/mod.rs index cb88d132d0d8a..f9256d3f565ce 100644 --- a/datafusion/physical-plan/src/aggregates/mod.rs +++ b/datafusion/physical-plan/src/aggregates/mod.rs @@ -256,23 +256,23 @@ pub(crate) struct AggregationOrdering { #[derive(Debug)] pub struct AggregateExec { /// Aggregation mode (full, partial) - pub(crate) mode: AggregateMode, + pub mode: AggregateMode, /// Group by expressions - pub(crate) group_by: PhysicalGroupBy, + pub group_by: PhysicalGroupBy, /// Aggregate expressions - pub(crate) aggr_expr: Vec>, + pub aggr_expr: Vec>, /// FILTER (WHERE clause) expression for each aggregate expression - pub(crate) filter_expr: Vec>>, + pub filter_expr: Vec>>, /// (ORDER BY clause) expression for each aggregate expression - pub(crate) order_by_expr: Vec>, + pub order_by_expr: Vec>, /// Input plan, could be a partial aggregate or the input to the aggregate - pub(crate) input: Arc, + pub input: Arc, /// Schema after the aggregate is applied schema: SchemaRef, /// Input schema before any aggregation is applied. 
For partial aggregate this will be the /// same as input.schema() but for the final aggregate it will be the same as the input /// to the partial aggregate - pub(crate) input_schema: SchemaRef, + pub input_schema: SchemaRef, /// The columns map used to normalize out expressions like Partitioning and PhysicalSortExpr /// The key is the column from the input schema and the values are the columns from the output schema columns_map: HashMap>, @@ -727,6 +727,10 @@ impl AggregateExec { )) } } + + pub fn group_by(&self) -> &PhysicalGroupBy { + &self.group_by + } } impl DisplayAs for AggregateExec { @@ -1182,24 +1186,22 @@ fn evaluate_group_by( #[cfg(test)] mod tests { use super::*; - use crate::physical_plan::aggregates::GroupByOrderMode::{ - FullyOrdered, PartiallyOrdered, - }; - use crate::physical_plan::aggregates::{ + use crate::aggregates::GroupByOrderMode::{FullyOrdered, PartiallyOrdered}; + use crate::aggregates::{ get_finest_requirement, get_working_mode, AggregateExec, AggregateMode, PhysicalGroupBy, }; - use crate::physical_plan::coalesce_batches::CoalesceBatchesExec; - use crate::physical_plan::coalesce_partitions::CoalescePartitionsExec; - use crate::physical_plan::expressions::{col, Avg}; - use crate::physical_plan::memory::MemoryExec; - use crate::physical_plan::{ + use crate::coalesce_batches::CoalesceBatchesExec; + use crate::coalesce_partitions::CoalescePartitionsExec; + use crate::expressions::{col, Avg}; + use crate::memory::MemoryExec; + use crate::test::exec::{assert_strong_count_converges_to_zero, BlockingExec}; + use crate::test::{assert_is_pending, mem_exec}; + use crate::{assert_batches_eq, assert_batches_sorted_eq, common}; + use crate::{ DisplayAs, ExecutionPlan, Partitioning, RecordBatchStream, SendableRecordBatchStream, Statistics, }; - use crate::test::exec::{assert_strong_count_converges_to_zero, BlockingExec}; - use crate::test::{assert_is_pending, csv_exec_sorted}; - use crate::{assert_batches_eq, assert_batches_sorted_eq, physical_plan::common}; use arrow::array::{Float64Array, UInt32Array}; use arrow::compute::{concat_batches, SortOptions}; @@ -1260,7 +1262,8 @@ mod tests { sort_expr("b", &test_schema), sort_expr("c", &test_schema), ]; - let input = csv_exec_sorted(&test_schema, sort_exprs, true); + let input = mem_exec(1).with_sort_information(vec![sort_exprs]); + let input = Arc::new(input) as _; // test cases consists of vector of tuples. Where each tuple represents a single test case. // First field in the tuple is Vec where each element in the vector represents GROUP BY columns @@ -1846,7 +1849,7 @@ mod tests { schema, )?); - let fut = crate::physical_plan::collect(aggregate_exec, task_ctx); + let fut = crate::collect(aggregate_exec, task_ctx); let mut fut = fut.boxed(); assert_is_pending(&mut fut); @@ -1885,7 +1888,7 @@ mod tests { schema, )?); - let fut = crate::physical_plan::collect(aggregate_exec, task_ctx); + let fut = crate::collect(aggregate_exec, task_ctx); let mut fut = fut.boxed(); assert_is_pending(&mut fut); @@ -1995,7 +1998,7 @@ mod tests { schema, )?) 
as Arc<dyn ExecutionPlan>; - let result = crate::physical_plan::collect(aggregate_final, task_ctx).await?; + let result = crate::collect(aggregate_final, task_ctx).await?; if is_first_acc { let expected = [ "+---+----------------+", diff --git a/datafusion/physical-plan/src/analyze.rs b/datafusion/physical-plan/src/analyze.rs index e055400c18696..b7e95c8b1b802 100644 --- a/datafusion/physical-plan/src/analyze.rs +++ b/datafusion/physical-plan/src/analyze.rs @@ -242,7 +242,7 @@ mod tests { use futures::FutureExt; use crate::{ - physical_plan::collect, + collect, test::{ assert_is_pending, exec::{assert_strong_count_converges_to_zero, BlockingExec}, diff --git a/datafusion/physical-plan/src/coalesce_batches.rs b/datafusion/physical-plan/src/coalesce_batches.rs index 7e6e129934168..f46a228064fe7 100644 --- a/datafusion/physical-plan/src/coalesce_batches.rs +++ b/datafusion/physical-plan/src/coalesce_batches.rs @@ -309,9 +309,9 @@ pub fn concat_batches( #[cfg(test)] mod tests { use super::*; - use crate::test::create_vec_batches; use crate::{memory::MemoryExec, repartition::RepartitionExec}; use arrow::datatypes::{DataType, Field, Schema}; + use arrow_array::UInt32Array; #[tokio::test(flavor = "multi_thread")] async fn test_concat_batches() -> Result<()> { @@ -365,4 +365,23 @@ } Ok(output_partitions) } + + /// Create vector batches + fn create_vec_batches(schema: &Schema, n: usize) -> Vec<RecordBatch> { + let batch = create_batch(schema); + let mut vec = Vec::with_capacity(n); + for _ in 0..n { + vec.push(batch.clone()); + } + vec + } + + /// Create batch + fn create_batch(schema: &Schema) -> RecordBatch { + RecordBatch::try_new( + Arc::new(schema.clone()), + vec![Arc::new(UInt32Array::from(vec![1, 2, 3, 4, 5, 6, 7, 8]))], + ) + .unwrap() + } } diff --git a/datafusion/physical-plan/src/coalesce_partitions.rs b/datafusion/physical-plan/src/coalesce_partitions.rs index 296743ba63145..8eddf57ae5515 100644 --- a/datafusion/physical-plan/src/coalesce_partitions.rs +++ b/datafusion/physical-plan/src/coalesce_partitions.rs @@ -169,7 +169,6 @@ mod tests { use arrow::datatypes::{DataType, Field, Schema}; use futures::FutureExt; - use tempfile::TempDir; use super::*; use crate::test::exec::{ @@ -183,8 +182,7 @@ let task_ctx = Arc::new(TaskContext::default()); let num_partitions = 4; - let tmp_dir = TempDir::new()?; - let csv = test::scan_partitioned_csv(num_partitions, tmp_dir.path())?; + let csv = test::scan_partitioned(num_partitions); // input should have 4 partitions assert_eq!(csv.output_partitioning().partition_count(), num_partitions); @@ -199,9 +197,9 @@ let batches = common::collect(iter).await?; assert_eq!(batches.len(), num_partitions); - // there should be a total of 100 rows + // there should be a total of 400 rows (100 per partition) let row_count: usize = batches.iter().map(|batch| batch.num_rows()).sum(); - assert_eq!(row_count, 100); + assert_eq!(row_count, 400); Ok(()) } diff --git a/datafusion/physical-plan/src/display.rs b/datafusion/physical-plan/src/display.rs index 0b4379fe96e58..e4a4e113eb07e 100644 --- a/datafusion/physical-plan/src/display.rs +++ b/datafusion/physical-plan/src/display.rs @@ -16,8 +16,7 @@ // under the License. //! Implementation of physical plan display. See -//! [`crate::physical_plan::displayable`] for examples of how to -//! format +//!
[`crate::displayable`] for examples of how to format use std::fmt; diff --git a/datafusion/physical-plan/src/empty.rs b/datafusion/physical-plan/src/empty.rs index 2f03e0750e7b0..675dac9ad2656 100644 --- a/datafusion/physical-plan/src/empty.rs +++ b/datafusion/physical-plan/src/empty.rs @@ -173,12 +173,12 @@ impl ExecutionPlan for EmptyExec { mod tests { use super::*; use crate::with_new_children_if_necessary; - use crate::{physical_plan::common, test_util}; + use crate::{common, test}; #[tokio::test] async fn empty() -> Result<()> { let task_ctx = Arc::new(TaskContext::default()); - let schema = test_util::aggr_test_schema(); + let schema = test::aggr_test_schema(); let empty = EmptyExec::new(false, schema.clone()); assert_eq!(empty.schema(), schema); @@ -193,7 +193,7 @@ mod tests { #[test] fn with_new_children() -> Result<()> { - let schema = test_util::aggr_test_schema(); + let schema = test::aggr_test_schema(); let empty = Arc::new(EmptyExec::new(false, schema.clone())); let empty_with_row = Arc::new(EmptyExec::new(true, schema)); @@ -215,7 +215,7 @@ mod tests { #[tokio::test] async fn invalid_execute() -> Result<()> { let task_ctx = Arc::new(TaskContext::default()); - let schema = test_util::aggr_test_schema(); + let schema = test::aggr_test_schema(); let empty = EmptyExec::new(false, schema); // ask for the wrong partition @@ -227,7 +227,7 @@ mod tests { #[tokio::test] async fn produce_one_row() -> Result<()> { let task_ctx = Arc::new(TaskContext::default()); - let schema = test_util::aggr_test_schema(); + let schema = test::aggr_test_schema(); let empty = EmptyExec::new(true, schema); let iter = empty.execute(0, task_ctx)?; @@ -242,7 +242,7 @@ mod tests { #[tokio::test] async fn produce_one_row_multiple_partition() -> Result<()> { let task_ctx = Arc::new(TaskContext::default()); - let schema = test_util::aggr_test_schema(); + let schema = test::aggr_test_schema(); let partitions = 3; let empty = EmptyExec::new(true, schema).with_partitions(partitions); diff --git a/datafusion/physical-plan/src/filter.rs b/datafusion/physical-plan/src/filter.rs index 1030eeba42508..15208fd0829e2 100644 --- a/datafusion/physical-plan/src/filter.rs +++ b/datafusion/physical-plan/src/filter.rs @@ -381,74 +381,17 @@ mod tests { use crate::expressions::*; use crate::test; use crate::test::exec::StatisticsExec; - use crate::test_util; use crate::ExecutionPlan; - use crate::{collect, with_new_children_if_necessary}; use arrow::datatypes::{DataType, Field, Schema}; - use datafusion_common::utils::DataPtr; use datafusion_common::ColumnStatistics; use datafusion_common::ScalarValue; use datafusion_expr::Operator; use std::iter::Iterator; use std::sync::Arc; - use tempfile::TempDir; - - #[tokio::test] - async fn simple_predicate() -> Result<()> { - let task_ctx = Arc::new(TaskContext::default()); - let schema = test_util::aggr_test_schema(); - - let partitions = 4; - let tmp_dir = TempDir::new()?; - let csv = test::scan_partitioned_csv(partitions, tmp_dir.path())?; - - let predicate: Arc = binary( - binary(col("c2", &schema)?, Operator::Gt, lit(1u32), &schema)?, - Operator::And, - binary(col("c2", &schema)?, Operator::Lt, lit(4u32), &schema)?, - &schema, - )?; - - let filter: Arc = - Arc::new(FilterExec::try_new(predicate, csv)?); - - let results = collect(filter, task_ctx).await?; - - results - .iter() - .for_each(|batch| assert_eq!(13, batch.num_columns())); - let row_count: usize = results.iter().map(|batch| batch.num_rows()).sum(); - assert_eq!(41, row_count); - - Ok(()) - } - - #[tokio::test] - async fn 
with_new_children() -> Result<()> { - let schema = test_util::aggr_test_schema(); - let partitions = 4; - let tmp_dir = TempDir::new()?; - let input = test::scan_partitioned_csv(partitions, tmp_dir.path())?; - - let predicate: Arc = - binary(col("c2", &schema)?, Operator::Gt, lit(1u32), &schema)?; - - let filter: Arc = - Arc::new(FilterExec::try_new(predicate, input.clone())?); - - let new_filter = filter.clone().with_new_children(vec![input.clone()])?; - assert!(!Arc::data_ptr_eq(&filter, &new_filter)); - - let new_filter2 = - with_new_children_if_necessary(filter.clone(), vec![input])?.into(); - assert!(Arc::data_ptr_eq(&filter, &new_filter2)); - - Ok(()) - } #[tokio::test] async fn collect_columns_predicates() -> Result<()> { - let schema = test_util::aggr_test_schema(); + let schema = test::aggr_test_schema(); let predicate: Arc = binary( binary( binary(col("c2", &schema)?, Operator::GtEq, lit(1u32), &schema)?, diff --git a/datafusion/physical-plan/src/joins/cross_join.rs b/datafusion/physical-plan/src/joins/cross_join.rs index e5302420c27a7..90d84282fd0dd 100644 --- a/datafusion/physical-plan/src/joins/cross_join.rs +++ b/datafusion/physical-plan/src/joins/cross_join.rs @@ -52,9 +52,9 @@ type JoinLeftData = (RecordBatch, MemoryReservation); #[derive(Debug)] pub struct CrossJoinExec { /// left (build) side which gets loaded in memory - pub(crate) left: Arc, + pub left: Arc, /// right (probe) side which are combined with left side - pub(crate) right: Arc, + pub right: Arc, /// The schema once the join is applied schema: SchemaRef, /// Build-side data @@ -458,9 +458,9 @@ impl CrossJoinStream { mod tests { use super::*; use crate::assert_batches_sorted_eq; + use crate::assert_contains; use crate::common; - use crate::common::assert_contains; - use crate::test::{build_table_scan_i32, columns}; + use crate::test::build_table_scan_i32; use datafusion_execution::runtime_env::{RuntimeConfig, RuntimeEnv}; async fn join_collect( @@ -678,4 +678,9 @@ mod tests { Ok(()) } + + /// Returns the column names on the schema + fn columns(schema: &Schema) -> Vec { + schema.fields().iter().map(|f| f.name().clone()).collect() + } } diff --git a/datafusion/physical-plan/src/joins/hash_join.rs b/datafusion/physical-plan/src/joins/hash_join.rs index 75807863198ce..43ade366f3257 100644 --- a/datafusion/physical-plan/src/joins/hash_join.rs +++ b/datafusion/physical-plan/src/joins/hash_join.rs @@ -85,15 +85,15 @@ type JoinLeftData = (JoinHashMap, RecordBatch, MemoryReservation); #[derive(Debug)] pub struct HashJoinExec { /// left (build) side which gets hashed - pub(crate) left: Arc, + pub left: Arc, /// right (probe) side which are filtered by the hash table - pub(crate) right: Arc, + pub right: Arc, /// Set of common columns used to join on - pub(crate) on: Vec<(Column, Column)>, + pub on: Vec<(Column, Column)>, /// Filters which are applied while finding matching rows - pub(crate) filter: Option, + pub filter: Option, /// How the join is performed - pub(crate) join_type: JoinType, + pub join_type: JoinType, /// The schema once the join is applied schema: SchemaRef, /// Build-side data @@ -103,13 +103,13 @@ pub struct HashJoinExec { /// Output order output_order: Option>, /// Partitioning mode to use - pub(crate) mode: PartitionMode, + pub mode: PartitionMode, /// Execution metrics metrics: ExecutionPlanMetricsSet, /// Information of index and left / right placement of columns column_indices: Vec, /// If null_equals_null is true, null == null else null != null - pub(crate) null_equals_null: bool, + pub 
null_equals_null: bool, } impl HashJoinExec { @@ -1067,23 +1067,19 @@ mod tests { use datafusion_physical_expr::expressions::Literal; use hashbrown::raw::RawTable; - use crate::execution::context::SessionConfig; - use crate::physical_expr::expressions::BinaryExpr; use crate::{ - assert_batches_sorted_eq, - common::assert_contains, - physical_plan::{ - common, - expressions::Column, - hash_utils::create_hashes, - joins::{hash_join::build_equal_condition_join_indices, utils::JoinSide}, - memory::MemoryExec, - repartition::RepartitionExec, - }, + assert_batches_sorted_eq, assert_contains, common, + expressions::Column, + hash_utils::create_hashes, + joins::{hash_join::build_equal_condition_join_indices, utils::JoinSide}, + memory::MemoryExec, + repartition::RepartitionExec, + test::build_table_i32, test::exec::MockExec, - test::{build_table_i32, columns}, }; + use datafusion_execution::config::SessionConfig; use datafusion_execution::runtime_env::{RuntimeConfig, RuntimeEnv}; + use datafusion_physical_expr::expressions::BinaryExpr; use super::*; @@ -2906,4 +2902,9 @@ mod tests { Ok(()) } + + /// Returns the column names on the schema + fn columns(schema: &Schema) -> Vec { + schema.fields().iter().map(|f| f.name().clone()).collect() + } } diff --git a/datafusion/physical-plan/src/joins/nested_loop_join.rs b/datafusion/physical-plan/src/joins/nested_loop_join.rs index b66454c732262..a0222f84a70e9 100644 --- a/datafusion/physical-plan/src/joins/nested_loop_join.rs +++ b/datafusion/physical-plan/src/joins/nested_loop_join.rs @@ -740,18 +740,14 @@ impl RecordBatchStream for NestedLoopJoinStream { #[cfg(test)] mod tests { use super::*; - use crate::physical_expr::expressions::BinaryExpr; use crate::{ - assert_batches_sorted_eq, - common::assert_contains, - execution::runtime_env::{RuntimeConfig, RuntimeEnv}, - physical_plan::{ - common, expressions::Column, memory::MemoryExec, repartition::RepartitionExec, - }, - test::{build_table_i32, columns}, + assert_batches_sorted_eq, assert_contains, common, expressions::Column, + memory::MemoryExec, repartition::RepartitionExec, test::build_table_i32, }; use arrow::datatypes::{DataType, Field}; + use datafusion_execution::runtime_env::{RuntimeConfig, RuntimeEnv}; use datafusion_expr::Operator; + use datafusion_physical_expr::expressions::BinaryExpr; use crate::joins::utils::JoinSide; use datafusion_common::ScalarValue; @@ -1169,4 +1165,9 @@ mod tests { Ok(()) } + + /// Returns the column names on the schema + fn columns(schema: &Schema) -> Vec { + schema.fields().iter().map(|f| f.name().clone()).collect() + } } diff --git a/datafusion/physical-plan/src/joins/sort_merge_join.rs b/datafusion/physical-plan/src/joins/sort_merge_join.rs index e85f4bcb2ecc3..bf2f977d820d6 100644 --- a/datafusion/physical-plan/src/joins/sort_merge_join.rs +++ b/datafusion/physical-plan/src/joins/sort_merge_join.rs @@ -63,13 +63,13 @@ use futures::{Stream, StreamExt}; #[derive(Debug)] pub struct SortMergeJoinExec { /// Left sorted joining execution plan - pub(crate) left: Arc, + pub left: Arc, /// Right sorting joining execution plan - pub(crate) right: Arc, + pub right: Arc, /// Set of common columns used to join on - pub(crate) on: JoinOn, + pub on: JoinOn, /// How the join is performed - pub(crate) join_type: JoinType, + pub join_type: JoinType, /// The schema once the join is applied schema: SchemaRef, /// Execution metrics @@ -81,9 +81,9 @@ pub struct SortMergeJoinExec { /// The output ordering output_ordering: Option>, /// Sort options of join columns used in sorting left and 
right execution plans - pub(crate) sort_options: Vec<SortOptions>, + pub sort_options: Vec<SortOptions>, /// If null_equals_null is true, null == null else null != null - pub(crate) null_equals_null: bool, + pub null_equals_null: bool, } impl SortMergeJoinExec { @@ -194,6 +194,18 @@ impl SortMergeJoinExec { pub fn on(&self) -> &[(Column, Column)] { &self.on } + + pub fn right(&self) -> &dyn ExecutionPlan { + self.right.as_ref() + } + + pub fn join_type(&self) -> JoinType { + self.join_type + } + + pub fn left(&self) -> &dyn ExecutionPlan { + self.left.as_ref() + } } impl DisplayAs for SortMergeJoinExec { @@ -1392,12 +1404,12 @@ mod tests { use datafusion_execution::config::SessionConfig; use datafusion_execution::TaskContext; - use crate::common::assert_contains; + use crate::assert_contains; use crate::expressions::Column; use crate::joins::utils::JoinOn; use crate::joins::SortMergeJoinExec; use crate::memory::MemoryExec; - use crate::test::{build_table_i32, columns}; + use crate::test::build_table_i32; use crate::{assert_batches_eq, assert_batches_sorted_eq}; use crate::{common, ExecutionPlan}; use datafusion_common::JoinType; @@ -2422,4 +2434,8 @@ mod tests { Ok(()) } + /// Returns the column names on the schema + fn columns(schema: &Schema) -> Vec<String> { + schema.fields().iter().map(|f| f.name().clone()).collect() + } } diff --git a/datafusion/physical-plan/src/joins/test_utils.rs b/datafusion/physical-plan/src/joins/test_utils.rs index 44610ab09a38b..af3dc6f9a1944 100644 --- a/datafusion/physical-plan/src/joins/test_utils.rs +++ b/datafusion/physical-plan/src/joins/test_utils.rs @@ -17,13 +17,13 @@ //! This file has test utils for hash joins -use crate::physical_plan::joins::utils::{JoinFilter, JoinOn}; -use crate::physical_plan::joins::{ +use crate::joins::utils::{JoinFilter, JoinOn}; +use crate::joins::{ HashJoinExec, PartitionMode, StreamJoinPartitionMode, SymmetricHashJoinExec, }; -use crate::physical_plan::memory::MemoryExec; -use crate::physical_plan::repartition::RepartitionExec; -use crate::physical_plan::{common, ExecutionPlan, Partitioning}; +use crate::memory::MemoryExec; +use crate::repartition::RepartitionExec; +use crate::{common, ExecutionPlan, Partitioning}; use arrow::util::pretty::pretty_format_batches; use arrow_array::{ ArrayRef, Float64Array, Int32Array, IntervalDayTimeArray, RecordBatch, diff --git a/datafusion/physical-plan/src/lib.rs b/datafusion/physical-plan/src/lib.rs index a34e8e651b3bd..9c116e73ead7e 100644 --- a/datafusion/physical-plan/src/lib.rs +++ b/datafusion/physical-plan/src/lib.rs @@ -242,45 +242,6 @@ pub fn with_new_children_if_necessary( /// Return a [wrapper](DisplayableExecutionPlan) around an /// [`ExecutionPlan`] which can be displayed in various easier to understand ways.
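+///
+/// A minimal, crate-local sketch in place of the doctest removed below (which
+/// needed the full `datafusion` crate); it assumes only [`EmptyExec`] and the
+/// `one_line()` formatter, both defined in this crate:
+///
+/// ```
+/// use std::sync::Arc;
+/// use arrow_schema::Schema;
+/// use datafusion_physical_plan::{displayable, empty::EmptyExec};
+///
+/// // build a trivial one-node plan and render it on a single line
+/// let plan = EmptyExec::new(false, Arc::new(Schema::empty()));
+/// let one_line = format!("{}", displayable(&plan).one_line());
+/// assert!(one_line.trim().starts_with("EmptyExec"));
+/// ```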
-/// -/// ``` -/// use datafusion::prelude::*; -/// use datafusion::physical_plan::displayable; -/// use object_store::path::Path; -/// -/// #[tokio::main] -/// async fn main() { -/// // Hard code target_partitions as it appears in the RepartitionExec output -/// let config = SessionConfig::new() -/// .with_target_partitions(3); -/// let mut ctx = SessionContext::with_config(config); -/// -/// // register the a table -/// ctx.register_csv("example", "tests/data/example.csv", CsvReadOptions::new()).await.unwrap(); -/// -/// // create a plan to run a SQL query -/// let dataframe = ctx.sql("SELECT a FROM example WHERE a < 5").await.unwrap(); -/// let physical_plan = dataframe.create_physical_plan().await.unwrap(); -/// -/// // Format using display string in verbose mode -/// let displayable_plan = displayable(physical_plan.as_ref()); -/// let plan_string = format!("{}", displayable_plan.indent(true)); -/// -/// let working_directory = std::env::current_dir().unwrap(); -/// let normalized = Path::from_filesystem_path(working_directory).unwrap(); -/// let plan_string = plan_string.replace(normalized.as_ref(), "WORKING_DIR"); -/// -/// assert_eq!("CoalesceBatchesExec: target_batch_size=8192\ -/// \n FilterExec: a@0 < 5\ -/// \n RepartitionExec: partitioning=RoundRobinBatch(3), input_partitions=1\ -/// \n CsvExec: file_groups={1 group: [[WORKING_DIR/tests/data/example.csv]]}, projection=[a], has_header=true", -/// plan_string.trim()); -/// -/// let one_line = format!("{}", displayable_plan.one_line()); -/// assert_eq!("CoalesceBatchesExec: target_batch_size=8192", one_line.trim()); -/// } -/// ``` -/// pub fn displayable(plan: &dyn ExecutionPlan) -> DisplayableExecutionPlan<'_> { DisplayableExecutionPlan::new(plan) } @@ -404,3 +365,6 @@ use datafusion_execution::TaskContext; pub use datafusion_physical_expr::{ expressions, functions, hash_utils, ordering_equivalence_properties_helper, udf, }; + +#[cfg(test)] +pub mod test; diff --git a/datafusion/physical-plan/src/limit.rs b/datafusion/physical-plan/src/limit.rs index 360b0d6179ed0..c6d51b7d9c5d7 100644 --- a/datafusion/physical-plan/src/limit.rs +++ b/datafusion/physical-plan/src/limit.rs @@ -521,8 +521,8 @@ impl RecordBatchStream for LimitStream { #[cfg(test)] mod tests { + use arrow_schema::Schema; use common::collect; - use tempfile::TempDir; use super::*; use crate::coalesce_partitions::CoalescePartitionsExec; @@ -534,8 +534,7 @@ mod tests { let task_ctx = Arc::new(TaskContext::default()); let num_partitions = 4; - let tmp_dir = TempDir::new()?; - let csv = test::scan_partitioned_csv(num_partitions, tmp_dir.path())?; + let csv = test::scan_partitioned(num_partitions); // input should have 4 partitions assert_eq!(csv.output_partitioning().partition_count(), num_partitions); @@ -619,9 +618,9 @@ mod tests { #[tokio::test] async fn limit_no_column() -> Result<()> { let batches = vec![ - test::make_batch_no_column(6), - test::make_batch_no_column(6), - test::make_batch_no_column(6), + make_batch_no_column(6), + make_batch_no_column(6), + make_batch_no_column(6), ]; let input = test::exec::TestStream::new(batches); @@ -650,9 +649,9 @@ mod tests { async fn skip_and_fetch(skip: usize, fetch: Option) -> Result { let task_ctx = Arc::new(TaskContext::default()); + // 4 partitions @ 100 rows apiece let num_partitions = 4; - let tmp_dir = TempDir::new()?; - let csv = test::scan_partitioned_csv(num_partitions, tmp_dir.path())?; + let csv = test::scan_partitioned(num_partitions); assert_eq!(csv.output_partitioning().partition_count(), num_partitions); @@ 
-668,7 +667,7 @@ mod tests { #[tokio::test] async fn skip_none_fetch_none() -> Result<()> { let row_count = skip_and_fetch(0, None).await?; - assert_eq!(row_count, 100); + assert_eq!(row_count, 400); Ok(()) } @@ -681,9 +680,9 @@ mod tests { #[tokio::test] async fn skip_3_fetch_none() -> Result<()> { - // there are total of 100 rows, we skipped 3 rows (offset = 3) + // there are a total of 400 rows; we skip 3 rows (offset = 3) let row_count = skip_and_fetch(3, None).await?; - assert_eq!(row_count, 97); + assert_eq!(row_count, 397); Ok(()) } @@ -696,23 +695,24 @@ mod tests { } #[tokio::test] - async fn skip_100_fetch_none() -> Result<()> { - let row_count = skip_and_fetch(100, None).await?; + async fn skip_400_fetch_none() -> Result<()> { + let row_count = skip_and_fetch(400, None).await?; assert_eq!(row_count, 0); Ok(()) } #[tokio::test] - async fn skip_100_fetch_1() -> Result<()> { - let row_count = skip_and_fetch(100, Some(1)).await?; + async fn skip_400_fetch_1() -> Result<()> { + // there are a total of 400 rows + let row_count = skip_and_fetch(400, Some(1)).await?; assert_eq!(row_count, 0); Ok(()) } #[tokio::test] - async fn skip_101_fetch_none() -> Result<()> { - // there are total of 100 rows, we skipped 101 rows (offset = 3) - let row_count = skip_and_fetch(101, None).await?; + async fn skip_401_fetch_none() -> Result<()> { + // there are a total of 400 rows; we skip 401 rows (offset = 401) + let row_count = skip_and_fetch(401, None).await?; assert_eq!(row_count, 0); Ok(()) } @@ -731,7 +731,7 @@ mod tests { #[tokio::test] async fn test_row_number_statistics_for_local_limit() -> Result<()> { let row_count = row_number_statistics_for_local_limit(4, 10).await?; - assert_eq!(row_count, Some(40)); + assert_eq!(row_count, Some(10)); Ok(()) } @@ -741,8 +741,7 @@ mod tests { fetch: Option<usize>, ) -> Result<Option<usize>> { let num_partitions = 4; - let tmp_dir = TempDir::new()?; - let csv = test::scan_partitioned_csv(num_partitions, tmp_dir.path())?; + let csv = test::scan_partitioned(num_partitions); assert_eq!(csv.output_partitioning().partition_count(), num_partitions); @@ -756,8 +755,7 @@ mod tests { num_partitions: usize, fetch: usize, ) -> Result<Option<usize>> { - let tmp_dir = TempDir::new()?; - let csv = test::scan_partitioned_csv(num_partitions, tmp_dir.path())?; + let csv = test::scan_partitioned(num_partitions); assert_eq!(csv.output_partitioning().partition_count(), num_partitions); @@ -765,4 +763,12 @@ mod tests { Ok(offset.statistics().num_rows) } + + /// Return a RecordBatch with no columns and a row count of `sz` + fn make_batch_no_column(sz: usize) -> RecordBatch { + let schema = Arc::new(Schema::empty()); + + let options = RecordBatchOptions::new().with_row_count(Option::from(sz)); + RecordBatch::try_new_with_options(schema, vec![], &options).unwrap() + } } diff --git a/datafusion/physical-plan/src/metrics/baseline.rs b/datafusion/physical-plan/src/metrics/baseline.rs index 7d72a6a9fae17..dc345cd8cdcd6 100644 --- a/datafusion/physical-plan/src/metrics/baseline.rs +++ b/datafusion/physical-plan/src/metrics/baseline.rs @@ -29,7 +29,7 @@ use datafusion_common::Result; /// /// Example: /// ``` -/// use datafusion::physical_plan::metrics::{BaselineMetrics, ExecutionPlanMetricsSet}; +/// use datafusion_physical_plan::metrics::{BaselineMetrics, ExecutionPlanMetricsSet}; /// let metrics = ExecutionPlanMetricsSet::new(); /// /// let partition = 2; diff --git a/datafusion/physical-plan/src/metrics/builder.rs b/datafusion/physical-plan/src/metrics/builder.rs index 30e9764c64460..beecc13e0029b 100644 ---
a/datafusion/physical-plan/src/metrics/builder.rs +++ b/datafusion/physical-plan/src/metrics/builder.rs @@ -29,7 +29,7 @@ use super::{ /// case of constant strings /// /// ```rust -/// use datafusion::physical_plan::metrics::*; +/// use datafusion_physical_plan::metrics::*; /// /// let metrics = ExecutionPlanMetricsSet::new(); /// let partition = 1; diff --git a/datafusion/physical-plan/src/metrics/mod.rs b/datafusion/physical-plan/src/metrics/mod.rs index 652c0af5c2e44..b2e0086f69e9a 100644 --- a/datafusion/physical-plan/src/metrics/mod.rs +++ b/datafusion/physical-plan/src/metrics/mod.rs @@ -43,7 +43,7 @@ pub use value::{Count, Gauge, MetricValue, ScopedTimerGuard, Time, Timestamp}; /// [`ExecutionPlanMetricsSet`]. /// /// ``` -/// use datafusion::physical_plan::metrics::*; +/// use datafusion_physical_plan::metrics::*; /// /// let metrics = ExecutionPlanMetricsSet::new(); /// assert!(metrics.clone_inner().output_rows().is_none()); diff --git a/datafusion/physical-plan/src/projection.rs b/datafusion/physical-plan/src/projection.rs index 1de23d4b89267..f1ec0a68a6e76 100644 --- a/datafusion/physical-plan/src/projection.rs +++ b/datafusion/physical-plan/src/projection.rs @@ -509,109 +509,19 @@ impl RecordBatchStream for ProjectionStream { mod tests { use super::*; use crate::common::collect; - use crate::expressions::{self, col}; - use crate::test::{self}; - use crate::test_util; + use crate::expressions; + use crate::test; use arrow_schema::DataType; use datafusion_common::ScalarValue; - use datafusion_expr::Operator; - use datafusion_physical_expr::expressions::binary; - use futures::future; - use tempfile::TempDir; - - // Create a binary expression without coercion. Used here when we do not want to coerce the expressions - // to valid types. Usage can result in an execution (after plan) error. 
- fn binary_simple( - l: Arc, - op: Operator, - r: Arc, - input_schema: &Schema, - ) -> Arc { - binary(l, op, r, input_schema).unwrap() - } - - #[tokio::test] - async fn project_first_column() -> Result<()> { - let task_ctx = Arc::new(TaskContext::default()); - let schema = test_util::aggr_test_schema(); - - let partitions = 4; - let tmp_dir = TempDir::new()?; - let csv = test::scan_partitioned_csv(partitions, tmp_dir.path())?; - - // pick column c1 and name it column c1 in the output schema - let projection = - ProjectionExec::try_new(vec![(col("c1", &schema)?, "c1".to_string())], csv)?; - - let col_field = projection.schema.field(0); - let col_metadata = col_field.metadata(); - let data: &str = &col_metadata["testing"]; - assert_eq!(data, "test"); - - let mut partition_count = 0; - let mut row_count = 0; - for partition in 0..projection.output_partitioning().partition_count() { - partition_count += 1; - let stream = projection.execute(partition, task_ctx.clone())?; - - row_count += stream - .map(|batch| { - let batch = batch.unwrap(); - assert_eq!(1, batch.num_columns()); - batch.num_rows() - }) - .fold(0, |acc, x| future::ready(acc + x)) - .await; - } - assert_eq!(partitions, partition_count); - assert_eq!(100, row_count); - - Ok(()) - } - - #[tokio::test] - async fn project_input_not_partitioning() -> Result<()> { - let schema = test_util::aggr_test_schema(); - - let partitions = 4; - let tmp_dir = TempDir::new()?; - let csv = test::scan_partitioned_csv(partitions, tmp_dir.path())?; - - // pick column c1 and name it column c1 in the output schema - let projection = - ProjectionExec::try_new(vec![(col("c1", &schema)?, "c1".to_string())], csv)?; - assert!(!projection.benefits_from_input_partitioning()[0]); - Ok(()) - } - - #[tokio::test] - async fn project_input_partitioning() -> Result<()> { - let schema = test_util::aggr_test_schema(); - - let partitions = 4; - let tmp_dir = TempDir::new()?; - let csv = test::scan_partitioned_csv(partitions, tmp_dir.path())?; - - let c1 = col("c2", &schema).unwrap(); - let c2 = col("c9", &schema).unwrap(); - let c1_plus_c2 = binary_simple(c1, Operator::Plus, c2, &schema); - - let projection = - ProjectionExec::try_new(vec![(c1_plus_c2, "c2 + c9".to_string())], csv)?; - - assert!(projection.benefits_from_input_partitioning()[0]); - Ok(()) - } #[tokio::test] async fn project_no_column() -> Result<()> { let task_ctx = Arc::new(TaskContext::default()); - let tmp_dir = TempDir::new()?; - let csv = test::scan_partitioned_csv(1, tmp_dir.path())?; - let expected = collect(csv.execute(0, task_ctx.clone())?).await.unwrap(); + let exec = test::scan_partitioned(1); + let expected = collect(exec.execute(0, task_ctx.clone())?).await.unwrap(); - let projection = ProjectionExec::try_new(vec![], csv)?; + let projection = ProjectionExec::try_new(vec![], exec)?; let stream = projection.execute(0, task_ctx.clone())?; let output = collect(stream).await.unwrap(); assert_eq!(output.len(), expected.len()); diff --git a/datafusion/physical-plan/src/repartition/mod.rs b/datafusion/physical-plan/src/repartition/mod.rs index 44a7739a343c5..c10bfc78b117a 100644 --- a/datafusion/physical-plan/src/repartition/mod.rs +++ b/datafusion/physical-plan/src/repartition/mod.rs @@ -894,10 +894,8 @@ impl RecordBatchStream for PerPartitionStream { #[cfg(test)] mod tests { use super::*; - use crate::test::create_vec_batches; use crate::{ assert_batches_sorted_eq, - physical_plan::{collect, expressions::col, memory::MemoryExec}, test::{ assert_is_pending, exec::{ @@ -905,10 +903,12 @@ mod tests 
{ ErrorExec, MockExec, }, }, + {collect, expressions::col, memory::MemoryExec}, }; use arrow::array::{ArrayRef, StringArray}; use arrow::datatypes::{DataType, Field, Schema}; use arrow::record_batch::RecordBatch; + use arrow_array::UInt32Array; use datafusion_common::cast::as_string_array; use datafusion_common::exec_err; use datafusion_execution::runtime_env::{RuntimeConfig, RuntimeEnv}; @@ -1398,4 +1398,23 @@ mod tests { Ok(()) } + + /// Create vector batches + fn create_vec_batches(schema: &Schema, n: usize) -> Vec { + let batch = create_batch(schema); + let mut vec = Vec::with_capacity(n); + for _ in 0..n { + vec.push(batch.clone()); + } + vec + } + + /// Create batch + fn create_batch(schema: &Schema) -> RecordBatch { + RecordBatch::try_new( + Arc::new(schema.clone()), + vec![Arc::new(UInt32Array::from(vec![1, 2, 3, 4, 5, 6, 7, 8]))], + ) + .unwrap() + } } diff --git a/datafusion/physical-plan/src/sorts/sort.rs b/datafusion/physical-plan/src/sorts/sort.rs index 695272767696d..7b4d239eebd00 100644 --- a/datafusion/physical-plan/src/sorts/sort.rs +++ b/datafusion/physical-plan/src/sorts/sort.rs @@ -900,59 +900,31 @@ mod tests { use arrow::array::*; use arrow::compute::SortOptions; use arrow::datatypes::*; - use datafusion_common::cast::{as_primitive_array, as_string_array}; + use datafusion_common::cast::as_primitive_array; use datafusion_execution::config::SessionConfig; use datafusion_execution::runtime_env::RuntimeConfig; use futures::FutureExt; use std::collections::HashMap; - use tempfile::TempDir; #[tokio::test] async fn test_in_mem_sort() -> Result<()> { let task_ctx = Arc::new(TaskContext::default()); let partitions = 4; - let tmp_dir = TempDir::new()?; - let csv = test::scan_partitioned_csv(partitions, tmp_dir.path())?; + let csv = test::scan_partitioned(partitions); let schema = csv.schema(); let sort_exec = Arc::new(SortExec::new( - vec![ - // c1 string column - PhysicalSortExpr { - expr: col("c1", &schema)?, - options: SortOptions::default(), - }, - // c2 uin32 column - PhysicalSortExpr { - expr: col("c2", &schema)?, - options: SortOptions::default(), - }, - // c7 uin8 column - PhysicalSortExpr { - expr: col("c7", &schema)?, - options: SortOptions::default(), - }, - ], + vec![PhysicalSortExpr { + expr: col("i", &schema)?, + options: SortOptions::default(), + }], Arc::new(CoalescePartitionsExec::new(csv)), )); let result = collect(sort_exec, task_ctx.clone()).await?; assert_eq!(result.len(), 1); - - let columns = result[0].columns(); - - let c1 = as_string_array(&columns[0])?; - assert_eq!(c1.value(0), "a"); - assert_eq!(c1.value(c1.len() - 1), "e"); - - let c2 = as_primitive_array::(&columns[1])?; - assert_eq!(c2.value(0), 1); - assert_eq!(c2.value(c2.len() - 1), 5,); - - let c7 = as_primitive_array::(&columns[6])?; - assert_eq!(c7.value(0), 15); - assert_eq!(c7.value(c7.len() - 1), 254,); + assert_eq!(result[0].num_rows(), 400); assert_eq!( task_ctx.runtime_env().memory_pool.reserved(), @@ -965,7 +937,7 @@ mod tests { #[tokio::test] async fn test_sort_spill() -> Result<()> { - // trigger spill there will be 4 batches with 5.5KB for each + // trigger spill w/ 100 batches let session_config = SessionConfig::new(); let sort_spill_reservation_bytes = session_config .options() @@ -980,57 +952,35 @@ mod tests { .with_runtime(runtime), ); - let partitions = 4; - let tmp_dir = TempDir::new()?; - let csv = test::scan_partitioned_csv(partitions, tmp_dir.path())?; - let schema = csv.schema(); + let partitions = 100; + let input = test::scan_partitioned(partitions); + let schema = 
input.schema(); let sort_exec = Arc::new(SortExec::new( - vec![ - // c1 string column - PhysicalSortExpr { - expr: col("c1", &schema)?, - options: SortOptions::default(), - }, - // c2 uin32 column - PhysicalSortExpr { - expr: col("c2", &schema)?, - options: SortOptions::default(), - }, - // c7 uin8 column - PhysicalSortExpr { - expr: col("c7", &schema)?, - options: SortOptions::default(), - }, - ], - Arc::new(CoalescePartitionsExec::new(csv)), + vec![PhysicalSortExpr { + expr: col("i", &schema)?, + options: SortOptions::default(), + }], + Arc::new(CoalescePartitionsExec::new(input)), )); let result = collect(sort_exec.clone(), task_ctx.clone()).await?; - assert_eq!(result.len(), 1); + assert_eq!(result.len(), 2); // Now, validate metrics let metrics = sort_exec.metrics().unwrap(); - assert_eq!(metrics.output_rows().unwrap(), 100); + assert_eq!(metrics.output_rows().unwrap(), 10000); assert!(metrics.elapsed_compute().unwrap() > 0); assert!(metrics.spill_count().unwrap() > 0); assert!(metrics.spilled_bytes().unwrap() > 0); let columns = result[0].columns(); - let c1 = as_string_array(&columns[0])?; - assert_eq!(c1.value(0), "a"); - assert_eq!(c1.value(c1.len() - 1), "e"); - - let c2 = as_primitive_array::(&columns[1])?; - assert_eq!(c2.value(0), 1); - assert_eq!(c2.value(c2.len() - 1), 5,); - - let c7 = as_primitive_array::(&columns[6])?; - assert_eq!(c7.value(0), 15); - assert_eq!(c7.value(c7.len() - 1), 254,); + let i = as_primitive_array::(&columns[0])?; + assert_eq!(i.value(0), 0); + assert_eq!(i.value(i.len() - 1), 81); assert_eq!( task_ctx.runtime_env().memory_pool.reserved(), @@ -1044,7 +994,7 @@ mod tests { #[tokio::test] async fn test_sort_fetch_memory_calculation() -> Result<()> { // This test mirrors down the size from the example above. - let avg_batch_size = 4000; + let avg_batch_size = 400; let partitions = 4; // A tuple of (fetch, expect_spillage) @@ -1075,29 +1025,15 @@ mod tests { .with_session_config(session_config), ); - let tmp_dir = TempDir::new()?; - let csv = test::scan_partitioned_csv(partitions, tmp_dir.path())?; + let csv = test::scan_partitioned(partitions); let schema = csv.schema(); let sort_exec = Arc::new( SortExec::new( - vec![ - // c1 string column - PhysicalSortExpr { - expr: col("c1", &schema)?, - options: SortOptions::default(), - }, - // c2 uin32 column - PhysicalSortExpr { - expr: col("c2", &schema)?, - options: SortOptions::default(), - }, - // c7 uin8 column - PhysicalSortExpr { - expr: col("c7", &schema)?, - options: SortOptions::default(), - }, - ], + vec![PhysicalSortExpr { + expr: col("i", &schema)?, + options: SortOptions::default(), + }], Arc::new(CoalescePartitionsExec::new(csv)), ) .with_fetch(fetch), diff --git a/datafusion/physical-plan/src/sorts/sort_preserving_merge.rs b/datafusion/physical-plan/src/sorts/sort_preserving_merge.rs index 507d66c920fb5..b9e2c9662b14c 100644 --- a/datafusion/physical-plan/src/sorts/sort_preserving_merge.rs +++ b/datafusion/physical-plan/src/sorts/sort_preserving_merge.rs @@ -276,8 +276,8 @@ mod tests { use arrow::record_batch::RecordBatch; use datafusion_execution::config::SessionConfig; use futures::{FutureExt, StreamExt}; - use tempfile::TempDir; + use crate::assert_batches_eq; use crate::coalesce_partitions::CoalescePartitionsExec; use crate::expressions::col; use crate::memory::MemoryExec; @@ -285,8 +285,7 @@ mod tests { use crate::sorts::sort::SortExec; use crate::stream::RecordBatchReceiverStream; use crate::test::exec::{assert_strong_count_converges_to_zero, BlockingExec}; - use crate::test::{self, 
assert_is_pending}; - use crate::{assert_batches_eq, test_util}; + use crate::test::{self, assert_is_pending, make_partition}; use crate::{collect, common}; use arrow::array::{Int32Array, StringArray, TimestampNanosecondArray}; @@ -561,31 +560,16 @@ mod tests { async fn test_partition_sort() -> Result<()> { let task_ctx = Arc::new(TaskContext::default()); let partitions = 4; - let tmp_dir = TempDir::new()?; - let csv = test::scan_partitioned_csv(partitions, tmp_dir.path()).unwrap(); + let csv = test::scan_partitioned(partitions); let schema = csv.schema(); - let sort = vec![ - PhysicalSortExpr { - expr: col("c1", &schema).unwrap(), - options: SortOptions { - descending: true, - nulls_first: true, - }, - }, - PhysicalSortExpr { - expr: col("c2", &schema).unwrap(), - options: Default::default(), - }, - PhysicalSortExpr { - expr: col("c7", &schema).unwrap(), - options: SortOptions::default(), - }, - PhysicalSortExpr { - expr: col("c12", &schema).unwrap(), - options: SortOptions::default(), + let sort = vec![PhysicalSortExpr { + expr: col("i", &schema).unwrap(), + options: SortOptions { + descending: true, + nulls_first: true, }, - ]; + }]; let basic = basic_sort(csv.clone(), sort.clone(), task_ctx.clone()).await; let partition = partition_sort(csv, sort, task_ctx.clone()).await; @@ -634,8 +618,7 @@ mod tests { context: Arc, ) -> Result> { let partitions = 4; - let tmp_dir = TempDir::new()?; - let csv = test::scan_partitioned_csv(partitions, tmp_dir.path()).unwrap(); + let csv = test::scan_partitioned(partitions); let sorted = basic_sort(csv, sort, context).await; let split: Vec<_> = sizes.iter().map(|x| split_batch(&sorted, *x)).collect(); @@ -648,29 +631,11 @@ mod tests { #[tokio::test] async fn test_partition_sort_streaming_input() -> Result<()> { let task_ctx = Arc::new(TaskContext::default()); - let schema = test_util::aggr_test_schema(); - let sort = vec![ - // uint8 - PhysicalSortExpr { - expr: col("c7", &schema).unwrap(), - options: Default::default(), - }, - // int16 - PhysicalSortExpr { - expr: col("c4", &schema).unwrap(), - options: Default::default(), - }, - // utf-8 - PhysicalSortExpr { - expr: col("c1", &schema).unwrap(), - options: SortOptions::default(), - }, - // utf-8 - PhysicalSortExpr { - expr: col("c13", &schema).unwrap(), - options: SortOptions::default(), - }, - ]; + let schema = make_partition(11).schema(); + let sort = vec![PhysicalSortExpr { + expr: col("i", &schema).unwrap(), + options: Default::default(), + }]; let input = sorted_partitioned_input(sort.clone(), &[10, 3, 11], task_ctx.clone()) @@ -678,8 +643,8 @@ mod tests { let basic = basic_sort(input.clone(), sort.clone(), task_ctx.clone()).await; let partition = sorted_merge(input, sort, task_ctx.clone()).await; - assert_eq!(basic.num_rows(), 300); - assert_eq!(partition.num_rows(), 300); + assert_eq!(basic.num_rows(), 1200); + assert_eq!(partition.num_rows(), 1200); let basic = arrow::util::pretty::pretty_format_batches(&[basic]) .unwrap() @@ -695,20 +660,11 @@ mod tests { #[tokio::test] async fn test_partition_sort_streaming_input_output() -> Result<()> { - let schema = test_util::aggr_test_schema(); - - let sort = vec![ - // float64 - PhysicalSortExpr { - expr: col("c12", &schema).unwrap(), - options: Default::default(), - }, - // utf-8 - PhysicalSortExpr { - expr: col("c13", &schema).unwrap(), - options: Default::default(), - }, - ]; + let schema = make_partition(11).schema(); + let sort = vec![PhysicalSortExpr { + expr: col("i", &schema).unwrap(), + options: Default::default(), + }]; // Test streaming with 
default batch size let task_ctx = Arc::new(TaskContext::default()); @@ -725,10 +681,10 @@ mod tests { let merge = Arc::new(SortPreservingMergeExec::new(sort, input)); let merged = collect(merge, task_ctx).await.unwrap(); - assert_eq!(merged.len(), 14); + assert_eq!(merged.len(), 53); - assert_eq!(basic.num_rows(), 300); - assert_eq!(merged.iter().map(|x| x.num_rows()).sum::<usize>(), 300); + assert_eq!(basic.num_rows(), 1200); + assert_eq!(merged.iter().map(|x| x.num_rows()).sum::<usize>(), 1200); let basic = arrow::util::pretty::pretty_format_batches(&[basic]) .unwrap() @@ -826,9 +782,9 @@ mod tests { #[tokio::test] async fn test_async() -> Result<()> { let task_ctx = Arc::new(TaskContext::default()); - let schema = test_util::aggr_test_schema(); + let schema = make_partition(11).schema(); let sort = vec![PhysicalSortExpr { - expr: col("c12", &schema).unwrap(), + expr: col("i", &schema).unwrap(), options: SortOptions::default(), }]; diff --git a/datafusion/physical-plan/src/test.rs b/datafusion/physical-plan/src/test.rs new file mode 100644 index 0000000000000..e0c612387470c --- /dev/null +++ b/datafusion/physical-plan/src/test.rs @@ -0,0 +1,343 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +//! Utilities for testing datafusion-physical-plan + +use std::error::Error; +use std::pin::Pin; +use std::sync::Arc; +use std::{collections::HashMap, path::PathBuf}; + +use arrow_array::{ArrayRef, Int32Array, RecordBatch}; +use arrow_schema::{DataType, Field, Schema, SchemaRef}; +use futures::{Future, FutureExt}; + +use crate::memory::MemoryExec; +use crate::ExecutionPlan; + +pub mod exec; + +/// A macro to assert that one string is contained within another, with +/// a nice error message if it is not. +/// +/// Usage: `assert_contains!(actual, expected)` +/// +/// This is a macro so test error +/// messages appear on the same line as the failure. +/// +/// Both arguments must be convertible into Strings ([`Into`]<[`String`]>) +#[macro_export] +macro_rules! assert_contains { + ($ACTUAL: expr, $EXPECTED: expr) => { + let actual_value: String = $ACTUAL.into(); + let expected_value: String = $EXPECTED.into(); + assert!( + actual_value.contains(&expected_value), + "Can not find expected in actual.\n\nExpected:\n{}\n\nActual:\n{}", + expected_value, + actual_value + ); + }; +} + +/// A macro to assert that one string is NOT contained within another, with +/// a nice error message if it is. +/// +/// Usage: `assert_not_contains!(actual, unexpected)` +/// +/// This is a macro so test error +/// messages appear on the same line as the failure. +/// +/// Both arguments must be convertible into Strings ([`Into`]<[`String`]>) +#[macro_export] +macro_rules!
assert_not_contains { + ($ACTUAL: expr, $UNEXPECTED: expr) => { + let actual_value: String = $ACTUAL.into(); + let unexpected_value: String = $UNEXPECTED.into(); + assert!( + !actual_value.contains(&unexpected_value), + "Found unexpected in actual.\n\nUnexpected:\n{}\n\nActual:\n{}", + unexpected_value, + actual_value + ); + }; +} + +/// Compares the pretty-formatted output of record batches with an +/// expected vector of strings. This is a macro so errors appear on +/// the correct line. +/// +/// Designed so that failure output can be directly copy/pasted +/// into the test code as expected results. +/// +/// Expects to be called like this: +/// +/// `assert_batches_eq!(expected_lines: &[&str], batches: &[RecordBatch])` +#[macro_export] +macro_rules! assert_batches_eq { + ($EXPECTED_LINES: expr, $CHUNKS: expr) => { + let expected_lines: Vec<String> = + $EXPECTED_LINES.iter().map(|&s| s.into()).collect(); + + let formatted = arrow::util::pretty::pretty_format_batches_with_options( + $CHUNKS, + &datafusion_common::format::DEFAULT_FORMAT_OPTIONS, + ) + .unwrap() + .to_string(); + + let actual_lines: Vec<&str> = formatted.trim().lines().collect(); + + assert_eq!( + expected_lines, actual_lines, + "\n\nexpected:\n\n{:#?}\nactual:\n\n{:#?}\n\n", + expected_lines, actual_lines + ); + }; +} + +/// Compares the pretty-formatted output of record batches with an +/// expected vector of strings in a way that order does not matter. +/// This is a macro so errors appear on the correct line. +/// +/// Designed so that failure output can be directly copy/pasted +/// into the test code as expected results. +/// +/// Expects to be called like this: +/// +/// `assert_batches_sorted_eq!(expected_lines: &[&str], batches: &[RecordBatch])` +#[macro_export] +macro_rules! assert_batches_sorted_eq { + ($EXPECTED_LINES: expr, $CHUNKS: expr) => { + let mut expected_lines: Vec<String> = + $EXPECTED_LINES.iter().map(|&s| s.into()).collect(); + + // sort except for header + footer + let num_lines = expected_lines.len(); + if num_lines > 3 { + expected_lines.as_mut_slice()[2..num_lines - 1].sort_unstable() + } + + let formatted = arrow::util::pretty::pretty_format_batches_with_options( + $CHUNKS, + &datafusion_common::format::DEFAULT_FORMAT_OPTIONS, + ) + .unwrap() + .to_string(); + // fix for windows: `lines()` strips the trailing `\r` from `\r\n` line endings + + let mut actual_lines: Vec<&str> = formatted.trim().lines().collect(); + + // sort except for header + footer + let num_lines = actual_lines.len(); + if num_lines > 3 { + actual_lines.as_mut_slice()[2..num_lines - 1].sort_unstable() + } + + assert_eq!( + expected_lines, actual_lines, + "\n\nexpected:\n\n{:#?}\nactual:\n\n{:#?}\n\n", + expected_lines, actual_lines + ); + }; +} + +/// Returns the arrow test data directory, which is by default stored +/// in a git submodule rooted at `testing/data`. +/// +/// The default can be overridden by the optional environment +/// variable `ARROW_TEST_DATA` +/// +/// Panics when the directory cannot be found.
+/// +/// Example: +/// ``` +/// let testdata = datafusion_common::test_util::arrow_test_data(); +/// let csvdata = format!("{}/csv/aggregate_test_100.csv", testdata); +/// assert!(std::path::PathBuf::from(csvdata).exists()); +/// ``` +pub fn arrow_test_data() -> String { + match get_data_dir("ARROW_TEST_DATA", "../../testing/data") { + Ok(pb) => pb.display().to_string(), + Err(err) => panic!("failed to get arrow data dir: {err}"), + } +} + +/// Returns the parquet test data directory, which is by default +/// stored in a git submodule rooted at +/// `parquet-testing/data`. +/// +/// The default can be overridden by the optional environment variable +/// `PARQUET_TEST_DATA` +/// +/// Panics when the directory cannot be found. +/// +/// Example: +/// ``` +/// let testdata = datafusion_common::test_util::parquet_test_data(); +/// let filename = format!("{}/binary.parquet", testdata); +/// assert!(std::path::PathBuf::from(filename).exists()); +/// ``` +pub fn parquet_test_data() -> String { + match get_data_dir("PARQUET_TEST_DATA", "../../parquet-testing/data") { + Ok(pb) => pb.display().to_string(), + Err(err) => panic!("failed to get parquet data dir: {err}"), + } +} + +/// Returns a directory path for finding test data. +/// +/// udf_env: name of an environment variable +/// +/// submodule_data: fallback path (relative to CARGO_MANIFEST_DIR) +/// +/// Returns either: +/// The path referred to in `udf_env` if that variable is set and refers to a directory +/// The submodule_data directory relative to CARGO_MANIFEST_DIR +pub fn get_data_dir( + udf_env: &str, + submodule_data: &str, +) -> Result<PathBuf, Box<dyn Error>> { + // Try user defined env. + if let Ok(dir) = std::env::var(udf_env) { + let trimmed = dir.trim().to_string(); + if !trimmed.is_empty() { + let pb = PathBuf::from(trimmed); + if pb.is_dir() { + return Ok(pb); + } else { + return Err(format!( + "the data dir `{}` defined by env {} not found", + pb.display(), + udf_env + ) + .into()); + } + } + } + + // The env is undefined or its value is trimmed to empty, let's try default dir. + + // env "CARGO_MANIFEST_DIR" is "the directory containing the manifest of your package", + // set by `cargo run` or `cargo test`, see: + // https://doc.rust-lang.org/cargo/reference/environment-variables.html + let dir = env!("CARGO_MANIFEST_DIR"); + + let pb = PathBuf::from(dir).join(submodule_data); + if pb.is_dir() { + Ok(pb) + } else { + Err(format!( + "env `{}` is undefined or has empty value, and the pre-defined data dir `{}` not found\n\ + HINT: try running `git submodule update --init`", + udf_env, + pb.display(), + ).into()) + } +} + +/// Asserts that the given future is still pending.
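+///
+/// A usage sketch, assuming `FutureExt::boxed` (already imported above),
+/// which yields the pinned, boxed future this helper expects:
+///
+/// ```ignore
+/// use futures::FutureExt;
+///
+/// // a future that never resolves is still pending after being polled once
+/// let mut fut = futures::future::pending::<()>().boxed();
+/// assert_is_pending(&mut fut);
+/// ```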
+pub fn assert_is_pending<'a, T>(fut: &mut Pin<Box<dyn Future<Output = T> + Send + 'a>>) { + let waker = futures::task::noop_waker(); + let mut cx = futures::task::Context::from_waker(&waker); + let poll = fut.poll_unpin(&mut cx); + + assert!(poll.is_pending()); +} + +/// Get the schema for the aggregate_test_* csv files +pub fn aggr_test_schema() -> SchemaRef { + let mut f1 = Field::new("c1", DataType::Utf8, false); + f1.set_metadata(HashMap::from_iter(vec![("testing".into(), "test".into())])); + let schema = Schema::new(vec![ + f1, + Field::new("c2", DataType::UInt32, false), + Field::new("c3", DataType::Int8, false), + Field::new("c4", DataType::Int16, false), + Field::new("c5", DataType::Int32, false), + Field::new("c6", DataType::Int64, false), + Field::new("c7", DataType::UInt8, false), + Field::new("c8", DataType::UInt16, false), + Field::new("c9", DataType::UInt32, false), + Field::new("c10", DataType::UInt64, false), + Field::new("c11", DataType::Float32, false), + Field::new("c12", DataType::Float64, false), + Field::new("c13", DataType::Utf8, false), + ]); + + Arc::new(schema) +} + +/// Returns a record batch with 3 columns of i32 in memory +pub fn build_table_i32( + a: (&str, &Vec<i32>), + b: (&str, &Vec<i32>), + c: (&str, &Vec<i32>), +) -> RecordBatch { + let schema = Schema::new(vec![ + Field::new(a.0, DataType::Int32, false), + Field::new(b.0, DataType::Int32, false), + Field::new(c.0, DataType::Int32, false), + ]); + + RecordBatch::try_new( + Arc::new(schema), + vec![ + Arc::new(Int32Array::from(a.1.clone())), + Arc::new(Int32Array::from(b.1.clone())), + Arc::new(Int32Array::from(c.1.clone())), + ], + ) + .unwrap() +} + +/// Returns a memory table scan wrapped around a record batch with 3 columns of i32 +pub fn build_table_scan_i32( + a: (&str, &Vec<i32>), + b: (&str, &Vec<i32>), + c: (&str, &Vec<i32>), +) -> Arc<dyn ExecutionPlan> { + let batch = build_table_i32(a, b, c); + let schema = batch.schema(); + Arc::new(MemoryExec::try_new(&[vec![batch]], schema, None).unwrap()) +} + +/// Return a RecordBatch with a single Int32 array with values (0..sz) in a field named "i" +pub fn make_partition(sz: i32) -> RecordBatch { + let seq_start = 0; + let seq_end = sz; + let values = (seq_start..seq_end).collect::<Vec<_>>(); + let schema = Arc::new(Schema::new(vec![Field::new("i", DataType::Int32, true)])); + let arr = Arc::new(Int32Array::from(values)); + let arr = arr as ArrayRef; + + RecordBatch::try_new(schema, vec![arr]).unwrap() +} + +/// Returns a plan over `partitions` partitions, each containing a single batch of 100 rows +pub fn scan_partitioned(partitions: usize) -> Arc<dyn ExecutionPlan> { + Arc::new(mem_exec(partitions)) +} + +/// Returns a `MemoryExec` over `partitions` partitions, each containing a single batch of 100 rows (see `make_partition`) +pub fn mem_exec(partitions: usize) -> MemoryExec { + let data: Vec<Vec<RecordBatch>> = (0..partitions).map(|_| vec![make_partition(100)]).collect(); + + let schema = data[0][0].schema(); + let projection = None; + MemoryExec::try_new(&data, schema, projection).unwrap() +} diff --git a/datafusion/core/src/test/exec.rs b/datafusion/physical-plan/src/test/exec.rs similarity index 98% rename from datafusion/core/src/test/exec.rs rename to datafusion/physical-plan/src/test/exec.rs index 44ce5cf3282b1..a1f40c7ba909c 100644 --- a/datafusion/core/src/test/exec.rs +++ b/datafusion/physical-plan/src/test/exec.rs @@ -31,20 +31,15 @@ use arrow::{ }; use futures::Stream; -use crate::physical_plan::{ - common, DisplayFormatType, ExecutionPlan, Partitioning, RecordBatchStream, - SendableRecordBatchStream, Statistics, -}; -use crate::physical_plan::{expressions::PhysicalSortExpr, DisplayAs}; use crate::{ - error::{DataFusionError,
Result}, - physical_plan::stream::RecordBatchReceiverStream, -}; -use crate::{ - execution::context::TaskContext, physical_plan::stream::RecordBatchStreamAdapter, + common, stream::RecordBatchReceiverStream, stream::RecordBatchStreamAdapter, + DisplayAs, DisplayFormatType, ExecutionPlan, Partitioning, RecordBatchStream, + SendableRecordBatchStream, Statistics, }; +use datafusion_physical_expr::PhysicalSortExpr; -use datafusion_common::internal_err; +use datafusion_common::{internal_err, DataFusionError, Result}; +use datafusion_execution::TaskContext; /// Index into the data that has been returned so far #[derive(Debug, Default, Clone)] diff --git a/datafusion/physical-plan/src/union.rs b/datafusion/physical-plan/src/union.rs index 8e0d871e0e34a..af765e257db2f 100644 --- a/datafusion/physical-plan/src/union.rs +++ b/datafusion/physical-plan/src/union.rs @@ -604,19 +604,17 @@ mod tests { use super::*; use crate::test; - use crate::{physical_plan::collect, scalar::ScalarValue}; + use crate::collect; use arrow::record_batch::RecordBatch; - use tempfile::TempDir; + use datafusion_common::ScalarValue; #[tokio::test] async fn test_union_partitions() -> Result<()> { let task_ctx = Arc::new(TaskContext::default()); - let tmp_dir = TempDir::new()?; - - // Create csv's with different partitioning - let csv = test::scan_partitioned_csv(4, tmp_dir.path())?; - let csv2 = test::scan_partitioned_csv(5, tmp_dir.path())?; + // Create inputs with different partitioning + let csv = test::scan_partitioned(4); + let csv2 = test::scan_partitioned(5); let union_exec = Arc::new(UnionExec::new(vec![csv, csv2])); diff --git a/datafusion/physical-plan/src/values.rs b/datafusion/physical-plan/src/values.rs index d6ca35b5d9bef..2cf341d1fe600 100644 --- a/datafusion/physical-plan/src/values.rs +++ b/datafusion/physical-plan/src/values.rs @@ -195,14 +195,12 @@ impl ExecutionPlan for ValuesExec { #[cfg(test)] mod tests { use super::*; - - use crate::test::create_vec_batches; - use crate::test_util; + use crate::test::{self, make_partition}; use arrow_schema::{DataType, Field, Schema}; #[tokio::test] async fn values_empty_case() -> Result<()> { - let schema = test_util::aggr_test_schema(); + let schema = test::aggr_test_schema(); let empty = ValuesExec::try_new(schema, vec![]); assert!(empty.is_err()); Ok(()) @@ -210,33 +208,24 @@ mod tests { #[test] fn new_exec_with_batches() { - let schema = Arc::new(Schema::new(vec![Field::new( - "col0", - DataType::UInt32, - false, - )])); - let batches = create_vec_batches(&schema, 10); + let batch = make_partition(7); + let schema = batch.schema(); + let batches = vec![batch.clone(), batch]; + let _exec = ValuesExec::try_new_from_batches(schema, batches).unwrap(); } #[test] fn new_exec_with_batches_empty() { - let schema = Arc::new(Schema::new(vec![Field::new( - "col0", - DataType::UInt32, - false, - )])); + let batch = make_partition(7); + let schema = batch.schema(); let _ = ValuesExec::try_new_from_batches(schema, Vec::new()).unwrap_err(); } #[test] fn new_exec_with_batches_invalid_schema() { - let schema = Arc::new(Schema::new(vec![Field::new( - "col0", - DataType::UInt32, - false, - )])); - let batches = create_vec_batches(&schema, 10); + let batch = make_partition(7); + let batches = vec![batch.clone(), batch]; let invalid_schema = Arc::new(Schema::new(vec![ Field::new("col0", DataType::UInt32, false), diff --git a/datafusion/physical-plan/src/windows/mod.rs b/datafusion/physical-plan/src/windows/mod.rs index 3f25275996389..2a2f8d6d211bf 100644 --- 
a/datafusion/physical-plan/src/windows/mod.rs +++ b/datafusion/physical-plan/src/windows/mod.rs @@ -359,30 +359,14 @@ pub(crate) fn window_ordering_equivalence( mod tests { use super::*; use crate::aggregates::AggregateFunction; - use crate::datasource::physical_plan::CsvExec; + use crate::collect; use crate::expressions::col; + use crate::test::assert_is_pending; use crate::test::exec::{assert_strong_count_converges_to_zero, BlockingExec}; - use crate::test::{self, assert_is_pending, csv_exec_sorted}; - use crate::{collect, ExecutionPlan}; - use arrow::array::*; use arrow::compute::SortOptions; use arrow::datatypes::{DataType, Field, SchemaRef}; - use arrow::record_batch::RecordBatch; - use datafusion_common::cast::as_primitive_array; use datafusion_execution::TaskContext; - use datafusion_expr::{create_udaf, Accumulator, Volatility}; use futures::FutureExt; - use std::path::Path; - use tempfile::TempDir; - - fn create_test_schema( - partitions: usize, - work_dir: &Path, - ) -> Result<(Arc, SchemaRef)> { - let csv = test::scan_partitioned_csv(partitions, work_dir)?; - let schema = csv.schema(); - Ok((csv, schema)) - } fn create_test_schema2() -> Result { let a = Field::new("a", DataType::Int32, true); @@ -394,57 +378,6 @@ mod tests { Ok(schema) } - /// make PhysicalSortExpr with default options - fn sort_expr(name: &str, schema: &Schema) -> PhysicalSortExpr { - sort_expr_options(name, schema, SortOptions::default()) - } - - /// PhysicalSortExpr with specified options - fn sort_expr_options( - name: &str, - schema: &Schema, - options: SortOptions, - ) -> PhysicalSortExpr { - PhysicalSortExpr { - expr: col(name, schema).unwrap(), - options, - } - } - - #[tokio::test] - async fn test_get_partition_by_ordering() -> Result<()> { - let test_schema = create_test_schema2()?; - // Columns a,c are nullable whereas b,d are not nullable. - // Source is sorted by a ASC NULLS FIRST, b ASC NULLS FIRST, c ASC NULLS FIRST, d ASC NULLS FIRST - // Column e is not ordered. 
- let sort_exprs = vec![ - sort_expr("a", &test_schema), - sort_expr("b", &test_schema), - sort_expr("c", &test_schema), - sort_expr("d", &test_schema), - ]; - // Input is ordered by a,b,c,d - let input = csv_exec_sorted(&test_schema, sort_exprs, true); - let test_data = vec![ - (vec!["a", "b"], vec![0, 1]), - (vec!["b", "a"], vec![1, 0]), - (vec!["b", "a", "c"], vec![1, 0, 2]), - (vec!["d", "b", "a"], vec![2, 1]), - (vec!["d", "e", "a"], vec![2]), - ]; - for (pb_names, expected) in test_data { - let pb_exprs = pb_names - .iter() - .map(|name| col(name, &test_schema)) - .collect::>>()?; - assert_eq!( - get_ordered_partition_by_indices(&pb_exprs, &input), - expected - ); - } - Ok(()) - } - #[tokio::test] async fn test_calc_requirements() -> Result<()> { let schema = create_test_schema2()?; @@ -509,143 +442,6 @@ mod tests { Ok(()) } - #[tokio::test] - async fn window_function_with_udaf() -> Result<()> { - #[derive(Debug)] - struct MyCount(i64); - - impl Accumulator for MyCount { - fn state(&self) -> Result> { - Ok(vec![ScalarValue::Int64(Some(self.0))]) - } - - fn update_batch(&mut self, values: &[ArrayRef]) -> Result<()> { - let array = &values[0]; - self.0 += (array.len() - array.null_count()) as i64; - Ok(()) - } - - fn merge_batch(&mut self, states: &[ArrayRef]) -> Result<()> { - let counts: &Int64Array = arrow::array::as_primitive_array(&states[0]); - if let Some(c) = &arrow::compute::sum(counts) { - self.0 += *c; - } - Ok(()) - } - - fn evaluate(&self) -> Result { - Ok(ScalarValue::Int64(Some(self.0))) - } - - fn size(&self) -> usize { - std::mem::size_of_val(self) - } - } - - let my_count = create_udaf( - "my_count", - vec![DataType::Int64], - Arc::new(DataType::Int64), - Volatility::Immutable, - Arc::new(|_| Ok(Box::new(MyCount(0)))), - Arc::new(vec![DataType::Int64]), - ); - - let task_ctx = Arc::new(TaskContext::default()); - let tmp_dir = TempDir::new()?; - let (input, schema) = create_test_schema(1, tmp_dir.path())?; - - let window_exec = Arc::new(WindowAggExec::try_new( - vec![create_window_expr( - &WindowFunction::AggregateUDF(Arc::new(my_count)), - "my_count".to_owned(), - &[col("c3", &schema)?], - &[], - &[], - Arc::new(WindowFrame::new(false)), - schema.as_ref(), - )?], - input, - schema.clone(), - vec![], - )?); - - let result: Vec = collect(window_exec, task_ctx).await?; - assert_eq!(result.len(), 1); - - let n_schema_fields = schema.fields().len(); - let columns = result[0].columns(); - - let count: &Int64Array = as_primitive_array(&columns[n_schema_fields])?; - assert_eq!(count.value(0), 100); - assert_eq!(count.value(99), 100); - Ok(()) - } - - #[tokio::test] - async fn window_function() -> Result<()> { - let task_ctx = Arc::new(TaskContext::default()); - let tmp_dir = TempDir::new()?; - let (input, schema) = create_test_schema(1, tmp_dir.path())?; - - let window_exec = Arc::new(WindowAggExec::try_new( - vec![ - create_window_expr( - &WindowFunction::AggregateFunction(AggregateFunction::Count), - "count".to_owned(), - &[col("c3", &schema)?], - &[], - &[], - Arc::new(WindowFrame::new(false)), - schema.as_ref(), - )?, - create_window_expr( - &WindowFunction::AggregateFunction(AggregateFunction::Max), - "max".to_owned(), - &[col("c3", &schema)?], - &[], - &[], - Arc::new(WindowFrame::new(false)), - schema.as_ref(), - )?, - create_window_expr( - &WindowFunction::AggregateFunction(AggregateFunction::Min), - "min".to_owned(), - &[col("c3", &schema)?], - &[], - &[], - Arc::new(WindowFrame::new(false)), - schema.as_ref(), - )?, - ], - input, - schema.clone(), - vec![], - )?); - 
- let result: Vec = collect(window_exec, task_ctx).await?; - assert_eq!(result.len(), 1); - - let n_schema_fields = schema.fields().len(); - let columns = result[0].columns(); - - // c3 is small int - - let count: &Int64Array = as_primitive_array(&columns[n_schema_fields])?; - assert_eq!(count.value(0), 100); - assert_eq!(count.value(99), 100); - - let max: &Int8Array = as_primitive_array(&columns[n_schema_fields + 1])?; - assert_eq!(max.value(0), 125); - assert_eq!(max.value(99), 125); - - let min: &Int8Array = as_primitive_array(&columns[n_schema_fields + 2])?; - assert_eq!(min.value(0), -117); - assert_eq!(min.value(99), -117); - - Ok(()) - } - #[tokio::test] async fn test_drop_cancel() -> Result<()> { let task_ctx = Arc::new(TaskContext::default()); From b762f3db8c18d25e891fe4276ed90abb4b963e62 Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Wed, 13 Sep 2023 16:12:17 -0400 Subject: [PATCH 04/15] Update cargo.lock --- datafusion-cli/Cargo.lock | 265 ++++++++++++++++++++++++++++++-------- 1 file changed, 214 insertions(+), 51 deletions(-) diff --git a/datafusion-cli/Cargo.lock b/datafusion-cli/Cargo.lock index 0c6c6846a89a8..5c92e933f5a00 100644 --- a/datafusion-cli/Cargo.lock +++ b/datafusion-cli/Cargo.lock @@ -75,6 +75,15 @@ dependencies = [ "libc", ] +[[package]] +name = "ansi_term" +version = "0.12.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d52a9bb7ec0cf484c551830a7ce27bd20d67eac647e1befb56b0be4ee39a55d2" +dependencies = [ + "winapi", +] + [[package]] name = "anstyle" version = "1.0.3" @@ -87,6 +96,12 @@ version = "0.3.7" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6b4930d2cb77ce62f89ee5d5289b4ac049559b1c45539271f5ed4fdc7db34545" +[[package]] +name = "arrayvec" +version = "0.5.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "23b62fc65de8e4e7f52534fb52b0f3ed04746ae267519eef2a83941e8085068b" + [[package]] name = "arrayvec" version = "0.7.4" @@ -172,7 +187,7 @@ dependencies = [ "chrono", "comfy-table", "half", - "lexical-core", + "lexical-core 0.8.5", "num", ] @@ -190,8 +205,8 @@ dependencies = [ "chrono", "csv", "csv-core", - "lazy_static", - "lexical-core", + "lazy_static 1.4.0", + "lexical-core 0.8.5", "regex", ] @@ -235,9 +250,9 @@ dependencies = [ "chrono", "half", "indexmap 2.0.0", - "lexical-core", + "lexical-core 0.8.5", "num", - "serde", + "serde 1.0.188", "serde_json", ] @@ -323,9 +338,9 @@ dependencies = [ [[package]] name = "async-compression" -version = "0.4.2" +version = "0.4.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d495b6dc0184693324491a5ac05f559acc97bf937ab31d7a1c33dd0016be6d2b" +checksum = "bb42b2197bf15ccb092b62c74515dbd8b86d0effd934795f6687c93b6e679a2c" dependencies = [ "bzip2", "flate2", @@ -347,7 +362,7 @@ checksum = "bc00ceb34980c03614e35a3a4e218276a0a824e911d07651cd0d858a51e8c0f0" dependencies = [ "proc-macro2", "quote", - "syn 2.0.32", + "syn 2.0.33", ] [[package]] @@ -438,7 +453,7 @@ dependencies = [ "bytes", "http", "http-body", - "lazy_static", + "lazy_static 1.4.0", "percent-encoding", "pin-project-lite", "tracing", @@ -556,7 +571,7 @@ dependencies = [ "http-body", "hyper", "hyper-rustls 0.23.2", - "lazy_static", + "lazy_static 1.4.0", "pin-project-lite", "rustls 0.20.9", "tokio", @@ -716,7 +731,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "199c42ab6972d92c9f8995f086273d25c42fc0f7b2a1fcefba465c1352d25ba5" dependencies = [ "arrayref", - "arrayvec", + "arrayvec 0.7.4", "cc", 
"cfg-if", "constant_time_eq", @@ -761,7 +776,7 @@ checksum = "4c2f7349907b712260e64b0afe2f84692af14a454be26187d9df565c7f69266a" dependencies = [ "memchr", "regex-automata", - "serde", + "serde 1.0.188", ] [[package]] @@ -837,8 +852,8 @@ checksum = "defd4e7873dbddba6c7c91e199c7fcb946abc4a6a4ac3195400bcfb01b5de877" dependencies = [ "android-tzdata", "iana-time-zone", - "num-traits", - "serde", + "num-traits 0.2.16", + "serde 1.0.188", "windows-targets", ] @@ -925,6 +940,22 @@ dependencies = [ "unicode-width", ] +[[package]] +name = "config" +version = "0.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1b1b9d958c2b1368a663f05538fc1b5975adce1e19f435acceae987aceeeb369" +dependencies = [ + "lazy_static 1.4.0", + "nom", + "rust-ini", + "serde 1.0.188", + "serde-hjson", + "serde_json", + "toml", + "yaml-rust", +] + [[package]] name = "const-random" version = "0.1.15" @@ -1012,7 +1043,7 @@ dependencies = [ "csv-core", "itoa", "ryu", - "serde", + "serde 1.0.188", ] [[package]] @@ -1031,7 +1062,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1f34ba9a9bcb8645379e9de8cb3ecfcf4d1c85ba66d90deb3259206fa5aa193b" dependencies = [ "quote", - "syn 2.0.32", + "syn 2.0.33", ] [[package]] @@ -1241,6 +1272,7 @@ dependencies = [ "log", "parking_lot", "pin-project-lite", + "ptree", "rand", "rstest 0.18.2", "tempfile", @@ -1282,6 +1314,15 @@ dependencies = [ "subtle", ] +[[package]] +name = "directories" +version = "4.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f51c5d4ddabd36886dd3e1438cb358cdcb0d7c499cb99cb4ac2e38e18b5cb210" +dependencies = [ + "dirs-sys", +] + [[package]] name = "dirs" version = "4.0.0" @@ -1458,7 +1499,7 @@ version = "0.9.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "98de4bbd547a563b716d8dfa9aad1cb19bfab00f4fa09a6a4ed21dbcf44ce9c4" dependencies = [ - "num-traits", + "num-traits 0.2.16", ] [[package]] @@ -1532,7 +1573,7 @@ checksum = "89ca545a94061b6365f2c7355b4b32bd20df3ff95f02da9329b34ccc3bd6ee72" dependencies = [ "proc-macro2", "quote", - "syn 2.0.32", + "syn 2.0.33", ] [[package]] @@ -1631,7 +1672,7 @@ checksum = "bc52e53916c08643f1b56ec082790d1e86a32e58dc5268f897f313fbae7b4872" dependencies = [ "cfg-if", "crunchy", - "num-traits", + "num-traits 0.2.16", ] [[package]] @@ -1895,12 +1936,31 @@ dependencies = [ "wasm-bindgen", ] +[[package]] +name = "lazy_static" +version = "0.2.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "76f033c7ad61445c5b347c7382dd1237847eb1bce590fe50365dcb33d546be73" + [[package]] name = "lazy_static" version = "1.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646" +[[package]] +name = "lexical-core" +version = "0.7.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6607c62aa161d23d17a9072cc5da0be67cdfc89d3afb1e8d9c842bebc2525ffe" +dependencies = [ + "arrayvec 0.5.2", + "bitflags 1.3.2", + "cfg-if", + "ryu", + "static_assertions", +] + [[package]] name = "lexical-core" version = "0.8.5" @@ -1967,9 +2027,9 @@ dependencies = [ [[package]] name = "libc" -version = "0.2.147" +version = "0.2.148" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b4668fb0ea861c1df094127ac5f1da3409a82116a4ba74fca2e58ef927159bb3" +checksum = "9cdc71e17332e86d2e1d38c1f99edcb6288ee11b815fb1a4b049eaa2114d369b" [[package]] name = "libm" @@ -1987,6 +2047,12 @@ dependencies 
= [ "libc", ] +[[package]] +name = "linked-hash-map" +version = "0.5.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0717cef1bc8b636c6e1c1bbdefc09e6322da8a9321966e8928ef80d20f7f770f" + [[package]] name = "linux-raw-sys" version = "0.4.7" @@ -2110,6 +2176,17 @@ dependencies = [ "libc", ] +[[package]] +name = "nom" +version = "5.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "08959a387a676302eebf4ddbcbc611da04285579f76f88ee0506c63b1a61dd4b" +dependencies = [ + "lexical-core 0.7.6", + "memchr", + "version_check", +] + [[package]] name = "normalize-line-endings" version = "0.3.0" @@ -2127,7 +2204,7 @@ dependencies = [ "num-integer", "num-iter", "num-rational", - "num-traits", + "num-traits 0.2.16", ] [[package]] @@ -2138,7 +2215,7 @@ checksum = "608e7659b5c3d7cba262d894801b9ec9d00de989e8a82bd4bef91d08da45cdc0" dependencies = [ "autocfg", "num-integer", - "num-traits", + "num-traits 0.2.16", ] [[package]] @@ -2147,7 +2224,7 @@ version = "0.4.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1ba157ca0885411de85d6ca030ba7e2a83a28636056c7c699b07c8b6f7383214" dependencies = [ - "num-traits", + "num-traits 0.2.16", ] [[package]] @@ -2157,7 +2234,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "225d3389fb3509a24c93f5c29eb6bde2586b98d9f016636dff58d7c6f7569cd9" dependencies = [ "autocfg", - "num-traits", + "num-traits 0.2.16", ] [[package]] @@ -2168,7 +2245,7 @@ checksum = "7d03e6c028c5dc5cac6e2dec0efda81fc887605bb3d884578bb6d6bf7514e252" dependencies = [ "autocfg", "num-integer", - "num-traits", + "num-traits 0.2.16", ] [[package]] @@ -2180,7 +2257,16 @@ dependencies = [ "autocfg", "num-bigint", "num-integer", - "num-traits", + "num-traits 0.2.16", +] + +[[package]] +name = "num-traits" +version = "0.1.43" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "92e5113e9fd4cc14ded8e499429f396a20f98c772a47cc8622a736e1ec843c31" +dependencies = [ + "num-traits 0.2.16", ] [[package]] @@ -2233,7 +2319,7 @@ dependencies = [ "reqwest", "ring", "rustls-pemfile", - "serde", + "serde 1.0.188", "serde_json", "snafu", "tokio", @@ -2260,7 +2346,7 @@ version = "2.10.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7940cf2ca942593318d07fcf2596cdca60a85c9e7fab408a5e21a4f9dcd40d87" dependencies = [ - "num-traits", + "num-traits 0.2.16", ] [[package]] @@ -2418,7 +2504,7 @@ checksum = "4359fd9c9171ec6e8c62926d6faaf553a8dc3f64e1507e76da7911b4f6a04405" dependencies = [ "proc-macro2", "quote", - "syn 2.0.32", + "syn 2.0.33", ] [[package]] @@ -2508,13 +2594,29 @@ checksum = "dc375e1527247fe1a97d8b7156678dfe7c1af2fc075c9a4db3690ecd2a148068" [[package]] name = "proc-macro2" -version = "1.0.66" +version = "1.0.67" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "18fb31db3f9bddb2ea821cde30a9f70117e3f119938b5ee630b7403aa6e2ead9" +checksum = "3d433d9f1a3e8c1263d9456598b16fec66f4acc9a74dacffd35c7bb09b3a1328" dependencies = [ "unicode-ident", ] +[[package]] +name = "ptree" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a0de80796b316aec75344095a6d2ef68ec9b8f573b9e7adc821149ba3598e270" +dependencies = [ + "ansi_term", + "atty", + "config", + "directories", + "petgraph", + "serde 1.0.188", + "serde-value", + "tint", +] + [[package]] name = "quick-xml" version = "0.28.2" @@ -2522,7 +2624,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = 
"0ce5e73202a820a31f8a0ee32ada5e21029c81fd9e3ebf668a40832e4219d9d1" dependencies = [ "memchr", - "serde", + "serde 1.0.188", ] [[package]] @@ -2663,7 +2765,7 @@ dependencies = [ "pin-project-lite", "rustls 0.21.7", "rustls-pemfile", - "serde", + "serde 1.0.188", "serde_json", "serde_urlencoded", "tokio", @@ -2745,10 +2847,16 @@ dependencies = [ "regex", "relative-path", "rustc_version", - "syn 2.0.32", + "syn 2.0.33", "unicode-ident", ] +[[package]] +name = "rust-ini" +version = "0.13.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3e52c148ef37f8c375d49d5a73aa70713125b7f19095948a923f80afdeb22ec2" + [[package]] name = "rustc-demangle" version = "0.1.23" @@ -2824,9 +2932,9 @@ dependencies = [ [[package]] name = "rustls-webpki" -version = "0.101.4" +version = "0.101.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7d93931baf2d282fff8d3a532bbfd7653f734643161b87e3e01e59a04439bf0d" +checksum = "45a27e3b59326c16e23d30aeb7a36a24cc0d29e71d68ff611cdfb4a01d013bed" dependencies = [ "ring", "untrusted", @@ -2936,6 +3044,12 @@ version = "0.3.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a3f0bf26fd526d2a95683cd0f87bf103b8539e2ca1ef48ce002d67aad59aa0b4" +[[package]] +name = "serde" +version = "0.8.23" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9dad3f759919b92c3068c696c15c3d17238234498bbdcc80f2c469606f948ac8" + [[package]] name = "serde" version = "1.0.188" @@ -2945,6 +3059,28 @@ dependencies = [ "serde_derive", ] +[[package]] +name = "serde-hjson" +version = "0.9.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6a3a4e0ea8a88553209f6cc6cfe8724ecad22e1acf372793c27d995290fe74f8" +dependencies = [ + "lazy_static 1.4.0", + "num-traits 0.1.43", + "regex", + "serde 0.8.23", +] + +[[package]] +name = "serde-value" +version = "0.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f3a1a3341211875ef120e117ea7fd5228530ae7e7036a779fdc9117be6b3282c" +dependencies = [ + "ordered-float", + "serde 1.0.188", +] + [[package]] name = "serde_derive" version = "1.0.188" @@ -2953,7 +3089,7 @@ checksum = "4eca7ac642d82aa35b60049a6eccb4be6be75e599bd2e9adb5f875a737654af2" dependencies = [ "proc-macro2", "quote", - "syn 2.0.32", + "syn 2.0.33", ] [[package]] @@ -2964,7 +3100,7 @@ checksum = "2cc66a619ed80bf7a0f6b17dd063a84b88f6dea1813737cf469aef1d081142c2" dependencies = [ "itoa", "ryu", - "serde", + "serde 1.0.188", ] [[package]] @@ -2976,7 +3112,7 @@ dependencies = [ "form_urlencoded", "itoa", "ryu", - "serde", + "serde 1.0.188", ] [[package]] @@ -3142,7 +3278,7 @@ dependencies = [ "proc-macro2", "quote", "rustversion", - "syn 2.0.32", + "syn 2.0.33", ] [[package]] @@ -3164,9 +3300,9 @@ dependencies = [ [[package]] name = "syn" -version = "2.0.32" +version = "2.0.33" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "239814284fd6f1a4ffe4ca893952cdd93c224b6a1571c9a9eadd670295c0c9e2" +checksum = "9caece70c63bfba29ec2fed841a09851b14a235c60010fa4de58089b6c025668" dependencies = [ "proc-macro2", "quote", @@ -3224,7 +3360,7 @@ checksum = "49922ecae66cc8a249b77e68d1d0623c1b2c514f0060c27cdc68bd62a1219d35" dependencies = [ "proc-macro2", "quote", - "syn 2.0.32", + "syn 2.0.33", ] [[package]] @@ -3245,7 +3381,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "17f6bb557fd245c28e6411aa56b6403c689ad95061f50e4be16c274e70a17e48" dependencies = [ "deranged", - "serde", + "serde 1.0.188", 
"time-core", "time-macros", ] @@ -3265,6 +3401,15 @@ dependencies = [ "time-core", ] +[[package]] +name = "tint" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7af24570664a3074673dbbf69a65bdae0ae0b72f2949b1adfbacb736ee4d6896" +dependencies = [ + "lazy_static 0.2.11", +] + [[package]] name = "tiny-keccak" version = "2.0.2" @@ -3315,7 +3460,7 @@ checksum = "630bdcf245f78637c13ec01ffae6187cca34625e8c63150d424b59e55af2675e" dependencies = [ "proc-macro2", "quote", - "syn 2.0.32", + "syn 2.0.33", ] [[package]] @@ -3364,6 +3509,15 @@ dependencies = [ "tracing", ] +[[package]] +name = "toml" +version = "0.5.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f4f7f0dd8d50a853a531c426359045b1998f04219d88799810762cd4ad314234" +dependencies = [ + "serde 1.0.188", +] + [[package]] name = "tower" version = "0.4.13" @@ -3413,7 +3567,7 @@ checksum = "5f4f31f56159e98206da9efd823404b79b6ef3143b4a7ab76e67b1751b25a4ab" dependencies = [ "proc-macro2", "quote", - "syn 2.0.32", + "syn 2.0.33", ] [[package]] @@ -3455,9 +3609,9 @@ checksum = "92888ba5573ff080736b3648696b70cafad7d250551175acbaa4e0385b3e1460" [[package]] name = "unicode-ident" -version = "1.0.11" +version = "1.0.12" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "301abaae475aa91687eb82514b328ab47a211a533026cb25fc3e519b86adfc3c" +checksum = "3354b9ac3fae1ff6755cb6db53683adb661634f67557942dea4facebec0fee4b" [[package]] name = "unicode-normalization" @@ -3585,7 +3739,7 @@ dependencies = [ "once_cell", "proc-macro2", "quote", - "syn 2.0.32", + "syn 2.0.33", "wasm-bindgen-shared", ] @@ -3619,7 +3773,7 @@ checksum = "54681b18a46765f095758388f2d0cf16eb8d4169b639ab575a8f5693af210c7b" dependencies = [ "proc-macro2", "quote", - "syn 2.0.32", + "syn 2.0.33", "wasm-bindgen-backend", "wasm-bindgen-shared", ] @@ -3800,6 +3954,15 @@ dependencies = [ "lzma-sys", ] +[[package]] +name = "yaml-rust" +version = "0.4.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "56c1936c4cc7a1c9ab21a1ebb602eb942ba868cbd44a99cb7cdc5892335e1c85" +dependencies = [ + "linked-hash-map", +] + [[package]] name = "zeroize" version = "1.6.0" From a3f01e1b6ec2411f55a1c8f5fc07e285bcff80dd Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Thu, 14 Sep 2023 16:09:32 -0400 Subject: [PATCH 05/15] fix merge issue --- datafusion/core/Cargo.toml | 1 - datafusion/physical-plan/Cargo.toml | 2 +- datafusion/physical-plan/src/aggregates/mod.rs | 2 +- datafusion/physical-plan/src/repartition/mod.rs | 6 +----- 4 files changed, 3 insertions(+), 8 deletions(-) diff --git a/datafusion/core/Cargo.toml b/datafusion/core/Cargo.toml index b80a7fda5bdad..d68032e9e45cc 100644 --- a/datafusion/core/Cargo.toml +++ b/datafusion/core/Cargo.toml @@ -111,7 +111,6 @@ rand_distr = "0.4.3" regex = "1.5.4" rstest = "0.18.0" rust_decimal = { version = "1.27.0", features = ["tokio-pg"] } -termtree = "0.4.1" test-utils = { path = "../../test-utils" } thiserror = "1.0.37" tokio-postgres = "0.7.7" diff --git a/datafusion/physical-plan/Cargo.toml b/datafusion/physical-plan/Cargo.toml index 98fd53b66150e..0e25020d826ff 100644 --- a/datafusion/physical-plan/Cargo.toml +++ b/datafusion/physical-plan/Cargo.toml @@ -54,7 +54,7 @@ parking_lot = "0.12" pin-project-lite = "^0.2.7" #[dev-dependencies] -ptree = "0.4.0" +termtree = "0.4.1" rand = "0.8" rstest = "0.18.0" tempfile = "3" diff --git a/datafusion/physical-plan/src/aggregates/mod.rs b/datafusion/physical-plan/src/aggregates/mod.rs index 
9299d017513eb..53d735882702a 100644 --- a/datafusion/physical-plan/src/aggregates/mod.rs +++ b/datafusion/physical-plan/src/aggregates/mod.rs @@ -262,7 +262,7 @@ pub(crate) struct AggregationOrdering { #[derive(Debug)] pub struct AggregateExec { /// Aggregation mode (full, partial) - pub mode: AggregateMode, + mode: AggregateMode, /// Group by expressions pub group_by: PhysicalGroupBy, /// Aggregate expressions diff --git a/datafusion/physical-plan/src/repartition/mod.rs b/datafusion/physical-plan/src/repartition/mod.rs index c10bfc78b117a..fd69b347c80bf 100644 --- a/datafusion/physical-plan/src/repartition/mod.rs +++ b/datafusion/physical-plan/src/repartition/mod.rs @@ -1402,11 +1402,7 @@ mod tests { /// Create vector batches fn create_vec_batches(schema: &Schema, n: usize) -> Vec<RecordBatch> { let batch = create_batch(schema); - let mut vec = Vec::with_capacity(n); - for _ in 0..n { - vec.push(batch.clone()); - } - vec + (0..n).map(|_| batch.clone()).collect() } /// Create batch From 4fc9f3388a02b19b80211205ff2397cdde464d4d Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Thu, 14 Sep 2023 16:24:56 -0400 Subject: [PATCH 06/15] Make mode private --- .../combine_partial_final_agg.rs | 105 ++++++++---------- .../enforce_distribution.rs | 21 ++-- .../physical_optimizer/topk_aggregation.rs | 2 +- .../physical-plan/src/aggregates/mod.rs | 6 + 4 files changed, 64 insertions(+), 70 deletions(-) diff --git a/datafusion/core/src/physical_optimizer/combine_partial_final_agg.rs b/datafusion/core/src/physical_optimizer/combine_partial_final_agg.rs index de47f3fbee730..1f8d708a47707 100644 --- a/datafusion/core/src/physical_optimizer/combine_partial_final_agg.rs +++ b/datafusion/core/src/physical_optimizer/combine_partial_final_agg.rs @@ -50,74 +50,59 @@ impl PhysicalOptimizerRule for CombinePartialFinalAggregate { _config: &ConfigOptions, ) -> Result<Arc<dyn ExecutionPlan>> { plan.transform_down(&|plan| { - let transformed = plan.as_any().downcast_ref::<AggregateExec>().and_then( - |AggregateExec { mode: final_mode, input: final_input, group_by: final_group_by, aggr_expr: final_aggr_expr, filter_expr: final_filter_expr, .. }| { if matches!( final_mode, AggregateMode::Final | AggregateMode::FinalPartitioned ) { final_input .as_any() .downcast_ref::<AggregateExec>() .and_then( |AggregateExec { mode: input_mode, input: partial_input, group_by: input_group_by, aggr_expr: input_aggr_expr, filter_expr: input_filter_expr, order_by_expr: input_order_by_expr, input_schema, ..
- }| { - if matches!(input_mode, AggregateMode::Partial) - && can_combine( - ( - final_group_by, - final_aggr_expr, - final_filter_expr, - ), - ( - input_group_by, - input_aggr_expr, - input_filter_expr, - ), - ) - { - let mode = if *final_mode == AggregateMode::Final - { - AggregateMode::Single - } else { - AggregateMode::SinglePartitioned - }; + let transformed = + plan.as_any() + .downcast_ref::<AggregateExec>() + .and_then(|agg_exec| { + if matches!( + agg_exec.mode(), + AggregateMode::Final | AggregateMode::FinalPartitioned + ) { + agg_exec + .input() + .as_any() + .downcast_ref::<AggregateExec>() + .and_then(|input_agg_exec| { + if matches!( + input_agg_exec.mode(), + AggregateMode::Partial + ) && can_combine( + ( + agg_exec.group_by(), + agg_exec.aggr_expr(), + agg_exec.filter_expr(), + ), + ( + input_agg_exec.group_by(), + input_agg_exec.aggr_expr(), + input_agg_exec.filter_expr(), + ), + ) { + let mode = + if agg_exec.mode() == &AggregateMode::Final { + AggregateMode::Single + } else { + AggregateMode::SinglePartitioned + }; AggregateExec::try_new( mode, - input_group_by.clone(), - input_aggr_expr.to_vec(), - input_filter_expr.to_vec(), - input_order_by_expr.to_vec(), - partial_input.clone(), - input_schema.clone(), + input_agg_exec.group_by().clone(), + input_agg_exec.aggr_expr().to_vec(), + input_agg_exec.filter_expr.to_vec(), + input_agg_exec.order_by_expr.to_vec(), + input_agg_exec.input().clone(), + input_agg_exec.input_schema().clone(), ) .ok() .map(Arc::new) } else { None } - }, - ) - } else { - None - } - }, - ); + }) + } else { + None + } + }); Ok(if let Some(transformed) = transformed { Transformed::Yes(transformed) diff --git a/datafusion/core/src/physical_optimizer/enforce_distribution.rs b/datafusion/core/src/physical_optimizer/enforce_distribution.rs index 6752d1a10f566..77d6e7d7123de 100644 --- a/datafusion/core/src/physical_optimizer/enforce_distribution.rs +++ b/datafusion/core/src/physical_optimizer/enforce_distribution.rs @@ -509,7 +509,9 @@ fn reorder_aggregate_keys( match new_positions { None => Ok(PlanWithKeyRequirements::new(agg_plan)), Some(positions) => { - let new_partial_agg = if let Some(AggregateExec { + let new_partial_agg = if let Some(agg_exec) = + agg_exec.input().as_any().downcast_ref::<AggregateExec>() + /*AggregateExec { mode, group_by, aggr_expr, filter_expr, order_by_expr, input, input_schema, ..
}) = - agg_exec.input().as_any().downcast_ref::<AggregateExec>() + */ { - if matches!(mode, AggregateMode::Partial) { + if matches!(agg_exec.mode(), &AggregateMode::Partial) { let mut new_group_exprs = vec![]; for idx in positions.iter() { - new_group_exprs.push(group_by.expr()[*idx].clone()); + new_group_exprs + .push(agg_exec.group_by().expr()[*idx].clone()); } let new_partial_group_by = PhysicalGroupBy::new_single(new_group_exprs); @@ -532,11 +535,11 @@ Some(Arc::new(AggregateExec::try_new( AggregateMode::Partial, new_partial_group_by, - aggr_expr.clone(), - filter_expr.clone(), - order_by_expr.clone(), - input.clone(), - input_schema.clone(), + agg_exec.aggr_expr().to_vec(), + agg_exec.filter_expr().to_vec(), + agg_exec.order_by_expr().to_vec(), + agg_exec.input().clone(), + agg_exec.input_schema.clone(), )?)) } else { None diff --git a/datafusion/core/src/physical_optimizer/topk_aggregation.rs b/datafusion/core/src/physical_optimizer/topk_aggregation.rs index f862675bf2051..2563a6123963c 100644 --- a/datafusion/core/src/physical_optimizer/topk_aggregation.rs +++ b/datafusion/core/src/physical_optimizer/topk_aggregation.rs @@ -68,7 +68,7 @@ impl TopKAggregation { // We found what we want: clone, copy the limit down, and return modified node let mut new_aggr = AggregateExec::try_new( - aggr.mode, + aggr.mode().clone(), aggr.group_by.clone(), aggr.aggr_expr.clone(), aggr.filter_expr.clone(), diff --git a/datafusion/physical-plan/src/aggregates/mod.rs b/datafusion/physical-plan/src/aggregates/mod.rs index 53d735882702a..2ba517aab64c2 100644 --- a/datafusion/physical-plan/src/aggregates/mod.rs +++ b/datafusion/physical-plan/src/aggregates/mod.rs @@ -713,6 +713,12 @@ impl AggregateExec { &self.mode } + /// Set the mode of this AggregateExec to `mode` + pub fn with_mode(mut self, mode: AggregateMode) -> Self { + self.mode = mode; + self + } + /// Grouping expressions pub fn group_expr(&self) -> &PhysicalGroupBy { &self.group_by From 202e3b1e250674460fda8902f08c5db46a4f3bb3 Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Thu, 14 Sep 2023 16:31:11 -0400 Subject: [PATCH 07/15] Make the other fields private --- .../combine_partial_final_agg.rs | 4 ++-- .../physical_optimizer/topk_aggregation.rs | 20 +++++++++---------- .../physical-plan/src/aggregates/mod.rs | 17 ++++++++-------- 3 files changed, 20 insertions(+), 21 deletions(-) diff --git a/datafusion/core/src/physical_optimizer/combine_partial_final_agg.rs b/datafusion/core/src/physical_optimizer/combine_partial_final_agg.rs index 1f8d708a47707..40b2bcc3e140e 100644 --- a/datafusion/core/src/physical_optimizer/combine_partial_final_agg.rs +++ b/datafusion/core/src/physical_optimizer/combine_partial_final_agg.rs @@ -88,8 +88,8 @@ impl PhysicalOptimizerRule for CombinePartialFinalAggregate { mode, input_agg_exec.group_by().clone(), input_agg_exec.aggr_expr().to_vec(), - input_agg_exec.filter_expr.to_vec(), - input_agg_exec.order_by_expr.to_vec(), + input_agg_exec.filter_expr().to_vec(), + input_agg_exec.order_by_expr().to_vec(), input_agg_exec.input().clone(), input_agg_exec.input_schema().clone(), ) diff --git a/datafusion/core/src/physical_optimizer/topk_aggregation.rs b/datafusion/core/src/physical_optimizer/topk_aggregation.rs index 2563a6123963c..7360e7d5c3a1a 100644 --- a/datafusion/core/src/physical_optimizer/topk_aggregation.rs +++ b/datafusion/core/src/physical_optimizer/topk_aggregation.rs @@ -56,7 +56,7 @@ impl TopKAggregation { if !kt.is_primitive() && kt != DataType::Utf8 { return None; } - if
aggr.filter_expr.iter().any(|e| e.is_some()) { + if aggr.filter_expr().iter().any(|e| e.is_some()) { return None; } @@ -67,17 +67,17 @@ impl TopKAggregation { } // We found what we want: clone, copy the limit down, and return modified node - let mut new_aggr = AggregateExec::try_new( + let new_aggr = AggregateExec::try_new( aggr.mode().clone(), - aggr.group_by.clone(), - aggr.aggr_expr.clone(), - aggr.filter_expr.clone(), - aggr.order_by_expr.clone(), - aggr.input.clone(), - aggr.input_schema.clone(), + aggr.group_by().clone(), + aggr.aggr_expr().to_vec(), + aggr.filter_expr().to_vec(), + aggr.order_by_expr().to_vec(), + aggr.input().clone(), + aggr.input_schema().clone(), ) - .expect("Unable to copy Aggregate!"); - new_aggr.limit = Some(limit); + .expect("Unable to copy Aggregate!") + .with_limit(Some(limit)); Some(Arc::new(new_aggr)) } diff --git a/datafusion/physical-plan/src/aggregates/mod.rs b/datafusion/physical-plan/src/aggregates/mod.rs index 2ba517aab64c2..43ba99c68d151 100644 --- a/datafusion/physical-plan/src/aggregates/mod.rs +++ b/datafusion/physical-plan/src/aggregates/mod.rs @@ -264,15 +264,15 @@ pub struct AggregateExec { /// Aggregation mode (full, partial) mode: AggregateMode, /// Group by expressions - pub group_by: PhysicalGroupBy, + group_by: PhysicalGroupBy, /// Aggregate expressions - pub aggr_expr: Vec<Arc<dyn AggregateExpr>>, + aggr_expr: Vec<Arc<dyn AggregateExpr>>, /// FILTER (WHERE clause) expression for each aggregate expression - pub filter_expr: Vec<Option<Arc<dyn PhysicalExpr>>>, + filter_expr: Vec<Option<Arc<dyn PhysicalExpr>>>, /// (ORDER BY clause) expression for each aggregate expression - pub order_by_expr: Vec<Option<LexOrdering>>, + order_by_expr: Vec<Option<LexOrdering>>, /// Set if the output of this aggregation is truncated by a upstream sort/limit clause - pub limit: Option<usize>, + limit: Option<usize>, /// Input plan, could be a partial aggregate or the input to the aggregate pub input: Arc<dyn ExecutionPlan>, /// Schema after the aggregate is applied @@ -713,12 +713,11 @@ impl AggregateExec { &self.mode } - /// Set the mode of this AggregateExec to `mode` - pub fn with_mode(mut self, mode: AggregateMode) -> Self { - self.mode = mode; + /// Set the `limit` of this AggExec + pub fn with_limit(mut self, limit: Option<usize>) -> Self { + self.limit = limit; self } - /// Grouping expressions pub fn group_expr(&self) -> &PhysicalGroupBy { &self.group_by From ab180ad7a77b83fd170bb0977ff3c2fcff027685 Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Thu, 14 Sep 2023 16:40:24 -0400 Subject: [PATCH 08/15] cleanup --- .../physical-plan/src/repartition/mod.rs | 21 ++++++++++--------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/datafusion/physical-plan/src/repartition/mod.rs b/datafusion/physical-plan/src/repartition/mod.rs index fd69b347c80bf..579b1b3d57b32 100644 --- a/datafusion/physical-plan/src/repartition/mod.rs +++ b/datafusion/physical-plan/src/repartition/mod.rs @@ -920,7 +920,7 @@ mod tests { async fn one_to_many_round_robin() -> Result<()> { // define input partitions let schema = test_schema(); - let partition = create_vec_batches(&schema, 50); + let partition = create_vec_batches(50); let partitions = vec![partition]; // repartition from 1 input to 4 output @@ -940,7 +940,7 @@ async fn many_to_one_round_robin() -> Result<()> { // define input partitions let schema = test_schema(); - let partition = create_vec_batches(&schema, 50); + let partition = create_vec_batches(50); let partitions = vec![partition.clone(), partition.clone(), partition.clone()]; // repartition from 3 input to 1 output @@ -957,7 +957,7 @@ async fn many_to_many_round_robin() -> Result<()> { // define input
partitions let schema = test_schema(); - let partition = create_vec_batches(&schema, 50); + let partition = create_vec_batches(50); let partitions = vec![partition.clone(), partition.clone(), partition.clone()]; // repartition from 3 input to 5 output @@ -978,7 +978,7 @@ async fn many_to_many_hash_partition() -> Result<()> { // define input partitions let schema = test_schema(); - let partition = create_vec_batches(&schema, 50); + let partition = create_vec_batches(50); let partitions = vec![partition.clone(), partition.clone(), partition.clone()]; let output_partitions = repartition( @@ -1033,7 +1033,7 @@ tokio::spawn(async move { // define input partitions let schema = test_schema(); - let partition = create_vec_batches(&schema, 50); + let partition = create_vec_batches(50); let partitions = vec![partition.clone(), partition.clone(), partition.clone()]; @@ -1367,7 +1367,7 @@ async fn oom() -> Result<()> { // define input partitions let schema = test_schema(); - let partition = create_vec_batches(&schema, 50); + let partition = create_vec_batches(50); let input_partitions = vec![partition]; let partitioning = Partitioning::RoundRobinBatch(4); @@ -1400,15 +1400,16 @@ } /// Create vector batches - fn create_vec_batches(schema: &Schema, n: usize) -> Vec<RecordBatch> { - let batch = create_batch(schema); + fn create_vec_batches(n: usize) -> Vec<RecordBatch> { + let batch = create_batch(); (0..n).map(|_| batch.clone()).collect() } /// Create batch - fn create_batch(schema: &Schema) -> RecordBatch { + fn create_batch() -> RecordBatch { + let schema = test_schema(); RecordBatch::try_new( - Arc::new(schema.clone()), + schema, vec![Arc::new(UInt32Array::from(vec![1, 2, 3, 4, 5, 6, 7, 8]))], ) .unwrap() } From db33d987436fb61c6b404714979ff74da528658f Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Thu, 14 Sep 2023 17:02:44 -0400 Subject: [PATCH 09/15] avoid assert_batches duplication --- datafusion/common/src/lib.rs | 3 + datafusion/common/src/test_util.rs | 79 +++++++ .../physical-plan/src/aggregates/mod.rs | 7 +- .../physical-plan/src/joins/cross_join.rs | 3 +- .../physical-plan/src/joins/hash_join.rs | 4 +- .../src/joins/nested_loop_join.rs | 6 +- .../src/joins/sort_merge_join.rs | 6 +- .../physical-plan/src/repartition/mod.rs | 3 +- .../src/sorts/sort_preserving_merge.rs | 2 +- datafusion/physical-plan/src/test.rs | 222 +----------------- 10 files changed, 99 insertions(+), 236 deletions(-) diff --git a/datafusion/common/src/lib.rs index 5f02d92e50c9b..420bcd963c305 100644 --- a/datafusion/common/src/lib.rs +++ b/datafusion/common/src/lib.rs @@ -63,6 +63,9 @@ pub use table_reference::{OwnedTableReference, ResolvedTableReference, TableRefe pub use unnest::UnnestOptions; pub use utils::project_schema; +/// Reexport arrow crate +pub use arrow; + /// Downcast an Arrow Array to a concrete type, return an `DataFusionError::Internal` if the cast is /// not possible. In normal usage of DataFusion the downcast should always succeed. /// diff --git a/datafusion/common/src/test_util.rs index 048b0d58d8c4d..60f1df7fd11ac 100644 --- a/datafusion/common/src/test_util.rs +++ b/datafusion/common/src/test_util.rs @@ -19,6 +19,85 @@ use std::{error::Error, path::PathBuf}; +/// Compares formatted output of a record batch with an expected +/// vector of strings, with the result of pretty formatting record +/// batches.
This is a macro so errors appear on the correct line +/// +/// Designed so that failure output can be directly copy/pasted +/// into the test code as expected results. +/// +/// Expects to be called about like this: +/// +/// `assert_batches_eq!(expected_lines: &[&str], batches: &[RecordBatch])` +#[macro_export] +macro_rules! assert_batches_eq { + ($EXPECTED_LINES: expr, $CHUNKS: expr) => { + let expected_lines: Vec<String> = + $EXPECTED_LINES.iter().map(|&s| s.into()).collect(); + + let formatted = $crate::arrow::util::pretty::pretty_format_batches_with_options( + $CHUNKS, + &$crate::format::DEFAULT_FORMAT_OPTIONS, + ) + .unwrap() + .to_string(); + + let actual_lines: Vec<&str> = formatted.trim().lines().collect(); + + assert_eq!( + expected_lines, actual_lines, + "\n\nexpected:\n\n{:#?}\nactual:\n\n{:#?}\n\n", + expected_lines, actual_lines + ); + }; +} + +/// Compares formatted output of a record batch with an expected +/// vector of strings in a way that order does not matter. +/// This is a macro so errors appear on the correct line +/// +/// Designed so that failure output can be directly copy/pasted +/// into the test code as expected results. +/// +/// Expects to be called about like this: +/// +/// `assert_batches_sorted_eq!(expected_lines: &[&str], batches: &[RecordBatch])` +#[macro_export] +macro_rules! assert_batches_sorted_eq { + ($EXPECTED_LINES: expr, $CHUNKS: expr) => { + let mut expected_lines: Vec<String> = + $EXPECTED_LINES.iter().map(|&s| s.into()).collect(); + + // sort except for header + footer + let num_lines = expected_lines.len(); + if num_lines > 3 { + expected_lines.as_mut_slice()[2..num_lines - 1].sort_unstable() + } + + let formatted = $crate::arrow::util::pretty::pretty_format_batches_with_options( + $CHUNKS, + &$crate::format::DEFAULT_FORMAT_OPTIONS, + ) + .unwrap() + .to_string(); + // fix for windows: \r\n --> + + let mut actual_lines: Vec<&str> = formatted.trim().lines().collect(); + + // sort except for header + footer + let num_lines = actual_lines.len(); + if num_lines > 3 { + actual_lines.as_mut_slice()[2..num_lines - 1].sort_unstable() + } + + assert_eq!( + expected_lines, actual_lines, + "\n\nexpected:\n\n{:#?}\nactual:\n\n{:#?}\n\n", + expected_lines, actual_lines + ); + }; +} + /// A macro to assert that one string is contained within another with /// a nice error message if they are not.
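For context on how these relocated macros are used, here is a minimal sketch of a typical caller (the batch construction and expected table below are illustrative, not taken from this patch series):

    use std::sync::Arc;
    use arrow_array::{ArrayRef, Int32Array, RecordBatch};
    use datafusion_common::assert_batches_eq;

    // Build a one-column batch, then compare it against its expected
    // pretty-printed form; on mismatch the macro prints both tables so
    // the correct output can be copy/pasted back into the test.
    let batch = RecordBatch::try_from_iter(vec![(
        "a",
        Arc::new(Int32Array::from(vec![1, 2])) as ArrayRef,
    )])
    .unwrap();
    assert_batches_eq!(
        &["+---+", "| a |", "+---+", "| 1 |", "| 2 |", "+---+"],
        &[batch]
    );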
/// diff --git a/datafusion/physical-plan/src/aggregates/mod.rs b/datafusion/physical-plan/src/aggregates/mod.rs index ac93621e36d6f..d7813f3186a51 100644 --- a/datafusion/physical-plan/src/aggregates/mod.rs +++ b/datafusion/physical-plan/src/aggregates/mod.rs @@ -1275,11 +1275,11 @@ mod tests { }; use crate::coalesce_batches::CoalesceBatchesExec; use crate::coalesce_partitions::CoalescePartitionsExec; + use crate::common; use crate::expressions::{col, Avg}; use crate::memory::MemoryExec; use crate::test::exec::{assert_strong_count_converges_to_zero, BlockingExec}; use crate::test::{assert_is_pending, mem_exec}; - use crate::{assert_batches_eq, assert_batches_sorted_eq, common}; use crate::{ DisplayAs, ExecutionPlan, Partitioning, RecordBatchStream, SendableRecordBatchStream, Statistics, @@ -1289,7 +1289,10 @@ mod tests { use arrow::compute::{concat_batches, SortOptions}; use arrow::datatypes::{DataType, Field, Schema, SchemaRef}; use arrow::record_batch::RecordBatch; - use datafusion_common::{internal_err, DataFusionError, Result, ScalarValue}; + use datafusion_common::{ + assert_batches_eq, assert_batches_sorted_eq, internal_err, DataFusionError, + Result, ScalarValue, + }; use datafusion_execution::runtime_env::{RuntimeConfig, RuntimeEnv}; use datafusion_physical_expr::expressions::{ lit, ApproxDistinct, Column, Count, FirstValue, LastValue, Median, diff --git a/datafusion/physical-plan/src/joins/cross_join.rs b/datafusion/physical-plan/src/joins/cross_join.rs index 90d84282fd0dd..4ba29524b3e2f 100644 --- a/datafusion/physical-plan/src/joins/cross_join.rs +++ b/datafusion/physical-plan/src/joins/cross_join.rs @@ -457,10 +457,9 @@ impl CrossJoinStream { #[cfg(test)] mod tests { use super::*; - use crate::assert_batches_sorted_eq; - use crate::assert_contains; use crate::common; use crate::test::build_table_scan_i32; + use datafusion_common::{assert_batches_sorted_eq, assert_contains}; use datafusion_execution::runtime_env::{RuntimeConfig, RuntimeEnv}; async fn join_collect( diff --git a/datafusion/physical-plan/src/joins/hash_join.rs b/datafusion/physical-plan/src/joins/hash_join.rs index 43ade366f3257..8e204634f3d94 100644 --- a/datafusion/physical-plan/src/joins/hash_join.rs +++ b/datafusion/physical-plan/src/joins/hash_join.rs @@ -1062,13 +1062,13 @@ mod tests { use arrow::array::{ArrayRef, Date32Array, Int32Array, UInt32Builder, UInt64Builder}; use arrow::datatypes::{DataType, Field, Schema}; - use datafusion_common::ScalarValue; + use datafusion_common::{assert_batches_sorted_eq, assert_contains, ScalarValue}; use datafusion_expr::Operator; use datafusion_physical_expr::expressions::Literal; use hashbrown::raw::RawTable; use crate::{ - assert_batches_sorted_eq, assert_contains, common, + common, expressions::Column, hash_utils::create_hashes, joins::{hash_join::build_equal_condition_join_indices, utils::JoinSide}, diff --git a/datafusion/physical-plan/src/joins/nested_loop_join.rs b/datafusion/physical-plan/src/joins/nested_loop_join.rs index a0222f84a70e9..c49c16dba3130 100644 --- a/datafusion/physical-plan/src/joins/nested_loop_join.rs +++ b/datafusion/physical-plan/src/joins/nested_loop_join.rs @@ -741,8 +741,8 @@ impl RecordBatchStream for NestedLoopJoinStream { mod tests { use super::*; use crate::{ - assert_batches_sorted_eq, assert_contains, common, expressions::Column, - memory::MemoryExec, repartition::RepartitionExec, test::build_table_i32, + common, expressions::Column, memory::MemoryExec, repartition::RepartitionExec, + test::build_table_i32, }; use 
arrow::datatypes::{DataType, Field}; use datafusion_execution::runtime_env::{RuntimeConfig, RuntimeEnv}; @@ -750,7 +750,7 @@ mod tests { use datafusion_physical_expr::expressions::BinaryExpr; use crate::joins::utils::JoinSide; - use datafusion_common::ScalarValue; + use datafusion_common::{assert_batches_sorted_eq, assert_contains, ScalarValue}; use datafusion_physical_expr::expressions::Literal; use datafusion_physical_expr::PhysicalExpr; use std::sync::Arc; diff --git a/datafusion/physical-plan/src/joins/sort_merge_join.rs b/datafusion/physical-plan/src/joins/sort_merge_join.rs index bf2f977d820d6..4de723ab73ea5 100644 --- a/datafusion/physical-plan/src/joins/sort_merge_join.rs +++ b/datafusion/physical-plan/src/joins/sort_merge_join.rs @@ -1404,16 +1404,16 @@ mod tests { use datafusion_execution::config::SessionConfig; use datafusion_execution::TaskContext; - use crate::assert_contains; use crate::expressions::Column; use crate::joins::utils::JoinOn; use crate::joins::SortMergeJoinExec; use crate::memory::MemoryExec; use crate::test::build_table_i32; - use crate::{assert_batches_eq, assert_batches_sorted_eq}; use crate::{common, ExecutionPlan}; - use datafusion_common::JoinType; use datafusion_common::Result; + use datafusion_common::{ + assert_batches_eq, assert_batches_sorted_eq, assert_contains, JoinType, + }; use datafusion_execution::runtime_env::{RuntimeConfig, RuntimeEnv}; fn build_table( diff --git a/datafusion/physical-plan/src/repartition/mod.rs b/datafusion/physical-plan/src/repartition/mod.rs index 579b1b3d57b32..14b54dc0614d8 100644 --- a/datafusion/physical-plan/src/repartition/mod.rs +++ b/datafusion/physical-plan/src/repartition/mod.rs @@ -895,7 +895,6 @@ impl RecordBatchStream for PerPartitionStream { mod tests { use super::*; use crate::{ - assert_batches_sorted_eq, test::{ assert_is_pending, exec::{ @@ -910,7 +909,7 @@ mod tests { use arrow::record_batch::RecordBatch; use arrow_array::UInt32Array; use datafusion_common::cast::as_string_array; - use datafusion_common::exec_err; + use datafusion_common::{assert_batches_sorted_eq, exec_err}; use datafusion_execution::runtime_env::{RuntimeConfig, RuntimeEnv}; use futures::FutureExt; use std::collections::HashSet; diff --git a/datafusion/physical-plan/src/sorts/sort_preserving_merge.rs b/datafusion/physical-plan/src/sorts/sort_preserving_merge.rs index b9e2c9662b14c..6e81f43c3d3f0 100644 --- a/datafusion/physical-plan/src/sorts/sort_preserving_merge.rs +++ b/datafusion/physical-plan/src/sorts/sort_preserving_merge.rs @@ -277,7 +277,6 @@ mod tests { use datafusion_execution::config::SessionConfig; use futures::{FutureExt, StreamExt}; - use crate::assert_batches_eq; use crate::coalesce_partitions::CoalescePartitionsExec; use crate::expressions::col; use crate::memory::MemoryExec; @@ -288,6 +287,7 @@ mod tests { use crate::test::{self, assert_is_pending, make_partition}; use crate::{collect, common}; use arrow::array::{Int32Array, StringArray, TimestampNanosecondArray}; + use datafusion_common::assert_batches_eq; use super::*; diff --git a/datafusion/physical-plan/src/test.rs b/datafusion/physical-plan/src/test.rs index e0c612387470c..9e6312284c08f 100644 --- a/datafusion/physical-plan/src/test.rs +++ b/datafusion/physical-plan/src/test.rs @@ -17,10 +17,9 @@ //! 
Utilities for testing datafusion-physical-plan -use std::error::Error; +use std::collections::HashMap; use std::pin::Pin; use std::sync::Arc; -use std::{collections::HashMap, path::PathBuf}; use arrow_array::{ArrayRef, Int32Array, RecordBatch}; use arrow_schema::{DataType, Field, Schema, SchemaRef}; @@ -31,225 +30,6 @@ use crate::ExecutionPlan; pub mod exec; -/// A macro to assert that one string is contained within another with -/// a nice error message if they are not. -/// -/// Usage: `assert_contains!(actual, expected)` -/// -/// Is a macro so test error -/// messages are on the same line as the failure; -/// -/// Both arguments must be convertable into Strings ([`Into`]<[`String`]>) -#[macro_export] -macro_rules! assert_contains { - ($ACTUAL: expr, $EXPECTED: expr) => { - let actual_value: String = $ACTUAL.into(); - let expected_value: String = $EXPECTED.into(); - assert!( - actual_value.contains(&expected_value), - "Can not find expected in actual.\n\nExpected:\n{}\n\nActual:\n{}", - expected_value, - actual_value - ); - }; -} - -/// A macro to assert that one string is NOT contained within another with -/// a nice error message if they are are. -/// -/// Usage: `assert_not_contains!(actual, unexpected)` -/// -/// Is a macro so test error -/// messages are on the same line as the failure; -/// -/// Both arguments must be convertable into Strings ([`Into`]<[`String`]>) -#[macro_export] -macro_rules! assert_not_contains { - ($ACTUAL: expr, $UNEXPECTED: expr) => { - let actual_value: String = $ACTUAL.into(); - let unexpected_value: String = $UNEXPECTED.into(); - assert!( - !actual_value.contains(&unexpected_value), - "Found unexpected in actual.\n\nUnexpected:\n{}\n\nActual:\n{}", - unexpected_value, - actual_value - ); - }; -} - -/// Compares formatted output of a record batch with an expected -/// vector of strings, with the result of pretty formatting record -/// batches. This is a macro so errors appear on the correct line -/// -/// Designed so that failure output can be directly copy/pasted -/// into the test code as expected results. -/// -/// Expects to be called about like this: -/// -/// `assert_batch_eq!(expected_lines: &[&str], batches: &[RecordBatch])` -#[macro_export] -macro_rules! assert_batches_eq { - ($EXPECTED_LINES: expr, $CHUNKS: expr) => { - let expected_lines: Vec<String> = - $EXPECTED_LINES.iter().map(|&s| s.into()).collect(); - - let formatted = arrow::util::pretty::pretty_format_batches_with_options( - $CHUNKS, - &datafusion_common::format::DEFAULT_FORMAT_OPTIONS, - ) - .unwrap() - .to_string(); - - let actual_lines: Vec<&str> = formatted.trim().lines().collect(); - - assert_eq!( - expected_lines, actual_lines, - "\n\nexpected:\n\n{:#?}\nactual:\n\n{:#?}\n\n", - expected_lines, actual_lines - ); - }; -} - -/// Compares formatted output of a record batch with an expected -/// vector of strings in a way that order does not matter. -/// This is a macro so errors appear on the correct line -/// -/// Designed so that failure output can be directly copy/pasted -/// into the test code as expected results. -/// -/// Expects to be called about like this: -/// -/// `assert_batch_sorted_eq!(expected_lines: &[&str], batches: &[RecordBatch])` -#[macro_export] -macro_rules!
assert_batches_sorted_eq { - ($EXPECTED_LINES: expr, $CHUNKS: expr) => { - let mut expected_lines: Vec<String> = - $EXPECTED_LINES.iter().map(|&s| s.into()).collect(); - - // sort except for header + footer - let num_lines = expected_lines.len(); - if num_lines > 3 { - expected_lines.as_mut_slice()[2..num_lines - 1].sort_unstable() - } - - let formatted = arrow::util::pretty::pretty_format_batches_with_options( - $CHUNKS, - &datafusion_common::format::DEFAULT_FORMAT_OPTIONS, - ) - .unwrap() - .to_string(); - // fix for windows: \r\n --> - - let mut actual_lines: Vec<&str> = formatted.trim().lines().collect(); - - // sort except for header + footer - let num_lines = actual_lines.len(); - if num_lines > 3 { - actual_lines.as_mut_slice()[2..num_lines - 1].sort_unstable() - } - - assert_eq!( - expected_lines, actual_lines, - "\n\nexpected:\n\n{:#?}\nactual:\n\n{:#?}\n\n", - expected_lines, actual_lines - ); - }; -} - -/// Returns the arrow test data directory, which is by default stored -/// in a git submodule rooted at `testing/data`. -/// -/// The default can be overridden by the optional environment -/// variable `ARROW_TEST_DATA` -/// -/// panics when the directory can not be found. -/// -/// Example: -/// ``` -/// let testdata = datafusion_common::test_util::arrow_test_data(); -/// let csvdata = format!("{}/csv/aggregate_test_100.csv", testdata); -/// assert!(std::path::PathBuf::from(csvdata).exists()); -/// ``` -pub fn arrow_test_data() -> String { - match get_data_dir("ARROW_TEST_DATA", "../../testing/data") { - Ok(pb) => pb.display().to_string(), - Err(err) => panic!("failed to get arrow data dir: {err}"), - } -} - -/// Returns the parquet test data directory, which is by default -/// stored in a git submodule rooted at -/// `parquet-testing/data`. -/// -/// The default can be overridden by the optional environment variable -/// `PARQUET_TEST_DATA` -/// -/// panics when the directory can not be found. -/// -/// Example: -/// ``` -/// let testdata = datafusion_common::test_util::parquet_test_data(); -/// let filename = format!("{}/binary.parquet", testdata); -/// assert!(std::path::PathBuf::from(filename).exists()); -/// ``` -pub fn parquet_test_data() -> String { - match get_data_dir("PARQUET_TEST_DATA", "../../parquet-testing/data") { - Ok(pb) => pb.display().to_string(), - Err(err) => panic!("failed to get parquet data dir: {err}"), - } -} - -/// Returns a directory path for finding test data. -/// -/// udf_env: name of an environment variable -/// -/// submodule_dir: fallback path (relative to CARGO_MANIFEST_DIR) -/// -/// Returns either: -/// The path referred to in `udf_env` if that variable is set and refers to a directory -/// The submodule_data directory relative to CARGO_MANIFEST_PATH -pub fn get_data_dir( - udf_env: &str, - submodule_data: &str, -) -> Result<PathBuf, Box<dyn Error>> { - // Try user defined env. - if let Ok(dir) = std::env::var(udf_env) { - let trimmed = dir.trim().to_string(); - if !trimmed.is_empty() { - let pb = PathBuf::from(trimmed); - if pb.is_dir() { - return Ok(pb); - } else { - return Err(format!( - "the data dir `{}` defined by env {} not found", - pb.display(), - udf_env - ) - .into()); - } - } - } - - // The env is undefined or its value is trimmed to empty, let's try default dir.
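These deleted helpers are not lost: they already live in datafusion-common, so callers only need an import change. A hypothetical downstream test, assuming the testing/ and parquet-testing/ git submodules are checked out:

    use datafusion_common::test_util::{arrow_test_data, parquet_test_data};

    #[test]
    fn test_data_dirs_resolve() {
        // get_data_dir resolution order: the ARROW_TEST_DATA /
        // PARQUET_TEST_DATA env vars when set, otherwise the submodule
        // paths relative to CARGO_MANIFEST_DIR.
        let csv = format!("{}/csv/aggregate_test_100.csv", arrow_test_data());
        assert!(std::path::Path::new(&csv).exists());
        let parquet = format!("{}/binary.parquet", parquet_test_data());
        assert!(std::path::Path::new(&parquet).exists());
    }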
- - // env "CARGO_MANIFEST_DIR" is "the directory containing the manifest of your package", - // set by `cargo run` or `cargo test`, see: - // https://doc.rust-lang.org/cargo/reference/environment-variables.html - let dir = env!("CARGO_MANIFEST_DIR"); - - let pb = PathBuf::from(dir).join(submodule_data); - if pb.is_dir() { - Ok(pb) - } else { - Err(format!( - "env `{}` is undefined or has empty value, and the pre-defined data dir `{}` not found\n\ - HINT: try running `git submodule update --init`", - udf_env, - pb.display(), - ).into()) - } -} - /// Asserts that given future is pending. pub fn assert_is_pending<'a, T>(fut: &mut Pin<Box<dyn Future<Output = T> + Send + 'a>>) { let waker = futures::task::noop_waker(); From df292a715e034a0fe8407a8d5e78bf22936951c7 Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Thu, 14 Sep 2023 17:06:47 -0400 Subject: [PATCH 10/15] Remove duplicated macro definition --- datafusion/core/src/lib.rs | 4 ++ datafusion/core/src/test_util/mod.rs | 80 +--------------------------- 2 files changed, 6 insertions(+), 78 deletions(-) diff --git a/datafusion/core/src/lib.rs b/datafusion/core/src/lib.rs index 4f74888c840b1..576f66a5ed7c1 100644 --- a/datafusion/core/src/lib.rs +++ b/datafusion/core/src/lib.rs @@ -471,6 +471,10 @@ pub mod physical_plan { pub use datafusion_physical_plan::*; } +// Reexport testing macros for compatibility +pub use datafusion_common::assert_batches_eq; +pub use datafusion_common::assert_batches_sorted_eq; + /// re-export of [`datafusion_sql`] crate pub mod sql { pub use datafusion_sql::*; diff --git a/datafusion/core/src/test_util/mod.rs b/datafusion/core/src/test_util/mod.rs index ab29cecbb8eaa..bd52c3eedaa4f 100644 --- a/datafusion/core/src/test_util/mod.rs +++ b/datafusion/core/src/test_util/mod.rs @@ -50,84 +50,8 @@ pub use datafusion_common::test_util::{ arrow_test_data, get_data_dir, parquet_test_data, }; -/// Compares formatted output of a record batch with an expected -/// vector of strings, with the result of pretty formatting record -/// batches. This is a macro so errors appear on the correct line -/// -/// Designed so that failure output can be directly copy/pasted -/// into the test code as expected results. -/// -/// Expects to be called about like this: -/// -/// `assert_batch_eq!(expected_lines: &[&str], batches: &[RecordBatch])` -#[macro_export] -macro_rules! assert_batches_eq { - ($EXPECTED_LINES: expr, $CHUNKS: expr) => { - let expected_lines: Vec<String> = - $EXPECTED_LINES.iter().map(|&s| s.into()).collect(); - - let formatted = $crate::arrow::util::pretty::pretty_format_batches_with_options( - $CHUNKS, - &$crate::common::format::DEFAULT_FORMAT_OPTIONS, - ) - .unwrap() - .to_string(); - - let actual_lines: Vec<&str> = formatted.trim().lines().collect(); - - assert_eq!( - expected_lines, actual_lines, - "\n\nexpected:\n\n{:#?}\nactual:\n\n{:#?}\n\n", - expected_lines, actual_lines - ); - }; -} - -/// Compares formatted output of a record batch with an expected -/// vector of strings in a way that order does not matter. -/// This is a macro so errors appear on the correct line -/// -/// Designed so that failure output can be directly copy/pasted -/// into the test code as expected results. -/// -/// Expects to be called about like this: -/// -/// `assert_batch_sorted_eq!(expected_lines: &[&str], batches: &[RecordBatch])` -#[macro_export] -macro_rules!
assert_batches_sorted_eq { - ($EXPECTED_LINES: expr, $CHUNKS: expr) => { - let mut expected_lines: Vec<String> = - $EXPECTED_LINES.iter().map(|&s| s.into()).collect(); - - // sort except for header + footer - let num_lines = expected_lines.len(); - if num_lines > 3 { - expected_lines.as_mut_slice()[2..num_lines - 1].sort_unstable() - } - - let formatted = $crate::arrow::util::pretty::pretty_format_batches_with_options( - $CHUNKS, - &$crate::common::format::DEFAULT_FORMAT_OPTIONS, - ) - .unwrap() - .to_string(); - // fix for windows: \r\n --> - - let mut actual_lines: Vec<&str> = formatted.trim().lines().collect(); - - // sort except for header + footer - let num_lines = actual_lines.len(); - if num_lines > 3 { - actual_lines.as_mut_slice()[2..num_lines - 1].sort_unstable() - } - - assert_eq!( - expected_lines, actual_lines, - "\n\nexpected:\n\n{:#?}\nactual:\n\n{:#?}\n\n", - expected_lines, actual_lines - ); - }; -} +pub use datafusion_common::assert_batches_eq; +pub use datafusion_common::assert_batches_sorted_eq; /// Scan an empty data source, mainly used in tests pub fn scan_empty( From 01e5c85a9c8719cbe0574587a16fd395893c5ca5 Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Fri, 15 Sep 2023 07:36:47 -0400 Subject: [PATCH 11/15] move dep --- datafusion-cli/Cargo.lock | 2 +- datafusion/core/Cargo.toml | 1 - datafusion/physical-plan/Cargo.toml | 1 + 3 files changed, 2 insertions(+), 2 deletions(-) diff --git a/datafusion-cli/Cargo.lock b/datafusion-cli/Cargo.lock index 783d7c26a2375..0b5af8252c85b 100644 --- a/datafusion-cli/Cargo.lock +++ b/datafusion-cli/Cargo.lock @@ -1078,7 +1078,6 @@ dependencies = [ "log", "num_cpus", "object_store", - "once_cell", "parking_lot", "parquet", "percent-encoding", @@ -1240,6 +1239,7 @@ dependencies = [ "indexmap 2.0.0", "itertools 0.11.0", "log", + "once_cell", "parking_lot", "pin-project-lite", "rand", diff --git a/datafusion/core/Cargo.toml b/datafusion/core/Cargo.toml index 38f5e969ead2b..f414d875bf6c7 100644 --- a/datafusion/core/Cargo.toml +++ b/datafusion/core/Cargo.toml @@ -81,7 +81,6 @@ log = "^0.4" num-traits = { version = "0.2", optional = true } num_cpus = "1.13.0" object_store = "0.7.0" -once_cell = "1.18.0" parking_lot = "0.12" parquet = { workspace = true } percent-encoding = "2.2.0" diff --git a/datafusion/physical-plan/Cargo.toml b/datafusion/physical-plan/Cargo.toml index 0e25020d826ff..1a1b80437d67f 100644 --- a/datafusion/physical-plan/Cargo.toml +++ b/datafusion/physical-plan/Cargo.toml @@ -50,6 +50,7 @@ hashbrown = { version = "0.14", features = ["raw"] } indexmap = "2.0.0" itertools = { version = "0.11", features = ["use_std"] } log = "^0.4" +once_cell = "1.18.0" parking_lot = "0.12" pin-project-lite = "^0.2.7" From f50e7e16510bae0a4c4a131ed1e1f3a3f3328539 Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Fri, 15 Sep 2023 09:17:42 -0400 Subject: [PATCH 12/15] clippy --- datafusion/core/src/physical_optimizer/topk_aggregation.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/datafusion/core/src/physical_optimizer/topk_aggregation.rs b/datafusion/core/src/physical_optimizer/topk_aggregation.rs index 7360e7d5c3a1a..4789226d7aa9c 100644 --- a/datafusion/core/src/physical_optimizer/topk_aggregation.rs +++ b/datafusion/core/src/physical_optimizer/topk_aggregation.rs @@ -68,7 +68,7 @@ impl TopKAggregation { // We found what we want: clone, copy the limit down, and return modified node let new_aggr = AggregateExec::try_new( - aggr.mode().clone(), + *aggr.mode(), aggr.group_by().clone(), aggr.aggr_expr().to_vec(),
aggr.filter_expr().to_vec(), From 4775782649a26d6b6db16e87381f5783ba7880ed Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Fri, 15 Sep 2023 10:46:50 -0400 Subject: [PATCH 13/15] toml lint --- datafusion/physical-plan/Cargo.toml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/datafusion/physical-plan/Cargo.toml b/datafusion/physical-plan/Cargo.toml index 1a1b80437d67f..02f154762760a 100644 --- a/datafusion/physical-plan/Cargo.toml +++ b/datafusion/physical-plan/Cargo.toml @@ -53,10 +53,10 @@ log = "^0.4" once_cell = "1.18.0" parking_lot = "0.12" pin-project-lite = "^0.2.7" - -#[dev-dependencies] -termtree = "0.4.1" rand = "0.8" rstest = "0.18.0" tempfile = "3" + +#[dev-dependencies] +termtree = "0.4.1" tokio = { version = "1.28", features = ["macros", "rt", "rt-multi-thread", "sync", "fs", "parking_lot"] } From 4fdbeaac0b4d98b0b2f12630065c60ebcfd12617 Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Fri, 15 Sep 2023 11:39:59 -0400 Subject: [PATCH 14/15] Use timestamp_nanos_opt instead of deprecated timestamp_nanos --- datafusion-cli/Cargo.lock | 8 ++++---- datafusion/common/src/scalar.rs | 3 ++- datafusion/core/tests/parquet/mod.rs | 3 ++- datafusion/core/tests/sql/explain_analyze.rs | 9 ++++++--- .../src/simplify_expressions/simplify_exprs.rs | 4 ++-- datafusion/physical-expr/src/datetime_expressions.rs | 9 +++++---- datafusion/physical-plan/src/metrics/value.rs | 6 ++++-- .../physical-plan/src/sorts/sort_preserving_merge.rs | 12 ++++++++---- datafusion/proto/src/physical_plan/to_proto.rs | 8 +++++++- 9 files changed, 40 insertions(+), 22 deletions(-) diff --git a/datafusion-cli/Cargo.lock b/datafusion-cli/Cargo.lock index 0b5af8252c85b..f2cd44a95706a 100644 --- a/datafusion-cli/Cargo.lock +++ b/datafusion-cli/Cargo.lock @@ -831,9 +831,9 @@ checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" [[package]] name = "chrono" -version = "0.4.30" +version = "0.4.31" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "defd4e7873dbddba6c7c91e199c7fcb946abc4a6a4ac3195400bcfb01b5de877" +checksum = "7f2c685bad3eb3d45a01354cedb7d5faa66194d1d58ba6e267a8de788f79db38" dependencies = [ "android-tzdata", "iana-time-zone", @@ -3445,9 +3445,9 @@ dependencies = [ [[package]] name = "typenum" -version = "1.16.0" +version = "1.17.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "497961ef93d974e23eb6f433eb5fe1b7930b659f06d12dec6fc44a8f554c0bba" +checksum = "42ff0bf0c66b8238c6f3b578df37d0b7848e55df8577b3f74f92a69acceeb825" [[package]] name = "unicode-bidi" diff --git a/datafusion/common/src/scalar.rs b/datafusion/common/src/scalar.rs index fa2175c223388..32343b98fa247 100644 --- a/datafusion/common/src/scalar.rs +++ b/datafusion/common/src/scalar.rs @@ -4965,7 +4965,8 @@ mod tests { .unwrap() .and_hms_nano_opt(hour, minute, second, nanosec) .unwrap() - .timestamp_nanos(), + .timestamp_nanos_opt() + .unwrap(), ), None, )) diff --git a/datafusion/core/tests/parquet/mod.rs b/datafusion/core/tests/parquet/mod.rs index 6f289e0c064bd..db7349851ba28 100644 --- a/datafusion/core/tests/parquet/mod.rs +++ b/datafusion/core/tests/parquet/mod.rs @@ -290,7 +290,8 @@ fn make_timestamp_batch(offset: Duration) -> RecordBatch { offset_nanos + t.parse::<chrono::NaiveDateTime>() .unwrap() - .timestamp_nanos() + .timestamp_nanos_opt() + .unwrap() }) }) .collect::<Vec<_>>(); diff --git a/datafusion/core/tests/sql/explain_analyze.rs index f32ffc1642cda..06120c01ce864 100644 ---
a/datafusion/core/tests/sql/explain_analyze.rs +++ b/datafusion/core/tests/sql/explain_analyze.rs @@ -18,6 +18,7 @@ use super::*; use datafusion::config::ConfigOptions; use datafusion::physical_plan::display::DisplayableExecutionPlan; +use datafusion::physical_plan::metrics::Timestamp; #[tokio::test] async fn explain_analyze_baseline_metrics() { @@ -142,11 +143,11 @@ async fn explain_analyze_baseline_metrics() { metrics.iter().for_each(|m| match m.value() { MetricValue::StartTimestamp(ts) => { saw_start = true; - assert!(ts.value().unwrap().timestamp_nanos() > 0); + assert!(nanos_from_timestamp(ts) > 0); } MetricValue::EndTimestamp(ts) => { saw_end = true; - assert!(ts.value().unwrap().timestamp_nanos() > 0); + assert!(nanos_from_timestamp(ts) > 0); } _ => {} }); @@ -161,7 +162,9 @@ async fn explain_analyze_baseline_metrics() { datafusion::physical_plan::accept(physical_plan.as_ref(), &mut TimeValidator {}) .unwrap(); } - +fn nanos_from_timestamp(ts: &Timestamp) -> i64 { + ts.value().unwrap().timestamp_nanos_opt().unwrap() +} #[tokio::test] async fn csv_explain_plans() { // This test verify the look of each plan in its full cycle plan creation diff --git a/datafusion/optimizer/src/simplify_expressions/simplify_exprs.rs b/datafusion/optimizer/src/simplify_expressions/simplify_exprs.rs index e6d66720ee1b4..59c1c5a1266d3 100644 --- a/datafusion/optimizer/src/simplify_expressions/simplify_exprs.rs +++ b/datafusion/optimizer/src/simplify_expressions/simplify_exprs.rs @@ -473,8 +473,8 @@ mod tests { let expected = format!( "Projection: TimestampNanosecond({}, Some(\"+00:00\")) AS now(), TimestampNanosecond({}, Some(\"+00:00\")) AS t2\ \n TableScan: test", - time.timestamp_nanos(), - time.timestamp_nanos() + time.timestamp_nanos_opt().unwrap(), + time.timestamp_nanos_opt().unwrap() ); assert_eq!(expected, actual); diff --git a/datafusion/physical-expr/src/datetime_expressions.rs b/datafusion/physical-expr/src/datetime_expressions.rs index bf90a7da9c0bd..63f7645fea5db 100644 --- a/datafusion/physical-expr/src/datetime_expressions.rs +++ b/datafusion/physical-expr/src/datetime_expressions.rs @@ -168,7 +168,7 @@ pub fn to_timestamp_seconds(args: &[ColumnarValue]) -> Result<ColumnarValue> { pub fn make_now( now_ts: DateTime<Utc>, ) -> impl Fn(&[ColumnarValue]) -> Result<ColumnarValue> { - let now_ts = Some(now_ts.timestamp_nanos()); + let now_ts = now_ts.timestamp_nanos_opt(); move |_arg| { Ok(ColumnarValue::Scalar(ScalarValue::TimestampNanosecond( now_ts, @@ -204,7 +204,7 @@ pub fn make_current_date( pub fn make_current_time( now_ts: DateTime<Utc>, ) -> impl Fn(&[ColumnarValue]) -> Result<ColumnarValue> { - let nano = Some(now_ts.timestamp_nanos() % 86400000000000); + let nano = now_ts.timestamp_nanos_opt().map(|ts| ts % 86400000000000); move |_arg| Ok(ColumnarValue::Scalar(ScalarValue::Time64Nanosecond(nano))) } @@ -271,8 +271,9 @@ fn date_trunc_coarse(granularity: &str, value: i64) -> Result<i64> { return exec_err!("Unsupported date_trunc granularity: {unsupported}"); } }; + let value = value.and_then(|value| value.timestamp_nanos_opt()); // `with_x(0)` are infallible because `0` are always a valid - Ok(value.unwrap().timestamp_nanos()) + Ok(value.unwrap()) } // truncates a single value with the given timeunit to the specified granularity @@ -459,7 +460,7 @@ fn date_bin_months_interval(stride_months: i64, source: i64, origin: i64) -> i64 }; } - bin_time.timestamp_nanos() + bin_time.timestamp_nanos_opt().unwrap() } fn to_utc_date_time(nanos: i64) -> DateTime<Utc> { diff --git a/datafusion/physical-plan/src/metrics/value.rs
b/datafusion/physical-plan/src/metrics/value.rs index 59b012f25a27d..899ceb60b49f7 100644 --- a/datafusion/physical-plan/src/metrics/value.rs +++ b/datafusion/physical-plan/src/metrics/value.rs @@ -430,11 +430,13 @@ impl MetricValue { Self::Time { time, .. } => time.value(), Self::StartTimestamp(timestamp) => timestamp .value() - .map(|ts| ts.timestamp_nanos() as usize) + .and_then(|ts| ts.timestamp_nanos_opt()) + .map(|nanos| nanos as usize) .unwrap_or(0), Self::EndTimestamp(timestamp) => timestamp .value() - .map(|ts| ts.timestamp_nanos() as usize) + .and_then(|ts| ts.timestamp_nanos_opt()) + .map(|nanos| nanos as usize) .unwrap_or(0), } } diff --git a/datafusion/physical-plan/src/sorts/sort_preserving_merge.rs b/datafusion/physical-plan/src/sorts/sort_preserving_merge.rs index 6e81f43c3d3f0..6754a16331568 100644 --- a/datafusion/physical-plan/src/sorts/sort_preserving_merge.rs +++ b/datafusion/physical-plan/src/sorts/sort_preserving_merge.rs @@ -280,12 +280,12 @@ mod tests { use crate::coalesce_partitions::CoalescePartitionsExec; use crate::expressions::col; use crate::memory::MemoryExec; - use crate::metrics::MetricValue; + use crate::metrics::{MetricValue, Timestamp}; use crate::sorts::sort::SortExec; use crate::stream::RecordBatchReceiverStream; + use crate::{collect, common}; use crate::test::exec::{assert_strong_count_converges_to_zero, BlockingExec}; use crate::test::{self, assert_is_pending, make_partition}; - use crate::{collect, common}; use arrow::array::{Int32Array, StringArray, TimestampNanosecondArray}; use datafusion_common::assert_batches_eq; @@ -893,11 +893,11 @@ metrics.iter().for_each(|m| match m.value() { MetricValue::StartTimestamp(ts) => { saw_start = true; - assert!(ts.value().unwrap().timestamp_nanos() > 0); + assert!(nanos_from_timestamp(ts) > 0); } MetricValue::EndTimestamp(ts) => { saw_end = true; - assert!(ts.value().unwrap().timestamp_nanos() > 0); + assert!(nanos_from_timestamp(ts) > 0); } _ => {} }); @@ -906,6 +906,10 @@ assert!(saw_end); } + fn nanos_from_timestamp(ts: &Timestamp) -> i64 { + ts.value().unwrap().timestamp_nanos_opt().unwrap() + } + #[tokio::test] async fn test_drop_cancel() -> Result<()> { let task_ctx = Arc::new(TaskContext::default()); diff --git a/datafusion/proto/src/physical_plan/to_proto.rs b/datafusion/proto/src/physical_plan/to_proto.rs index 62221f9d754c2..a5b1300360fe7 100644 --- a/datafusion/proto/src/physical_plan/to_proto.rs +++ b/datafusion/proto/src/physical_plan/to_proto.rs @@ -422,10 +422,16 @@ impl TryFrom<&PartitionedFile> for protobuf::PartitionedFile { type Error = DataFusionError; fn try_from(pf: &PartitionedFile) -> Result<Self, Self::Error> { + let last_modified = pf.object_meta.last_modified; + let last_modified_ns = last_modified.timestamp_nanos_opt().ok_or_else(|| { + DataFusionError::Plan(format!( + "Invalid timestamp on PartitionedFile::ObjectMeta: {last_modified}" + )) + })?
as u64; Ok(protobuf::PartitionedFile { path: pf.object_meta.location.as_ref().to_owned(), size: pf.object_meta.size as u64, - last_modified_ns: pf.object_meta.last_modified.timestamp_nanos() as u64, + last_modified_ns, partition_values: pf .partition_values .iter() From 5121e47379c1da21e4c1af23b25dc984fa060b06 Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Fri, 15 Sep 2023 12:01:27 -0400 Subject: [PATCH 15/15] fmt --- datafusion/physical-plan/src/sorts/sort_preserving_merge.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/datafusion/physical-plan/src/sorts/sort_preserving_merge.rs b/datafusion/physical-plan/src/sorts/sort_preserving_merge.rs index 6754a16331568..5b485e0b68e41 100644 --- a/datafusion/physical-plan/src/sorts/sort_preserving_merge.rs +++ b/datafusion/physical-plan/src/sorts/sort_preserving_merge.rs @@ -283,9 +283,9 @@ mod tests { use crate::metrics::{MetricValue, Timestamp}; use crate::sorts::sort::SortExec; use crate::stream::RecordBatchReceiverStream; - use crate::{collect, common}; use crate::test::exec::{assert_strong_count_converges_to_zero, BlockingExec}; use crate::test::{self, assert_is_pending, make_partition}; + use crate::{collect, common}; use arrow::array::{Int32Array, StringArray, TimestampNanosecondArray}; use datafusion_common::assert_batches_eq;
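The recurring edit in the last two patches is mechanical: chrono 0.4.31 deprecates the panicking `timestamp_nanos` in favor of `timestamp_nanos_opt`, which returns `None` when the instant does not fit in an `i64` of nanoseconds (roughly outside the years 1677-2262). A minimal sketch of the before/after shape, illustrative rather than lifted from the diffs above:

    use chrono::{DateTime, Utc};

    fn nanos(ts: DateTime<Utc>) -> Option<i64> {
        // Before: `ts.timestamp_nanos()` panicked on out-of-range dates.
        // After: the overflow case surfaces as an Option, and each call
        // site decides whether to unwrap or to propagate an error, as
        // to_proto.rs does with ok_or_else above.
        ts.timestamp_nanos_opt()
    }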