Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
35 commits
Select commit Hold shift + click to select a range
0444abc
Make EnforceSorting global sort aware, fix union bug, parallelize CP+…
ozankabak Jan 26, 2023
fd7a714
Remove unnecessary cloning
ozankabak Jan 26, 2023
015f931
Convert sort_onward to tree to support multipath
mustafasrepo Jan 30, 2023
640dc7b
Merge branch 'apache:master' into feature/union_exec_handling
mustafasrepo Jan 30, 2023
b356962
Add limit bug handling
mustafasrepo Jan 31, 2023
4d8a4b2
minor changes
mustafasrepo Jan 31, 2023
3884fd5
Coalesce Partitions converted to tree
mustafasrepo Jan 31, 2023
32ff1b0
Simplifications
mustafasrepo Jan 31, 2023
2d9819a
Add new test
mustafasrepo Jan 31, 2023
dff1f7f
Simplifications
mustafasrepo Jan 31, 2023
17e07eb
Simplifications and refactors
ozankabak Feb 1, 2023
1fa2c43
Update todos
mustafasrepo Feb 1, 2023
3c8399e
Merge branch 'master' into feature/union_exec_handling
mustafasrepo Feb 1, 2023
cb0debd
minor changes
mustafasrepo Feb 1, 2023
f3542a2
Add test for union doesn't maintain any of its child ordering
mustafasrepo Feb 1, 2023
51ced76
Add new test
mustafasrepo Feb 1, 2023
d5d4943
use corresponding idx instead of 0th index
mustafasrepo Feb 1, 2023
1359a73
Add global limit test
mustafasrepo Feb 1, 2023
a470f98
Add SortPreservingMerge handling
mustafasrepo Feb 2, 2023
027eace
add finer sorting change Sort and SortPreserve test
mustafasrepo Feb 2, 2023
ec27254
Update test
mustafasrepo Feb 2, 2023
4c32040
simplifications
mustafasrepo Feb 2, 2023
1d3a995
Merge branch 'apache:master' into feature/union_exec_handling
mustafasrepo Feb 2, 2023
d8e9515
Simplifications
mustafasrepo Feb 2, 2023
3f68703
Improved comments and naming
ozankabak Feb 2, 2023
13d9074
Remove explicit union check
mustafasrepo Feb 7, 2023
fdffa3f
make parallelize sort flag config parameter
mustafasrepo Feb 7, 2023
4d19187
go back to first commit, add test
mustafasrepo Feb 7, 2023
2bdbbef
Merge branch 'apache:master' into feature/union_exec_handling
mustafasrepo Feb 7, 2023
663d163
update config mg
mustafasrepo Feb 7, 2023
d5957ae
Use repartition_sorts in the API instead of parallelize_sorts
ozankabak Feb 7, 2023
6a63f72
Update/format configs.md
ozankabak Feb 7, 2023
0ab76da
Use maintains_input_order instead of output_ordering comparison
mustafasrepo Feb 8, 2023
03c4fbe
Move logic under map_children to init method
mustafasrepo Feb 8, 2023
a25ed96
Remove two unnecessary clones
ozankabak Feb 8, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 12 additions & 0 deletions datafusion/common/src/config.rs
Original file line number Diff line number Diff line change
Expand Up @@ -290,6 +290,18 @@ config_namespace! {
/// functions in parallel using the provided `target_partitions` level"
pub repartition_windows: bool, default = true

/// Should DataFusion execute sorts in a per-partition fashion and merge
/// afterwards instead of coalescing first and sorting globally.
/// When this flag is enabled, plans in the form below
/// "SortExec: [a@0 ASC]",
/// " CoalescePartitionsExec",
/// " RepartitionExec: partitioning=RoundRobinBatch(8), input_partitions=1",
/// would turn into the plan below which performs better in multithreaded environments
/// "SortPreservingMergeExec: [a@0 ASC]",
/// " SortExec: [a@0 ASC]",
/// " RepartitionExec: partitioning=RoundRobinBatch(8), input_partitions=1",
pub repartition_sorts: bool, default = true

/// When set to true, the logical plan optimizer will produce warning
/// messages if any optimization rules produce errors and then proceed to the next
/// rule. When set to false, any rules that produce errors will cause the query to fail
Expand Down
12 changes: 12 additions & 0 deletions datafusion/core/src/execution/context.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1232,6 +1232,12 @@ impl SessionConfig {
self.options.optimizer.repartition_windows
}

/// Returns whether sorts are executed in a per-partition fashion and merged
/// afterwards (`true`), or whether partitions are coalesced first and then
/// sorted globally (`false`).
///
/// Reads the `optimizer.repartition_sorts` option.
pub fn repartition_sorts(&self) -> bool {
self.options.optimizer.repartition_sorts
}

/// Are statistics collected during execution?
pub fn collect_statistics(&self) -> bool {
self.options.execution.collect_statistics
Expand Down Expand Up @@ -1290,6 +1296,12 @@ impl SessionConfig {
self
}

/// Enables or disables the use of per-partition sorting to improve parallelism.
///
/// Sets the `optimizer.repartition_sorts` option to `enabled` and returns the
/// updated configuration by value.
pub fn with_repartition_sorts(mut self, enabled: bool) -> Self {
self.options.optimizer.repartition_sorts = enabled;
self
}

/// Enables or disables the use of pruning predicate for parquet readers to skip row groups
pub fn with_parquet_pruning(mut self, enabled: bool) -> Self {
self.options.execution.parquet.pruning = enabled;
Expand Down
2 changes: 1 addition & 1 deletion datafusion/core/src/physical_optimizer/dist_enforcement.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1134,7 +1134,7 @@ mod tests {
// `EnforceSorting` and `EnforceDistribution`.
// TODO: Orthogonalize the tests here just to verify `EnforceDistribution` and create
// new tests for the cascade.
let optimizer = EnforceSorting {};
let optimizer = EnforceSorting::new();
let optimized = optimizer.optimize(optimized, &config)?;

// Now format correctly
Expand Down
Loading