Push down limit to sort #3530
```diff
@@ -82,6 +82,7 @@ struct ExternalSorter {
     runtime: Arc<RuntimeEnv>,
     metrics_set: CompositeMetricsSet,
     metrics: BaselineMetrics,
+    fetch: Option<usize>,
 }
```

```diff
@@ -92,6 +93,7 @@ impl ExternalSorter {
         metrics_set: CompositeMetricsSet,
         session_config: Arc<SessionConfig>,
         runtime: Arc<RuntimeEnv>,
+        fetch: Option<usize>,
     ) -> Self {
         let metrics = metrics_set.new_intermediate_baseline(partition_id);
         Self {
```

```diff
@@ -104,6 +106,7 @@ impl ExternalSorter {
             runtime,
             metrics_set,
             metrics,
+            fetch,
         }
     }
```

```diff
@@ -120,7 +123,7 @@ impl ExternalSorter {
             // NB timer records time taken on drop, so there are no
             // calls to `timer.done()` below.
             let _timer = tracking_metrics.elapsed_compute().timer();
-            let partial = sort_batch(input, self.schema.clone(), &self.expr)?;
+            let partial = sort_batch(input, self.schema.clone(), &self.expr, self.fetch)?;
             in_mem_batches.push(partial);
         }
         Ok(())
```

```diff
@@ -657,15 +660,18 @@ pub struct SortExec {
     metrics_set: CompositeMetricsSet,
     /// Preserve partitions of input plan
     preserve_partitioning: bool,
+    /// Fetch highest/lowest n results
```
> **Contributor:** I see -- this seems like it now has the information plumbed to the SortExec to implement "TopK" within the physical operator's implementation. 👍 Very cool
```diff
+    fetch: Option<usize>,
 }
```
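As a side note to the review comment above, here is a hypothetical caller-side sketch (not part of this diff) of constructing a `SortExec` with the new `fetch` argument over a `MemoryExec`; module paths and signatures follow the DataFusion version this PR targets and may differ in other releases.

```rust
use std::sync::Arc;
use arrow::array::Int32Array;
use arrow::compute::SortOptions;
use arrow::datatypes::{DataType, Field, Schema};
use arrow::record_batch::RecordBatch;
use datafusion::error::Result;
use datafusion::physical_plan::expressions::{col, PhysicalSortExpr};
use datafusion::physical_plan::memory::MemoryExec;
use datafusion::physical_plan::sorts::sort::SortExec;

fn main() -> Result<()> {
    let schema = Arc::new(Schema::new(vec![Field::new("a", DataType::Int32, false)]));
    let batch = RecordBatch::try_new(
        schema.clone(),
        vec![Arc::new(Int32Array::from(vec![3, 1, 2]))],
    )?;
    let input = Arc::new(MemoryExec::try_new(&[vec![batch]], schema.clone(), None)?);

    // The sort itself now carries the limit, instead of relying on a separate
    // limit operator to discard rows after a full sort.
    let sort = SortExec::try_new(
        vec![PhysicalSortExpr {
            expr: col("a", &schema)?,
            options: SortOptions::default(),
        }],
        input,
        Some(10), // fetch: keep only the first 10 rows of the sorted output
    )?;
    assert_eq!(sort.fetch(), Some(10));
    Ok(())
}
```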
|
|
```diff
 impl SortExec {
     /// Create a new sort execution plan
     pub fn try_new(
         expr: Vec<PhysicalSortExpr>,
         input: Arc<dyn ExecutionPlan>,
+        fetch: Option<usize>,
     ) -> Result<Self> {
-        Ok(Self::new_with_partitioning(expr, input, false))
+        Ok(Self::new_with_partitioning(expr, input, false, fetch))
     }

     /// Whether this `SortExec` preserves partitioning of the children
```

```diff
@@ -679,12 +685,14 @@ impl SortExec {
         expr: Vec<PhysicalSortExpr>,
         input: Arc<dyn ExecutionPlan>,
         preserve_partitioning: bool,
+        fetch: Option<usize>,
     ) -> Self {
         Self {
             expr,
             input,
             metrics_set: CompositeMetricsSet::new(),
             preserve_partitioning,
+            fetch,
         }
     }
```

```diff
@@ -697,6 +705,11 @@ impl SortExec {
     pub fn expr(&self) -> &[PhysicalSortExpr] {
         &self.expr
     }
+
+    /// If `Some(fetch)`, limits output to only the first "fetch" items
+    pub fn fetch(&self) -> Option<usize> {
+        self.fetch
+    }
 }

 impl ExecutionPlan for SortExec {
```

```diff
@@ -750,6 +763,7 @@ impl ExecutionPlan for SortExec {
             self.expr.clone(),
             children[0].clone(),
             self.preserve_partitioning,
+            self.fetch,
         )))
     }
```

```diff
@@ -778,6 +792,7 @@ impl ExecutionPlan for SortExec {
                 self.expr.clone(),
                 self.metrics_set.clone(),
                 context,
+                self.fetch(),
             )
             .map_err(|e| ArrowError::ExternalError(Box::new(e))),
         )
```

```diff
@@ -816,14 +831,14 @@ fn sort_batch(
     batch: RecordBatch,
     schema: SchemaRef,
     expr: &[PhysicalSortExpr],
+    fetch: Option<usize>,
 ) -> ArrowResult<BatchWithSortArray> {
-    // TODO: pushup the limit expression to sort
     let sort_columns = expr
         .iter()
         .map(|e| e.evaluate_to_sort_column(&batch))
         .collect::<Result<Vec<SortColumn>>>()?;

-    let indices = lexsort_to_indices(&sort_columns, None)?;
+    let indices = lexsort_to_indices(&sort_columns, fetch)?;
```
|
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The key optimization: this returns only n indices after the change.
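To make the comment above concrete, here is a small, self-contained sketch (illustrative data, not from this PR) of the arrow `lexsort_to_indices` kernel that `sort_batch` calls: passing a limit makes it return only that many indices.

```rust
use std::sync::Arc;
use arrow::array::{Array, ArrayRef, Int32Array};
use arrow::compute::{lexsort_to_indices, SortColumn};

fn main() -> arrow::error::Result<()> {
    // Toy column; in DataFusion this comes from evaluating the sort expressions.
    let values: ArrayRef = Arc::new(Int32Array::from(vec![5, 1, 4, 2, 3]));
    let columns = vec![SortColumn { values, options: None }];

    // Without a limit every index is produced; with `Some(2)` only the
    // positions of the two smallest values come back.
    let all = lexsort_to_indices(&columns, None)?;
    let top2 = lexsort_to_indices(&columns, Some(2))?;
    assert_eq!(all.len(), 5);
    assert_eq!(top2.len(), 2);
    Ok(())
}
```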
> **Contributor:** nice
> **Contributor:** I wonder if this will effectively get us much of the benefit of a special TopK operator, as we don't have to copy the entire input -- we only copy the … Although I suppose …
> **Contributor:** In fact, I wonder if you could also apply the limit here: as part of sorting each batch -- rather than keeping the entire input batch, we only need to keep at most …
> **Contributor (author):** The remaining optimization, I think, is tweaking … I would like to do this in a separate PR.
> **Contributor:** A separate PR is a great idea 👍
>
> Right, the point I was trying to make is that there are 2 calls to … I was thinking if we applied …
>
> So clearly extending …
> **Contributor (author):** Ah, I didn't look too much at the rest of the implementation; I think you're right that providing …
> **Contributor (author):** I think the current change already buffers …
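A rough sketch of the per-batch idea discussed in this thread, assuming each inserted batch is sorted and truncated to at most `fetch` rows before being buffered; the helper `sort_and_truncate` and the data are hypothetical, not DataFusion's actual code.

```rust
use std::sync::Arc;
use arrow::array::{ArrayRef, Int32Array};
use arrow::compute::{lexsort_to_indices, take, SortColumn};
use arrow::datatypes::{DataType, Field, Schema};
use arrow::error::Result;
use arrow::record_batch::RecordBatch;

// Hypothetical helper: sort one batch and keep at most `fetch` rows, so the
// buffered size is bounded by roughly `fetch` rows per inserted batch rather
// than the whole input.
fn sort_and_truncate(batch: &RecordBatch, fetch: Option<usize>) -> Result<RecordBatch> {
    // Sort on the first column only, for brevity.
    let sort_columns = vec![SortColumn {
        values: batch.column(0).clone(),
        options: None,
    }];
    // With a limit, only the first `fetch` indices are returned.
    let indices = lexsort_to_indices(&sort_columns, fetch)?;
    let columns = batch
        .columns()
        .iter()
        .map(|c| take(c.as_ref(), &indices, None))
        .collect::<Result<Vec<ArrayRef>>>()?;
    RecordBatch::try_new(batch.schema(), columns)
}

fn main() -> Result<()> {
    let schema = Arc::new(Schema::new(vec![Field::new("a", DataType::Int32, false)]));
    let batch = RecordBatch::try_new(
        schema,
        vec![Arc::new(Int32Array::from(vec![5, 1, 4, 2, 3])) as ArrayRef],
    )?;
    let truncated = sort_and_truncate(&batch, Some(2))?;
    assert_eq!(truncated.num_rows(), 2); // only the two smallest rows survive
    Ok(())
}
```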
```diff
     // reorder all rows based on sorted indices
     let sorted_batch = RecordBatch::try_new(
```

```diff
@@ -870,6 +885,7 @@ async fn do_sort(
     expr: Vec<PhysicalSortExpr>,
     metrics_set: CompositeMetricsSet,
     context: Arc<TaskContext>,
+    fetch: Option<usize>,
 ) -> Result<SendableRecordBatchStream> {
     debug!(
         "Start do_sort for partition {} of context session_id {} and task_id {:?}",
```

```diff
@@ -887,6 +903,7 @@ async fn do_sort(
         metrics_set,
         Arc::new(context.session_config()),
         context.runtime_env(),
+        fetch,
     );
     context.runtime_env().register_requester(sorter.id());
     while let Some(batch) = input.next().await {
```

```diff
@@ -949,6 +966,7 @@ mod tests {
                 },
             ],
             Arc::new(CoalescePartitionsExec::new(csv)),
+            None,
         )?);

         let result = collect(sort_exec, task_ctx).await?;
```

```diff
@@ -1011,6 +1029,7 @@ mod tests {
                 },
             ],
             Arc::new(CoalescePartitionsExec::new(csv)),
+            None,
         )?);

         let task_ctx = session_ctx.task_ctx();
```

```diff
@@ -1083,6 +1102,7 @@ mod tests {
                 options: SortOptions::default(),
             }],
             input,
+            None,
         )?);

         let result: Vec<RecordBatch> = collect(sort_exec, task_ctx).await?;
```

```diff
@@ -1159,6 +1179,7 @@ mod tests {
                 },
             ],
             Arc::new(MemoryExec::try_new(&[vec![batch]], schema, None)?),
+            None,
         )?);

         assert_eq!(DataType::Float32, *sort_exec.schema().field(0).data_type());
```

```diff
@@ -1226,6 +1247,7 @@ mod tests {
                 options: SortOptions::default(),
             }],
             blocking_exec,
+            None,
         )?);

         let fut = collect(sort_exec, task_ctx);
```
> **Contributor:** As we now have the pushdown, we can use `fetch` and support more than just a limit directly after sort.
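To illustrate the direction sketched in this last comment, here is a toy rewrite over a made-up plan enum (deliberately not DataFusion's types or its optimizer API): a limit sitting directly on top of a sort is folded into the sort's `fetch`.

```rust
// Illustrative-only plan model, not DataFusion's actual plan or optimizer code.
#[derive(Debug, PartialEq)]
enum Plan {
    Limit { n: usize, input: Box<Plan> },
    Sort { fetch: Option<usize>, input: Box<Plan> },
    Scan,
}

// Fold `Limit(n)` directly over `Sort` into `Sort` with `fetch = n`,
// keeping the smaller limit if the sort already has one.
fn push_limit_into_sort(plan: Plan) -> Plan {
    match plan {
        Plan::Limit { n, input } => match *input {
            Plan::Sort { fetch, input } => Plan::Sort {
                fetch: Some(fetch.map_or(n, |f| f.min(n))),
                input,
            },
            other => Plan::Limit {
                n,
                input: Box::new(push_limit_into_sort(other)),
            },
        },
        Plan::Sort { fetch, input } => Plan::Sort {
            fetch,
            input: Box::new(push_limit_into_sort(*input)),
        },
        Plan::Scan => Plan::Scan,
    }
}

fn main() {
    let plan = Plan::Limit {
        n: 10,
        input: Box::new(Plan::Sort {
            fetch: None,
            input: Box::new(Plan::Scan),
        }),
    };
    // The separate limit disappears; the sort carries the fetch itself.
    let rewritten = push_limit_into_sort(plan);
    assert_eq!(
        rewritten,
        Plan::Sort {
            fetch: Some(10),
            input: Box::new(Plan::Scan),
        }
    );
}
```

With `fetch` available on the sort operator, the same idea can be extended to patterns beyond a limit placed immediately after the sort.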