From 40da605ef8742305cf0b2f28c9d04663aad809da Mon Sep 17 00:00:00 2001 From: Martin Hilton Date: Wed, 21 Aug 2024 16:31:49 +0100 Subject: [PATCH 1/2] minor: SortExec measure elapsed_compute time when sorting Whilst investigating query execution performance I noticed that some SortExec nodes were reporting suspiciously short elapsed_compute times. It appears that the SortExec node wasn't running the elapsed_compute timer when it doing the actual sorting operation. --- datafusion/physical-plan/src/sorts/sort.rs | 2 ++ 1 file changed, 2 insertions(+) diff --git a/datafusion/physical-plan/src/sorts/sort.rs b/datafusion/physical-plan/src/sorts/sort.rs index e7e1c5481f807..3081a3a5dd690 100644 --- a/datafusion/physical-plan/src/sorts/sort.rs +++ b/datafusion/physical-plan/src/sorts/sort.rs @@ -552,7 +552,9 @@ impl ExternalSorter { let fetch = self.fetch; let expressions = Arc::clone(&self.expr); let stream = futures::stream::once(futures::future::lazy(move |_| { + let timer = metrics.elapsed_compute().timer(); let sorted = sort_batch(&batch, &expressions, fetch)?; + timer.done(); metrics.record_output(sorted.num_rows()); drop(batch); drop(reservation); From 64b85d2c58955ffebc6d6aee11cb435febbae17a Mon Sep 17 00:00:00 2001 From: Martin Hilton Date: Wed, 21 Aug 2024 18:12:42 +0100 Subject: [PATCH 2/2] fix: apply review suggestions --- datafusion/physical-plan/src/sorts/sort.rs | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/datafusion/physical-plan/src/sorts/sort.rs b/datafusion/physical-plan/src/sorts/sort.rs index 3081a3a5dd690..a81b09948cca7 100644 --- a/datafusion/physical-plan/src/sorts/sort.rs +++ b/datafusion/physical-plan/src/sorts/sort.rs @@ -499,6 +499,12 @@ impl ExternalSorter { metrics: BaselineMetrics, ) -> Result { assert_ne!(self.in_mem_batches.len(), 0); + + // The elapsed compute timer is updated when the value is dropped. + // There is no need for an explicit call to drop. + let elapsed_compute = metrics.elapsed_compute().clone(); + let _timer = elapsed_compute.timer(); + if self.in_mem_batches.len() == 1 { let batch = self.in_mem_batches.remove(0); let reservation = self.reservation.take();