diff --git a/datafusion/physical-plan/src/coalesce/mod.rs b/datafusion/physical-plan/src/coalesce/mod.rs index 0eca27f8e40e0..8e0ba072b7467 100644 --- a/datafusion/physical-plan/src/coalesce/mod.rs +++ b/datafusion/physical-plan/src/coalesce/mod.rs @@ -228,6 +228,12 @@ fn gc_string_view_batch(batch: &RecordBatch) -> RecordBatch { let Some(s) = c.as_string_view_opt() else { return Arc::clone(c); }; + + // Fast path: if the data buffers are empty, we can return the original array + if s.data_buffers().is_empty() { + return Arc::clone(c); + } + let ideal_buffer_size: usize = s .views() .iter() @@ -240,7 +246,11 @@ fn gc_string_view_batch(batch: &RecordBatch) -> RecordBatch { } }) .sum(); - let actual_buffer_size = s.get_buffer_memory_size(); + + // We don't use get_buffer_memory_size here, because gc is for the contents of the + // data buffers, not views and nulls. + let actual_buffer_size = + s.data_buffers().iter().map(|b| b.capacity()).sum::<usize>(); // Re-creating the array copies data and can be time consuming. // We only do it if the array is sparse