From 838492d8989f041763daa505ca7e191dd53942a5 Mon Sep 17 00:00:00 2001 From: zhuqi-lucas <821684824@qq.com> Date: Mon, 23 Jun 2025 17:42:20 +0800 Subject: [PATCH 1/2] Perf: Optimize CursorValues compare performance for StringViewArray --- datafusion/physical-plan/src/sorts/cursor.rs | 21 +++++++++++++++++--- 1 file changed, 18 insertions(+), 3 deletions(-) diff --git a/datafusion/physical-plan/src/sorts/cursor.rs b/datafusion/physical-plan/src/sorts/cursor.rs index efb9c0a47bf58..1c2fc5b0ffe55 100644 --- a/datafusion/physical-plan/src/sorts/cursor.rs +++ b/datafusion/physical-plan/src/sorts/cursor.rs @@ -297,10 +297,14 @@ impl CursorValues for StringViewArray { // SAFETY: Both l_idx and r_idx are guaranteed to be within bounds, // and any null-checks are handled in the outer layers. // Fast path: Compare the lengths before full byte comparison. - let l_view = unsafe { l.views().get_unchecked(l_idx) }; - let l_len = *l_view as u32; let r_view = unsafe { r.views().get_unchecked(r_idx) }; + + if l.data_buffers().is_empty() && r.data_buffers().is_empty() { + return l_view.eq(r_view); + } + + let l_len = *l_view as u32; let r_len = *r_view as u32; if l_len != r_len { return false; @@ -314,8 +318,13 @@ impl CursorValues for StringViewArray { // Already checked it in is_eq_to_prev_one function // Fast path: Compare the lengths of the current and previous views. let l_view = unsafe { cursor.views().get_unchecked(idx) }; - let l_len = *l_view as u32; let r_view = unsafe { cursor.views().get_unchecked(idx - 1) }; + if cursor.data_buffers().is_empty() { + return l_view.eq(r_view); + } + + let l_len = *l_view as u32; + let r_len = *r_view as u32; if l_len != r_len { return false; @@ -330,6 +339,12 @@ impl CursorValues for StringViewArray { // SAFETY: Prior assertions guarantee that l_idx and r_idx are valid indices. // Null-checks are assumed to have been handled in the wrapper (e.g., ArrayValues). // And the bound is checked in is_finished, it is safe to call get_unchecked + if l.data_buffers().is_empty() && r.data_buffers().is_empty() { + let l_view = unsafe { l.views().get_unchecked(l_idx) }; + let r_view = unsafe { r.views().get_unchecked(r_idx) }; + return l_view.cmp(r_view); + } + unsafe { GenericByteViewArray::compare_unchecked(l, l_idx, r, r_idx) } } } From f22ca3c9863943ab31e8e9966057d13fd694c71c Mon Sep 17 00:00:00 2001 From: zhuqi-lucas <821684824@qq.com> Date: Mon, 23 Jun 2025 19:07:14 +0800 Subject: [PATCH 2/2] fix --- datafusion/physical-plan/src/sorts/cursor.rs | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/datafusion/physical-plan/src/sorts/cursor.rs b/datafusion/physical-plan/src/sorts/cursor.rs index 1c2fc5b0ffe55..17033e6a31425 100644 --- a/datafusion/physical-plan/src/sorts/cursor.rs +++ b/datafusion/physical-plan/src/sorts/cursor.rs @@ -293,6 +293,7 @@ impl CursorValues for StringViewArray { self.views().len() } + #[inline(always)] fn eq(l: &Self, l_idx: usize, r: &Self, r_idx: usize) -> bool { // SAFETY: Both l_idx and r_idx are guaranteed to be within bounds, // and any null-checks are handled in the outer layers. @@ -313,6 +314,7 @@ impl CursorValues for StringViewArray { unsafe { GenericByteViewArray::compare_unchecked(l, l_idx, r, r_idx).is_eq() } } + #[inline(always)] fn eq_to_previous(cursor: &Self, idx: usize) -> bool { // SAFETY: The caller guarantees that idx > 0 and the indices are valid. // Already checked it in is_eq_to_prev_one function @@ -335,6 +337,7 @@ impl CursorValues for StringViewArray { } } + #[inline(always)] fn compare(l: &Self, l_idx: usize, r: &Self, r_idx: usize) -> Ordering { // SAFETY: Prior assertions guarantee that l_idx and r_idx are valid indices. // Null-checks are assumed to have been handled in the wrapper (e.g., ArrayValues). @@ -342,7 +345,11 @@ impl CursorValues for StringViewArray { if l.data_buffers().is_empty() && r.data_buffers().is_empty() { let l_view = unsafe { l.views().get_unchecked(l_idx) }; let r_view = unsafe { r.views().get_unchecked(r_idx) }; - return l_view.cmp(r_view); + let l_len = *l_view as u32; + let r_len = *r_view as u32; + let l_data = unsafe { StringViewArray::inline_value(l_view, l_len as usize) }; + let r_data = unsafe { StringViewArray::inline_value(r_view, r_len as usize) }; + return l_data.cmp(r_data); } unsafe { GenericByteViewArray::compare_unchecked(l, l_idx, r, r_idx) }