From 98a7c8a3fc2e4948eb4ccc8c635bcd6dbb79b1fd Mon Sep 17 00:00:00 2001 From: "Jorge C. Leitao" Date: Fri, 11 Sep 2020 06:41:00 +0200 Subject: [PATCH 1/2] Added min/max of StringArray --- rust/arrow/src/compute/kernels/aggregate.rs | 69 ++++++++++++++++++++- 1 file changed, 68 insertions(+), 1 deletion(-) diff --git a/rust/arrow/src/compute/kernels/aggregate.rs b/rust/arrow/src/compute/kernels/aggregate.rs index bc2cadd53fd..c0ebcd786d0 100644 --- a/rust/arrow/src/compute/kernels/aggregate.rs +++ b/rust/arrow/src/compute/kernels/aggregate.rs @@ -19,9 +19,42 @@ use std::ops::Add; -use crate::array::{Array, PrimitiveArray}; +use crate::array::{Array, LargeStringArray, PrimitiveArray, StringArray}; use crate::datatypes::ArrowNumericType; +/// Helper macro to perform min/max of strings +macro_rules! min_max_string_helper { + ($array:expr, $cmp:tt) => {{ + let null_count = $array.null_count(); + + if null_count == $array.len() { + return None + } + let mut n = ""; + let mut has_value = false; + let data = $array.data(); + + if null_count == 0 { + for i in 0..data.len() { + let item = $array.value(i); + if !has_value || (&n $cmp &item) { + has_value = true; + n = item; + } + } + } else { + for i in 0..data.len() { + let item = $array.value(i); + if !has_value || data.is_valid(i) && (&n $cmp &item) { + has_value = true; + n = item; + } + } + } + Some(n) + }} +} + /// Returns the minimum value in the array, according to the natural order. pub fn min(array: &PrimitiveArray) -> Option where @@ -38,6 +71,26 @@ where min_max_helper(array, |a, b| a < b) } +/// Returns the maximum value in the string array, according to the natural order. +pub fn max_string(array: &StringArray) -> Option<&str> { + min_max_string_helper!(array, <) +} + +/// Returns the minimum value in the string array, according to the natural order. +pub fn min_string(array: &StringArray) -> Option<&str> { + min_max_string_helper!(array, >) +} + +/// Returns the minimum value in the string array, according to the natural order. +pub fn max_large_string(array: &LargeStringArray) -> Option<&str> { + min_max_string_helper!(array, <) +} + +/// Returns the minimum value in the string array, according to the natural order. +pub fn min_large_string(array: &LargeStringArray) -> Option<&str> { + min_max_string_helper!(array, >) +} + /// Helper function to perform min/max lambda function on values from a numeric array. fn min_max_helper(array: &PrimitiveArray, cmp: F) -> Option where @@ -149,4 +202,18 @@ mod tests { assert_eq!(5, min(&a).unwrap()); assert_eq!(9, max(&a).unwrap()); } + + #[test] + fn test_string_min_max_with_nulls() { + let a = StringArray::from(vec![Some("b"), None, None, Some("a"), Some("c")]); + assert_eq!("a", min_string(&a).unwrap()); + assert_eq!("c", max_string(&a).unwrap()); + } + + #[test] + fn test_string_min_max_all_nulls() { + let a = StringArray::from(vec![None, None]); + assert_eq!(None, min_string(&a)); + assert_eq!(None, max_string(&a)); + } } From 12fe1a6666847c799eb58fd8d378b2750d9206c4 Mon Sep 17 00:00:00 2001 From: "Jorge C. Leitao" Date: Sat, 12 Sep 2020 19:25:33 +0200 Subject: [PATCH 2/2] Fixed error in computing min/max with null entries. --- rust/arrow/src/compute/kernels/aggregate.rs | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/rust/arrow/src/compute/kernels/aggregate.rs b/rust/arrow/src/compute/kernels/aggregate.rs index c0ebcd786d0..fcb854bc3b5 100644 --- a/rust/arrow/src/compute/kernels/aggregate.rs +++ b/rust/arrow/src/compute/kernels/aggregate.rs @@ -45,7 +45,7 @@ macro_rules! min_max_string_helper { } else { for i in 0..data.len() { let item = $array.value(i); - if !has_value || data.is_valid(i) && (&n $cmp &item) { + if data.is_valid(i) && (!has_value || (&n $cmp &item)) { has_value = true; n = item; } @@ -118,7 +118,7 @@ where } } else { for (i, item) in m.iter().enumerate() { - if !has_value || data.is_valid(i) && cmp(&n, item) { + if data.is_valid(i) && (!has_value || cmp(&n, item)) { has_value = true; n = *item } @@ -203,6 +203,13 @@ mod tests { assert_eq!(9, max(&a).unwrap()); } + #[test] + fn test_buffer_min_max_1() { + let a = Int32Array::from(vec![None, None, Some(5), Some(2)]); + assert_eq!(Some(2), min(&a)); + assert_eq!(Some(5), max(&a)); + } + #[test] fn test_string_min_max_with_nulls() { let a = StringArray::from(vec![Some("b"), None, None, Some("a"), Some("c")]); @@ -216,4 +223,11 @@ mod tests { assert_eq!(None, min_string(&a)); assert_eq!(None, max_string(&a)); } + + #[test] + fn test_string_min_max_1() { + let a = StringArray::from(vec![None, None, Some("b"), Some("a")]); + assert_eq!(Some("a"), min_string(&a)); + assert_eq!(Some("b"), max_string(&a)); + } }