From 1623811fab071f1b967970f7985d1a1e7557fd5e Mon Sep 17 00:00:00 2001 From: jayzhan211 Date: Sat, 28 Oct 2023 18:25:44 +0800 Subject: [PATCH 1/5] Refactor align_array_dimensions Signed-off-by: jayzhan211 --- .../physical-expr/src/array_expressions.rs | 85 ++++++++++++++----- 1 file changed, 65 insertions(+), 20 deletions(-) diff --git a/datafusion/physical-expr/src/array_expressions.rs b/datafusion/physical-expr/src/array_expressions.rs index 18d8c60fe7bd3..0dfd311c9f1ff 100644 --- a/datafusion/physical-expr/src/array_expressions.rs +++ b/datafusion/physical-expr/src/array_expressions.rs @@ -725,35 +725,39 @@ pub fn array_prepend(args: &[ArrayRef]) -> Result { } fn align_array_dimensions(args: Vec) -> Result> { - // Find the maximum number of dimensions - let max_ndim: u64 = (*args - .iter() - .map(|arr| compute_array_ndims(Some(arr.clone()))) - .collect::>>>()? - .iter() - .max() - .unwrap()) - .unwrap(); + let mut args_ndim = vec![]; + for arg in args.iter() { + let ndim = compute_array_ndims(Some(arg.to_owned()))?; + if let Some(ndim) = ndim { + args_ndim.push(ndim); + } else { + return internal_err!("args should not be empty"); + } + } + + let max_ndim = args_ndim.iter().max(); + let max_ndim = if let Some(max_ndim) = max_ndim { + max_ndim + } else { + return internal_err!("args_ndim should not be empty"); + }; // Align the dimensions of the arrays let aligned_args: Result> = args .into_iter() - .map(|array| { - let ndim = compute_array_ndims(Some(array.clone()))?.unwrap(); + .zip(args_ndim.iter()) + .map(|(array, ndim)| { if ndim < max_ndim { let mut aligned_array = array.clone(); for _ in 0..(max_ndim - ndim) { - let data_type = aligned_array.as_ref().data_type().clone(); - let offsets: Vec = - (0..downcast_arg!(aligned_array, ListArray).offsets().len()) - .map(|i| i as i32) - .collect(); - let field = Arc::new(Field::new("item", data_type, true)); + let data_type = aligned_array.data_type().to_owned(); + let array_lengths = vec![1; aligned_array.len()]; + let offsets = OffsetBuffer::::from_lengths(array_lengths); aligned_array = Arc::new(ListArray::try_new( - field, - OffsetBuffer::new(offsets.into()), - Arc::new(aligned_array.clone()), + Arc::new(Field::new("item", data_type, true)), + offsets, + aligned_array, None, )?) } @@ -1923,6 +1927,47 @@ mod tests { use arrow::datatypes::Int64Type; use datafusion_common::cast::as_uint64_array; + #[test] + fn test_align_array_dimensions() { + let array1d_1 = + Arc::new(ListArray::from_iter_primitive::(vec![ + Some(vec![Some(1), Some(2), Some(3)]), + Some(vec![Some(4), Some(5)]), + ])); + let array1d_2 = + Arc::new(ListArray::from_iter_primitive::(vec![ + Some(vec![Some(6), Some(7), Some(8)]), + ])); + + let array2d_1 = Arc::new(wrap_into_list_array(array1d_1.clone())) as ArrayRef; + let array2d_2 = Arc::new(wrap_into_list_array(array1d_2.clone())) as ArrayRef; + + let res = + align_array_dimensions(vec![array1d_1.to_owned(), array2d_2.to_owned()]) + .expect("should not error"); + + let expected = as_list_array(&array2d_1).unwrap(); + let expected_dim = compute_array_ndims(Some(array2d_1.to_owned())).unwrap(); + assert_ne!(as_list_array(&res[0]).unwrap(), expected); + assert_eq!( + compute_array_ndims(Some(res[0].clone())).unwrap(), + expected_dim + ); + + let array3d_1 = Arc::new(wrap_into_list_array(array2d_1)) as ArrayRef; + let array3d_2 = wrap_into_list_array(array2d_2.to_owned()); + let res = align_array_dimensions(vec![array1d_1, Arc::new(array3d_2.clone())]) + .expect("should not error"); + + let expected = as_list_array(&array3d_1).unwrap(); + let expected_dim = compute_array_ndims(Some(array3d_1.to_owned())).unwrap(); + assert_ne!(as_list_array(&res[0]).unwrap(), expected); + assert_eq!( + compute_array_ndims(Some(res[0].clone())).unwrap(), + expected_dim + ); + } + #[test] fn test_array() { // make_array(1, 2, 3) = [1, 2, 3] From a9919f0711016b4f75cc9900daec6c2fce75dad1 Mon Sep 17 00:00:00 2001 From: jayzhan211 Date: Sun, 29 Oct 2023 07:49:28 +0800 Subject: [PATCH 2/5] address comment Signed-off-by: jayzhan211 --- .../physical-expr/src/array_expressions.rs | 22 ++++++++++--------- 1 file changed, 12 insertions(+), 10 deletions(-) diff --git a/datafusion/physical-expr/src/array_expressions.rs b/datafusion/physical-expr/src/array_expressions.rs index 0dfd311c9f1ff..a8a6d12ed9d51 100644 --- a/datafusion/physical-expr/src/array_expressions.rs +++ b/datafusion/physical-expr/src/array_expressions.rs @@ -726,28 +726,30 @@ pub fn array_prepend(args: &[ArrayRef]) -> Result { fn align_array_dimensions(args: Vec) -> Result> { let mut args_ndim = vec![]; + let mut max_ndim = None; for arg in args.iter() { let ndim = compute_array_ndims(Some(arg.to_owned()))?; if let Some(ndim) = ndim { args_ndim.push(ndim); + if max_ndim.is_none() || ndim > max_ndim.unwrap() { + max_ndim = Some(ndim); + } } else { return internal_err!("args should not be empty"); } } - let max_ndim = args_ndim.iter().max(); - let max_ndim = if let Some(max_ndim) = max_ndim { - max_ndim - } else { - return internal_err!("args_ndim should not be empty"); - }; + if max_ndim.is_none() { + return internal_err!("args should not be empty"); + } + let max_ndim = max_ndim.unwrap(); // Align the dimensions of the arrays let aligned_args: Result> = args .into_iter() .zip(args_ndim.iter()) .map(|(array, ndim)| { - if ndim < max_ndim { + if ndim < &max_ndim { let mut aligned_array = array.clone(); for _ in 0..(max_ndim - ndim) { let data_type = aligned_array.data_type().to_owned(); @@ -1944,7 +1946,7 @@ mod tests { let res = align_array_dimensions(vec![array1d_1.to_owned(), array2d_2.to_owned()]) - .expect("should not error"); + .unwrap(); let expected = as_list_array(&array2d_1).unwrap(); let expected_dim = compute_array_ndims(Some(array2d_1.to_owned())).unwrap(); @@ -1956,8 +1958,8 @@ mod tests { let array3d_1 = Arc::new(wrap_into_list_array(array2d_1)) as ArrayRef; let array3d_2 = wrap_into_list_array(array2d_2.to_owned()); - let res = align_array_dimensions(vec![array1d_1, Arc::new(array3d_2.clone())]) - .expect("should not error"); + let res = + align_array_dimensions(vec![array1d_1, Arc::new(array3d_2.clone())]).unwrap(); let expected = as_list_array(&array3d_1).unwrap(); let expected_dim = compute_array_ndims(Some(array3d_1.to_owned())).unwrap(); From 7af58ac10b6c66f7f35c4523a67155209785bf79 Mon Sep 17 00:00:00 2001 From: jayzhan211 Date: Sun, 29 Oct 2023 20:33:13 +0800 Subject: [PATCH 3/5] remove unwrap Signed-off-by: jayzhan211 --- datafusion/physical-expr/src/array_expressions.rs | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/datafusion/physical-expr/src/array_expressions.rs b/datafusion/physical-expr/src/array_expressions.rs index a8a6d12ed9d51..5f77b28543c60 100644 --- a/datafusion/physical-expr/src/array_expressions.rs +++ b/datafusion/physical-expr/src/array_expressions.rs @@ -729,9 +729,13 @@ fn align_array_dimensions(args: Vec) -> Result> { let mut max_ndim = None; for arg in args.iter() { let ndim = compute_array_ndims(Some(arg.to_owned()))?; + if let Some(ndim) = ndim { args_ndim.push(ndim); - if max_ndim.is_none() || ndim > max_ndim.unwrap() { + + if let Some(current_max) = max_ndim { + max_ndim = Some(std::cmp::max(current_max, ndim)); + } else { max_ndim = Some(ndim); } } else { @@ -739,10 +743,11 @@ fn align_array_dimensions(args: Vec) -> Result> { } } - if max_ndim.is_none() { + let max_ndim = if let Some(max_ndim) = max_ndim { + max_ndim + } else { return internal_err!("args should not be empty"); - } - let max_ndim = max_ndim.unwrap(); + }; // Align the dimensions of the arrays let aligned_args: Result> = args From 04167d9fd0deab26b1ef21ecc711679002f15fdf Mon Sep 17 00:00:00 2001 From: jayzhan211 Date: Thu, 2 Nov 2023 09:13:21 +0800 Subject: [PATCH 4/5] address comment Signed-off-by: jayzhan211 --- .../physical-expr/src/array_expressions.rs | 33 +++++-------------- 1 file changed, 9 insertions(+), 24 deletions(-) diff --git a/datafusion/physical-expr/src/array_expressions.rs b/datafusion/physical-expr/src/array_expressions.rs index 5f77b28543c60..b26fe51164c15 100644 --- a/datafusion/physical-expr/src/array_expressions.rs +++ b/datafusion/physical-expr/src/array_expressions.rs @@ -725,36 +725,21 @@ pub fn array_prepend(args: &[ArrayRef]) -> Result { } fn align_array_dimensions(args: Vec) -> Result> { - let mut args_ndim = vec![]; - let mut max_ndim = None; - for arg in args.iter() { - let ndim = compute_array_ndims(Some(arg.to_owned()))?; - - if let Some(ndim) = ndim { - args_ndim.push(ndim); - - if let Some(current_max) = max_ndim { - max_ndim = Some(std::cmp::max(current_max, ndim)); - } else { - max_ndim = Some(ndim); - } - } else { - return internal_err!("args should not be empty"); - } - } - - let max_ndim = if let Some(max_ndim) = max_ndim { - max_ndim - } else { - return internal_err!("args should not be empty"); - }; + let args_ndim = args + .iter() + .map(|arg| compute_array_ndims(Some(arg.to_owned()))) + .collect::>>()? + .into_iter() + .map(|x| x.unwrap_or(0)) + .collect::>(); + let max_ndim = args_ndim.iter().max().unwrap_or(&0); // Align the dimensions of the arrays let aligned_args: Result> = args .into_iter() .zip(args_ndim.iter()) .map(|(array, ndim)| { - if ndim < &max_ndim { + if ndim < max_ndim { let mut aligned_array = array.clone(); for _ in 0..(max_ndim - ndim) { let data_type = aligned_array.data_type().to_owned(); From 31014a8a6ecf2a1f4d38ede72970bd9291601802 Mon Sep 17 00:00:00 2001 From: jayzhan211 Date: Thu, 2 Nov 2023 09:20:24 +0800 Subject: [PATCH 5/5] fix rebase Signed-off-by: jayzhan211 --- datafusion/physical-expr/src/array_expressions.rs | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/datafusion/physical-expr/src/array_expressions.rs b/datafusion/physical-expr/src/array_expressions.rs index b26fe51164c15..687502e79fed4 100644 --- a/datafusion/physical-expr/src/array_expressions.rs +++ b/datafusion/physical-expr/src/array_expressions.rs @@ -1931,8 +1931,8 @@ mod tests { Some(vec![Some(6), Some(7), Some(8)]), ])); - let array2d_1 = Arc::new(wrap_into_list_array(array1d_1.clone())) as ArrayRef; - let array2d_2 = Arc::new(wrap_into_list_array(array1d_2.clone())) as ArrayRef; + let array2d_1 = Arc::new(array_into_list_array(array1d_1.clone())) as ArrayRef; + let array2d_2 = Arc::new(array_into_list_array(array1d_2.clone())) as ArrayRef; let res = align_array_dimensions(vec![array1d_1.to_owned(), array2d_2.to_owned()]) @@ -1946,8 +1946,8 @@ mod tests { expected_dim ); - let array3d_1 = Arc::new(wrap_into_list_array(array2d_1)) as ArrayRef; - let array3d_2 = wrap_into_list_array(array2d_2.to_owned()); + let array3d_1 = Arc::new(array_into_list_array(array2d_1)) as ArrayRef; + let array3d_2 = array_into_list_array(array2d_2.to_owned()); let res = align_array_dimensions(vec![array1d_1, Arc::new(array3d_2.clone())]).unwrap();