diff --git a/rust/arrow/src/array/array_primitive.rs b/rust/arrow/src/array/array_primitive.rs index 984454ce779..46d3f2adfcc 100644 --- a/rust/arrow/src/array/array_primitive.rs +++ b/rust/arrow/src/array/array_primitive.rs @@ -259,27 +259,27 @@ impl fmt::Debug for PrimitiveArray { write!(f, "PrimitiveArray<{:?}>\n[\n", T::DATA_TYPE)?; print_long_array(self, f, |array, index, f| match T::DATA_TYPE { DataType::Date32(_) | DataType::Date64(_) => { - let v = self.value(index).to_usize().unwrap() as i64; + let v = self.values()[index].to_usize().unwrap() as i64; match as_date::(v) { Some(date) => write!(f, "{:?}", date), None => write!(f, "null"), } } DataType::Time32(_) | DataType::Time64(_) => { - let v = self.value(index).to_usize().unwrap() as i64; + let v = self.values()[index].to_usize().unwrap() as i64; match as_time::(v) { Some(time) => write!(f, "{:?}", time), None => write!(f, "null"), } } DataType::Timestamp(_, _) => { - let v = self.value(index).to_usize().unwrap() as i64; + let v = self.values()[index].to_usize().unwrap() as i64; match as_datetime::(v) { Some(datetime) => write!(f, "{:?}", datetime), None => write!(f, "null"), } } - _ => fmt::Debug::fmt(&array.value(index), f), + _ => fmt::Debug::fmt(&array.values()[index], f), })?; write!(f, "]") } diff --git a/rust/arrow/src/array/equal_json.rs b/rust/arrow/src/array/equal_json.rs index 043174b9ac8..65c8f116678 100644 --- a/rust/arrow/src/array/equal_json.rs +++ b/rust/arrow/src/array/equal_json.rs @@ -39,13 +39,14 @@ pub trait JsonEqual { impl JsonEqual for PrimitiveArray { fn equals_json(&self, json: &[&Value]) -> bool { self.len() == json.len() - && (0..self.len()).all(|i| match json[i] { - Value::Null => self.is_null(i), - v => { - self.is_valid(i) - && Some(v) == self.value(i).into_json_value().as_ref() - } - }) + && json + .iter() + .zip(self.iter()) + .all(|(lhs, rhs)| match (lhs, rhs) { + (Value::Null, None) => true, + (lhs, Some(rhs)) => Some(*lhs) == rhs.into_json_value().as_ref(), + _ => 
false, + }) } } diff --git a/rust/arrow/src/array/ord.rs b/rust/arrow/src/array/ord.rs index 358b5f1d18d..53ab7c28fbf 100644 --- a/rust/arrow/src/array/ord.rs +++ b/rust/arrow/src/array/ord.rs @@ -48,7 +48,9 @@ where { let left = left.as_any().downcast_ref::>().unwrap(); let right = right.as_any().downcast_ref::>().unwrap(); - Box::new(move |i, j| left.value(i).cmp(&right.value(j))) + let left = left.values(); + let right = right.values(); + Box::new(move |i, j| left[i].cmp(&right[j])) } fn compare_boolean<'a>(left: &'a Array, right: &'a Array) -> DynComparator<'a> { @@ -66,7 +68,9 @@ where { let left = left.as_any().downcast_ref::>().unwrap(); let right = right.as_any().downcast_ref::>().unwrap(); - Box::new(move |i, j| cmp_nans_last(&left.value(i), &right.value(j))) + let left = left.values(); + let right = right.values(); + Box::new(move |i, j| cmp_nans_last(&left[i], &right[j])) } fn compare_string<'a, T>(left: &'a Array, right: &'a Array) -> DynComparator<'a> @@ -90,15 +94,15 @@ where { let left = left.as_any().downcast_ref::>().unwrap(); let right = right.as_any().downcast_ref::>().unwrap(); - let left_keys = left.keys_array(); - let right_keys = right.keys_array(); + let left_keys = left.keys().values(); + let right_keys = right.keys().values(); let left_values = StringArray::from(left.values().data()); - let right_values = StringArray::from(left.values().data()); + let right_values = StringArray::from(right.values().data()); Box::new(move |i: usize, j: usize| { - let key_left = left_keys.value(i).to_usize().unwrap(); - let key_right = right_keys.value(j).to_usize().unwrap(); + let key_left = left_keys[i].to_usize().unwrap(); + let key_right = right_keys[j].to_usize().unwrap(); let left = left_values.value(key_left); let right = right_values.value(key_right); left.cmp(&right) diff --git a/rust/arrow/src/compute/kernels/cast.rs b/rust/arrow/src/compute/kernels/cast.rs index e2e29620cc5..33549179644 100644 --- a/rust/arrow/src/compute/kernels/cast.rs +++ b/rust/arrow/src/compute/kernels/cast.rs @@ -569,29 +569,27 @@ pub fn 
cast(array: &ArrayRef, to_type: &DataType) -> Result { (Time64(_), Int64) => cast_array_data::(array, to_type.clone()), (Date32(DateUnit::Day), Date64(DateUnit::Millisecond)) => { let date_array = array.as_any().downcast_ref::().unwrap(); - let mut b = Date64Builder::new(array.len()); - for i in 0..array.len() { - if array.is_null(i) { - b.append_null()?; - } else { - b.append_value(date_array.value(i) as i64 * MILLISECONDS_IN_DAY)?; - } - } - Ok(Arc::new(b.finish()) as ArrayRef) + // todo: can be optimized by computing this for the whole values buffer and + // cloning the null buffer + let values = date_array + .iter() + .map(|x| x.map(|x| x as i64 * MILLISECONDS_IN_DAY)) + .collect::(); + + Ok(Arc::new(values) as ArrayRef) } (Date64(DateUnit::Millisecond), Date32(DateUnit::Day)) => { let date_array = array.as_any().downcast_ref::().unwrap(); - let mut b = Date32Builder::new(array.len()); - for i in 0..array.len() { - if array.is_null(i) { - b.append_null()?; - } else { - b.append_value((date_array.value(i) / MILLISECONDS_IN_DAY) as i32)?; - } - } - Ok(Arc::new(b.finish()) as ArrayRef) + // todo: can be optimized by computing this for the whole values buffer and + // cloning the null buffer + let values = date_array + .iter() + .map(|x| x.map(|x| (x / MILLISECONDS_IN_DAY) as i32)) + .collect::(); + + Ok(Arc::new(values) as ArrayRef) } (Time32(TimeUnit::Second), Time32(TimeUnit::Millisecond)) => { let time_array = Time32MillisecondArray::from(array.data()); @@ -652,33 +650,20 @@ pub fn cast(array: &ArrayRef, to_type: &DataType) -> Result { let divisor = from_size / to_size; match to_unit { TimeUnit::Second => { - let mut b = Time32SecondBuilder::new(array.len()); - for i in 0..array.len() { - if array.is_null(i) { - b.append_null()?; - } else { - b.append_value( - (time_array.value(i) as i64 / divisor) as i32, - )?; - } - } + let array = time_array + .iter() + .map(|x| x.map(|x| (x as i64 / divisor) as i32)) + .collect::(); - Ok(Arc::new(b.finish()) as ArrayRef) + 
Ok(Arc::new(array) as ArrayRef) } TimeUnit::Millisecond => { - // currently can't dedup this builder [ARROW-4164] - let mut b = Time32MillisecondBuilder::new(array.len()); - for i in 0..array.len() { - if array.is_null(i) { - b.append_null()?; - } else { - b.append_value( - (time_array.value(i) as i64 / divisor) as i32, - )?; - } - } + let array = time_array + .iter() + .map(|x| x.map(|x| (x as i64 / divisor) as i32)) + .collect::(); - Ok(Arc::new(b.finish()) as ArrayRef) + Ok(Arc::new(array) as ArrayRef) } _ => unreachable!("array type not supported"), } @@ -739,16 +724,13 @@ pub fn cast(array: &ArrayRef, to_type: &DataType) -> Result { (Timestamp(from_unit, _), Date32(_)) => { let time_array = Int64Array::from(array.data()); let from_size = time_unit_multiple(&from_unit) * SECONDS_IN_DAY; - let mut b = Date32Builder::new(array.len()); - for i in 0..array.len() { - if array.is_null(i) { - b.append_null()?; - } else { - b.append_value((time_array.value(i) / from_size) as i32)?; - } - } - Ok(Arc::new(b.finish()) as ArrayRef) + let array = time_array + .iter() + .map(|x| x.map(|x| (x / from_size) as i32)) + .collect::(); + + Ok(Arc::new(array) as ArrayRef) } (Timestamp(from_unit, _), Date64(_)) => { let from_size = time_unit_multiple(&from_unit); @@ -947,19 +929,10 @@ fn numeric_to_bool_cast(from: &PrimitiveArray) -> Result where T: ArrowPrimitiveType + ArrowNumericType, { - let mut b = BooleanBuilder::new(from.len()); - - for i in 0..from.len() { - if from.is_null(i) { - b.append_null()?; - } else if from.value(i) != T::default_value() { - b.append_value(true)?; - } else { - b.append_value(false)?; - } - } - - Ok(b.finish()) + Ok(from + .iter() + .map(|x| x.map(|x| x != T::Native::default())) + .collect()) } /// Cast Boolean types to numeric @@ -1153,13 +1126,17 @@ where let mut b = PrimitiveDictionaryBuilder::new(keys_builder, values_builder); // copy each element one at a time - for i in 0..values.len() { - if values.is_null(i) { - b.append_null()?; - } else { - 
b.append(values.value(i))?; + for value in values { + match value { + Some(v) => { + b.append(v)?; + } + None => { + b.append_null()?; + } } } + Ok(Arc::new(b.finish())) }