diff --git a/rust/arrow/benches/cast_kernels.rs b/rust/arrow/benches/cast_kernels.rs index b9e33cce978..81232e5fdc1 100644 --- a/rust/arrow/benches/cast_kernels.rs +++ b/rust/arrow/benches/cast_kernels.rs @@ -193,6 +193,9 @@ fn add_benchmark(c: &mut Criterion) { c.bench_function("cast utf8 to f32", |b| { b.iter(|| cast_array(&f32_utf8_array, DataType::Float32)) }); + c.bench_function("cast i64 to string 512", |b| { + b.iter(|| cast_array(&i64_array, DataType::Utf8)) + }); c.bench_function("cast timestamp_ms to i64 512", |b| { b.iter(|| cast_array(&time_ms_array, DataType::Int64)) diff --git a/rust/arrow/src/compute/kernels/cast.rs b/rust/arrow/src/compute/kernels/cast.rs index f1128769525..6725fc0926c 100644 --- a/rust/arrow/src/compute/kernels/cast.rs +++ b/rust/arrow/src/compute/kernels/cast.rs @@ -351,17 +351,13 @@ pub fn cast(array: &ArrayRef, to_type: &DataType) -> Result { Float32 => cast_bool_to_numeric::(array), Float64 => cast_bool_to_numeric::(array), Utf8 => { - let from = array.as_any().downcast_ref::().unwrap(); - let mut b = StringBuilder::new(array.len()); - for i in 0..array.len() { - if array.is_null(i) { - b.append(false)?; - } else { - b.append_value(if from.value(i) { "1" } else { "0" })?; - } - } - - Ok(Arc::new(b.finish()) as ArrayRef) + let array = array.as_any().downcast_ref::().unwrap(); + Ok(Arc::new( + array + .iter() + .map(|value| value.map(|value| if value { "1" } else { "0" })) + .collect::(), + )) } _ => Err(ArrowError::ComputeError(format!( "Casting from {:?} to {:?} not supported", @@ -431,20 +427,15 @@ pub fn cast(array: &ArrayRef, to_type: &DataType) -> Result { Float32 => cast_numeric_to_string::(array), Float64 => cast_numeric_to_string::(array), Binary => { - let from = array.as_any().downcast_ref::().unwrap(); - let mut b = StringBuilder::new(array.len()); - for i in 0..array.len() { - if array.is_null(i) { - b.append_null()?; - } else { - match str::from_utf8(from.value(i)) { - Ok(s) => b.append_value(s)?, - Err(_) => b.append_null()?, // not valid UTF8 - } - } - } - - Ok(Arc::new(b.finish()) as ArrayRef) + let array = array.as_any().downcast_ref::().unwrap(); + Ok(Arc::new( + array + .iter() + .map(|maybe_value| { + maybe_value.and_then(|value| str::from_utf8(value).ok()) + }) + .collect::(), + )) } _ => Err(ArrowError::ComputeError(format!( "Casting from {:?} to {:?} not supported", @@ -892,31 +883,22 @@ where FROM: ArrowNumericType, FROM::Native: std::string::ToString, { - numeric_to_string_cast::( + Ok(Arc::new(numeric_to_string_cast::( array .as_any() .downcast_ref::>() .unwrap(), - ) - .map(|to| Arc::new(to) as ArrayRef) + ))) } -fn numeric_to_string_cast(from: &PrimitiveArray) -> Result +fn numeric_to_string_cast(from: &PrimitiveArray) -> StringArray where T: ArrowPrimitiveType + ArrowNumericType, T::Native: std::string::ToString, { - let mut b = StringBuilder::new(from.len()); - - for i in 0..from.len() { - if from.is_null(i) { - b.append(false)?; - } else { - b.append_value(&from.value(i).to_string())?; - } - } - - Ok(b.finish()) + from.iter() + .map(|maybe_value| maybe_value.map(|value| value.to_string())) + .collect() } /// Cast numeric types to Utf8 @@ -2714,11 +2696,8 @@ mod tests { fn test_cast_string_array_to_dict() { use DataType::*; - let mut builder = StringBuilder::new(10); - builder.append_value("one").unwrap(); - builder.append_null().unwrap(); - builder.append_value("three").unwrap(); - let array: ArrayRef = Arc::new(builder.finish()); + let array = Arc::new(StringArray::from(vec![Some("one"), None, Some("three")])) + as ArrayRef; let expected = vec!["one", "null", "three"]; diff --git a/rust/arrow/src/csv/reader.rs b/rust/arrow/src/csv/reader.rs index ac779b0331c..3fca7b23e8c 100644 --- a/rust/arrow/src/csv/reader.rs +++ b/rust/arrow/src/csv/reader.rs @@ -51,7 +51,7 @@ use std::sync::Arc; use csv as csv_crate; -use crate::array::{ArrayRef, BooleanArray, PrimitiveArray, StringBuilder}; +use crate::array::{ArrayRef, BooleanArray, PrimitiveArray, StringArray}; use crate::datatypes::*; use crate::error::{ArrowError, Result}; use crate::record_batch::RecordBatch; @@ -449,16 +449,9 @@ fn parse( &DataType::Date64(_) => { build_primitive_array::(line_number, rows, i) } - &DataType::Utf8 => { - let mut builder = StringBuilder::new(rows.len()); - for row in rows.iter() { - match row.get(i) { - Some(s) => builder.append_value(s).unwrap(), - _ => builder.append(false).unwrap(), - } - } - Ok(Arc::new(builder.finish()) as ArrayRef) - } + &DataType::Utf8 => Ok(Arc::new( + rows.iter().map(|row| row.get(i)).collect::(), + ) as ArrayRef), other => Err(ArrowError::ParseError(format!( "Unsupported data type {:?}", other diff --git a/rust/arrow/src/json/reader.rs b/rust/arrow/src/json/reader.rs index d43b02cee98..a4e6f932110 100644 --- a/rust/arrow/src/json/reader.rs +++ b/rust/arrow/src/json/reader.rs @@ -1115,21 +1115,14 @@ impl Decoder { t ))), }, - DataType::Utf8 => { - let mut builder = StringBuilder::new(rows.len()); - for row in rows { - if let Some(value) = row.get(field.name()) { - if let Some(str_v) = value.as_str() { - builder.append_value(str_v)? - } else { - builder.append(false)? - } - } else { - builder.append(false)? - } - } - Ok(Arc::new(builder.finish()) as ArrayRef) - } + DataType::Utf8 => Ok(Arc::new( + rows.iter() + .map(|row| { + let maybe_value = row.get(field.name()); + maybe_value.and_then(|value| value.as_str()) + }) + .collect::(), + ) as ArrayRef), DataType::List(ref list_field) => { match list_field.data_type() { DataType::Dictionary(ref key_ty, _) => {