Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 4 additions & 4 deletions rust/arrow/src/array/array_primitive.rs
Original file line number Diff line number Diff line change
Expand Up @@ -259,27 +259,27 @@ impl<T: ArrowPrimitiveType> fmt::Debug for PrimitiveArray<T> {
write!(f, "PrimitiveArray<{:?}>\n[\n", T::DATA_TYPE)?;
print_long_array(self, f, |array, index, f| match T::DATA_TYPE {
DataType::Date32(_) | DataType::Date64(_) => {
let v = self.value(index).to_usize().unwrap() as i64;
let v = self.values()[index].to_usize().unwrap() as i64;
match as_date::<T>(v) {
Some(date) => write!(f, "{:?}", date),
None => write!(f, "null"),
}
}
DataType::Time32(_) | DataType::Time64(_) => {
let v = self.value(index).to_usize().unwrap() as i64;
let v = self.values()[index].to_usize().unwrap() as i64;
match as_time::<T>(v) {
Some(time) => write!(f, "{:?}", time),
None => write!(f, "null"),
}
}
DataType::Timestamp(_, _) => {
let v = self.value(index).to_usize().unwrap() as i64;
let v = self.values()[index].to_usize().unwrap() as i64;
match as_datetime::<T>(v) {
Some(datetime) => write!(f, "{:?}", datetime),
None => write!(f, "null"),
}
}
_ => fmt::Debug::fmt(&array.value(index), f),
_ => fmt::Debug::fmt(&array.values()[index], f),
})?;
write!(f, "]")
}
Expand Down
15 changes: 8 additions & 7 deletions rust/arrow/src/array/equal_json.rs
Original file line number Diff line number Diff line change
Expand Up @@ -39,13 +39,14 @@ pub trait JsonEqual {
impl<T: ArrowPrimitiveType> JsonEqual for PrimitiveArray<T> {
fn equals_json(&self, json: &[&Value]) -> bool {
self.len() == json.len()
&& (0..self.len()).all(|i| match json[i] {
Value::Null => self.is_null(i),
v => {
self.is_valid(i)
&& Some(v) == self.value(i).into_json_value().as_ref()
}
})
&& json
.iter()
.zip(self.iter())
.all(|(lhs, rhs)| match (lhs, rhs) {
(Value::Null, None) => true,
(lhs, Some(rhs)) => Some(*lhs) == rhs.into_json_value().as_ref(),
_ => false,
})
}
}

Expand Down
16 changes: 10 additions & 6 deletions rust/arrow/src/array/ord.rs
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,9 @@ where
{
let left = left.as_any().downcast_ref::<PrimitiveArray<T>>().unwrap();
let right = right.as_any().downcast_ref::<PrimitiveArray<T>>().unwrap();
Box::new(move |i, j| left.value(i).cmp(&right.value(j)))
let left = left.values();
let right = right.values();
Box::new(move |i, j| left[i].cmp(&right[j]))
}

fn compare_boolean<'a>(left: &'a Array, right: &'a Array) -> DynComparator<'a> {
Expand All @@ -66,7 +68,9 @@ where
{
let left = left.as_any().downcast_ref::<PrimitiveArray<T>>().unwrap();
let right = right.as_any().downcast_ref::<PrimitiveArray<T>>().unwrap();
Box::new(move |i, j| cmp_nans_last(&left.value(i), &right.value(j)))
let left = left.values();
let right = right.values();
Box::new(move |i, j| cmp_nans_last(&left[i], &right[j]))
Comment on lines +71 to +73
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This will be slower, but at this point we have no guarantee that `i` and `j` are within bounds.

}

fn compare_string<'a, T>(left: &'a Array, right: &'a Array) -> DynComparator<'a>
Expand All @@ -90,15 +94,15 @@ where
{
let left = left.as_any().downcast_ref::<DictionaryArray<T>>().unwrap();
let right = right.as_any().downcast_ref::<DictionaryArray<T>>().unwrap();
let left_keys = left.keys_array();
let right_keys = right.keys_array();
let left_keys = left.keys().values();
let right_keys = right.keys().values();

let left_values = StringArray::from(left.values().data());
let right_values = StringArray::from(left.values().data());

Box::new(move |i: usize, j: usize| {
let key_left = left_keys.value(i).to_usize().unwrap();
let key_right = right_keys.value(j).to_usize().unwrap();
let key_left = left_keys[i].to_usize().unwrap();
let key_right = right_keys[j].to_usize().unwrap();
let left = left_values.value(key_left);
let right = right_values.value(key_right);
left.cmp(&right)
Expand Down
113 changes: 45 additions & 68 deletions rust/arrow/src/compute/kernels/cast.rs
Original file line number Diff line number Diff line change
Expand Up @@ -569,29 +569,27 @@ pub fn cast(array: &ArrayRef, to_type: &DataType) -> Result<ArrayRef> {
(Time64(_), Int64) => cast_array_data::<Int64Type>(array, to_type.clone()),
(Date32(DateUnit::Day), Date64(DateUnit::Millisecond)) => {
let date_array = array.as_any().downcast_ref::<Date32Array>().unwrap();
let mut b = Date64Builder::new(array.len());
for i in 0..array.len() {
if array.is_null(i) {
b.append_null()?;
} else {
b.append_value(date_array.value(i) as i64 * MILLISECONDS_IN_DAY)?;
}
}

Ok(Arc::new(b.finish()) as ArrayRef)
// todo: can be optimized by computing this for the whole values buffer and
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We can open a JIRA to track this. I think `unary` can also work (or was this before the cast improvement?)

// cloning the null buffer
let values = date_array
.iter()
.map(|x| x.map(|x| x as i64 * MILLISECONDS_IN_DAY))
.collect::<Date64Array>();

Ok(Arc::new(values) as ArrayRef)
}
(Date64(DateUnit::Millisecond), Date32(DateUnit::Day)) => {
let date_array = array.as_any().downcast_ref::<Date64Array>().unwrap();
let mut b = Date32Builder::new(array.len());
for i in 0..array.len() {
if array.is_null(i) {
b.append_null()?;
} else {
b.append_value((date_array.value(i) / MILLISECONDS_IN_DAY) as i32)?;
}
}

Ok(Arc::new(b.finish()) as ArrayRef)
// todo: can be optimized by computing this for the whole values buffer and
// cloning the null buffer
let values = date_array
.iter()
.map(|x| x.map(|x| (x / MILLISECONDS_IN_DAY) as i32))
.collect::<Date32Array>();

Ok(Arc::new(values) as ArrayRef)
}
(Time32(TimeUnit::Second), Time32(TimeUnit::Millisecond)) => {
let time_array = Time32MillisecondArray::from(array.data());
Expand Down Expand Up @@ -652,33 +650,20 @@ pub fn cast(array: &ArrayRef, to_type: &DataType) -> Result<ArrayRef> {
let divisor = from_size / to_size;
match to_unit {
TimeUnit::Second => {
let mut b = Time32SecondBuilder::new(array.len());
for i in 0..array.len() {
if array.is_null(i) {
b.append_null()?;
} else {
b.append_value(
(time_array.value(i) as i64 / divisor) as i32,
)?;
}
}
let array = time_array
.iter()
.map(|x| x.map(|x| (x as i64 / divisor) as i32))
.collect::<Time32SecondArray>();

Ok(Arc::new(b.finish()) as ArrayRef)
Ok(Arc::new(array) as ArrayRef)
}
TimeUnit::Millisecond => {
// currently can't dedup this builder [ARROW-4164]
let mut b = Time32MillisecondBuilder::new(array.len());
for i in 0..array.len() {
if array.is_null(i) {
b.append_null()?;
} else {
b.append_value(
(time_array.value(i) as i64 / divisor) as i32,
)?;
}
}
let array = time_array
.iter()
.map(|x| x.map(|x| (x as i64 / divisor) as i32))
.collect::<Time32MillisecondArray>();

Ok(Arc::new(b.finish()) as ArrayRef)
Ok(Arc::new(array) as ArrayRef)
}
_ => unreachable!("array type not supported"),
}
Expand Down Expand Up @@ -739,16 +724,13 @@ pub fn cast(array: &ArrayRef, to_type: &DataType) -> Result<ArrayRef> {
(Timestamp(from_unit, _), Date32(_)) => {
let time_array = Int64Array::from(array.data());
let from_size = time_unit_multiple(&from_unit) * SECONDS_IN_DAY;
let mut b = Date32Builder::new(array.len());
for i in 0..array.len() {
if array.is_null(i) {
b.append_null()?;
} else {
b.append_value((time_array.value(i) / from_size) as i32)?;
}
}

Ok(Arc::new(b.finish()) as ArrayRef)
let array = time_array
.iter()
.map(|x| x.map(|x| (x / from_size) as i32))
.collect::<Date32Array>();

Ok(Arc::new(array) as ArrayRef)
}
(Timestamp(from_unit, _), Date64(_)) => {
let from_size = time_unit_multiple(&from_unit);
Expand Down Expand Up @@ -947,19 +929,10 @@ fn numeric_to_bool_cast<T>(from: &PrimitiveArray<T>) -> Result<BooleanArray>
where
T: ArrowPrimitiveType + ArrowNumericType,
{
let mut b = BooleanBuilder::new(from.len());

for i in 0..from.len() {
if from.is_null(i) {
b.append_null()?;
} else if from.value(i) != T::default_value() {
b.append_value(true)?;
} else {
b.append_value(false)?;
}
}

Ok(b.finish())
Ok(from
.iter()
.map(|x| x.map(|x| x != T::Native::default()))
.collect())
}

/// Cast Boolean types to numeric
Expand Down Expand Up @@ -1153,13 +1126,17 @@ where
let mut b = PrimitiveDictionaryBuilder::new(keys_builder, values_builder);

// copy each element one at a time
for i in 0..values.len() {
if values.is_null(i) {
b.append_null()?;
} else {
b.append(values.value(i))?;
for value in values {
match value {
Some(v) => {
b.append(v)?;
}
None => {
b.append_null()?;
}
}
}

Ok(Arc::new(b.finish()))
}

Expand Down