diff --git a/rust/arrow/src/util/display.rs b/rust/arrow/src/util/display.rs
new file mode 100644
index 00000000000..bf0cade562f
--- /dev/null
+++ b/rust/arrow/src/util/display.rs
@@ -0,0 +1,148 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+//! Functions for printing array values, as strings, for debugging
+//! purposes. See the `pretty` module for additional functions for
+//! record batch pretty printing.
+
+use crate::array;
+use crate::array::{Array, PrimitiveArrayOps};
+use crate::datatypes::{
+    ArrowNativeType, ArrowPrimitiveType, DataType, Int16Type, Int32Type, Int64Type,
+    Int8Type, TimeUnit, UInt16Type, UInt32Type, UInt64Type, UInt8Type,
+};
+
+use array::DictionaryArray;
+
+use crate::error::{ArrowError, Result};
+
+/// Downcasts `$column` to `$array_type` and renders the value at `$row`
+/// with `to_string`; a null slot is rendered as the empty string.
+macro_rules! make_string {
+    ($array_type:ty, $column: ident, $row: ident) => {{
+        let array = $column.as_any().downcast_ref::<$array_type>().unwrap();
+
+        let s = if array.is_null($row) {
+            "".to_string()
+        } else {
+            array.value($row).to_string()
+        };
+
+        Ok(s)
+    }};
+}
+
+/// Get the value at the given row in an array as a String.
+///
+/// Null values are rendered as the empty string.
+///
+/// Note this function is quite inefficient and is unlikely to be
+/// suitable for converting large arrays or record batches.
+///
+/// # Errors
+///
+/// Returns `ArrowError::InvalidArgumentError` when the data type of
+/// `column`, or the index type of a dictionary column, is not handled.
+pub fn array_value_to_string(column: &array::ArrayRef, row: usize) -> Result<String> {
+    match column.data_type() {
+        DataType::Utf8 => make_string!(array::StringArray, column, row),
+        DataType::Boolean => make_string!(array::BooleanArray, column, row),
+        DataType::Int8 => make_string!(array::Int8Array, column, row),
+        DataType::Int16 => make_string!(array::Int16Array, column, row),
+        DataType::Int32 => make_string!(array::Int32Array, column, row),
+        DataType::Int64 => make_string!(array::Int64Array, column, row),
+        DataType::UInt8 => make_string!(array::UInt8Array, column, row),
+        DataType::UInt16 => make_string!(array::UInt16Array, column, row),
+        DataType::UInt32 => make_string!(array::UInt32Array, column, row),
+        DataType::UInt64 => make_string!(array::UInt64Array, column, row),
+        DataType::Float16 => make_string!(array::Float32Array, column, row),
+        DataType::Float32 => make_string!(array::Float32Array, column, row),
+        DataType::Float64 => make_string!(array::Float64Array, column, row),
+        DataType::Timestamp(unit, _) if *unit == TimeUnit::Second => {
+            make_string!(array::TimestampSecondArray, column, row)
+        }
+        DataType::Timestamp(unit, _) if *unit == TimeUnit::Millisecond => {
+            make_string!(array::TimestampMillisecondArray, column, row)
+        }
+        DataType::Timestamp(unit, _) if *unit == TimeUnit::Microsecond => {
+            make_string!(array::TimestampMicrosecondArray, column, row)
+        }
+        DataType::Timestamp(unit, _) if *unit == TimeUnit::Nanosecond => {
+            make_string!(array::TimestampNanosecondArray, column, row)
+        }
+        DataType::Date32(_) => make_string!(array::Date32Array, column, row),
+        DataType::Date64(_) => make_string!(array::Date64Array, column, row),
+        DataType::Time32(unit) if *unit == TimeUnit::Second => {
+            make_string!(array::Time32SecondArray, column, row)
+        }
+        DataType::Time32(unit) if *unit == TimeUnit::Millisecond => {
+            make_string!(array::Time32MillisecondArray, column, row)
+        }
+        // Microsecond and nanosecond resolutions are `Time64` arrays.
+        DataType::Time64(unit) if *unit == TimeUnit::Microsecond => {
+            make_string!(array::Time64MicrosecondArray, column, row)
+        }
+        DataType::Time64(unit) if *unit == TimeUnit::Nanosecond => {
+            make_string!(array::Time64NanosecondArray, column, row)
+        }
+        DataType::Dictionary(index_type, _value_type) => match **index_type {
+            DataType::Int8 => dict_array_value_to_string::<Int8Type>(column, row),
+            DataType::Int16 => dict_array_value_to_string::<Int16Type>(column, row),
+            DataType::Int32 => dict_array_value_to_string::<Int32Type>(column, row),
+            DataType::Int64 => dict_array_value_to_string::<Int64Type>(column, row),
+            DataType::UInt8 => dict_array_value_to_string::<UInt8Type>(column, row),
+            DataType::UInt16 => dict_array_value_to_string::<UInt16Type>(column, row),
+            DataType::UInt32 => dict_array_value_to_string::<UInt32Type>(column, row),
+            DataType::UInt64 => dict_array_value_to_string::<UInt64Type>(column, row),
+            _ => Err(ArrowError::InvalidArgumentError(format!(
+                "Pretty printing not supported for {:?} due to index type",
+                column.data_type()
+            ))),
+        },
+        _ => Err(ArrowError::InvalidArgumentError(format!(
+            "Pretty printing not implemented for {:?} type",
+            column.data_type()
+        ))),
+    }
+}
+
+/// Converts the value of the dictionary array at `row` to a String.
+///
+/// The key stored at `row` is used as the index into the dictionary
+/// values; a null key is rendered as the empty string.
+fn dict_array_value_to_string<K: ArrowPrimitiveType>(
+    colum: &array::ArrayRef,
+    row: usize,
+) -> Result<String> {
+    let dict_array = colum.as_any().downcast_ref::<DictionaryArray<K>>().unwrap();
+
+    let keys_array = dict_array.keys_array();
+
+    if keys_array.is_null(row) {
+        return Ok(String::from(""));
+    }
+
+    let dict_index = keys_array.value(row).to_usize().ok_or_else(|| {
+        ArrowError::InvalidArgumentError(format!(
+            "Can not convert value {:?} at index {:?} to usize for string conversion.",
+            keys_array.value(row),
+            row
+        ))
+    })?;
+
+    array_value_to_string(&dict_array.values(), dict_index)
+}
diff --git a/rust/arrow/src/util/mod.rs b/rust/arrow/src/util/mod.rs
index f3bcc23a1d4..ea1409d8b37 100644
--- a/rust/arrow/src/util/mod.rs
+++ b/rust/arrow/src/util/mod.rs
@@ -16,6 +16,7 @@
 // under the License.
pub mod bit_util; +pub mod display; pub mod integration_util; #[cfg(feature = "prettyprint")] pub mod pretty; diff --git a/rust/arrow/src/util/pretty.rs b/rust/arrow/src/util/pretty.rs index b881c3ae25d..7eacba3c1b5 100644 --- a/rust/arrow/src/util/pretty.rs +++ b/rust/arrow/src/util/pretty.rs @@ -15,21 +15,17 @@ // specific language governing permissions and limitations // under the License. -//! Utilities for printing record batches +//! Utilities for printing record batches. Note this module is not +//! available unless `feature = "prettyprint"` is enabled. -use crate::array; -use crate::array::{Array, PrimitiveArrayOps}; -use crate::datatypes::{ - ArrowNativeType, ArrowPrimitiveType, DataType, Int16Type, Int32Type, Int64Type, - Int8Type, TimeUnit, UInt16Type, UInt32Type, UInt64Type, UInt8Type, -}; use crate::record_batch::RecordBatch; -use array::DictionaryArray; use prettytable::format; use prettytable::{Cell, Row, Table}; -use crate::error::{ArrowError, Result}; +use crate::error::Result; + +use super::display::array_value_to_string; ///! Create a visual representation of record batches pub fn pretty_format_batches(results: &[RecordBatch]) -> Result { @@ -73,113 +69,12 @@ fn create_table(results: &[RecordBatch]) -> Result { Ok(table) } -macro_rules! 
make_string { - ($array_type:ty, $column: ident, $row: ident) => {{ - let array = $column.as_any().downcast_ref::<$array_type>().unwrap(); - - let s = if array.is_null($row) { - "".to_string() - } else { - array.value($row).to_string() - }; - - Ok(s) - }}; -} - -/// Get the value at the given row in an array as a String -pub fn array_value_to_string(column: &array::ArrayRef, row: usize) -> Result { - match column.data_type() { - DataType::Utf8 => make_string!(array::StringArray, column, row), - DataType::Boolean => make_string!(array::BooleanArray, column, row), - DataType::Int8 => make_string!(array::Int8Array, column, row), - DataType::Int16 => make_string!(array::Int16Array, column, row), - DataType::Int32 => make_string!(array::Int32Array, column, row), - DataType::Int64 => make_string!(array::Int64Array, column, row), - DataType::UInt8 => make_string!(array::UInt8Array, column, row), - DataType::UInt16 => make_string!(array::UInt16Array, column, row), - DataType::UInt32 => make_string!(array::UInt32Array, column, row), - DataType::UInt64 => make_string!(array::UInt64Array, column, row), - DataType::Float16 => make_string!(array::Float32Array, column, row), - DataType::Float32 => make_string!(array::Float32Array, column, row), - DataType::Float64 => make_string!(array::Float64Array, column, row), - DataType::Timestamp(unit, _) if *unit == TimeUnit::Second => { - make_string!(array::TimestampSecondArray, column, row) - } - DataType::Timestamp(unit, _) if *unit == TimeUnit::Millisecond => { - make_string!(array::TimestampMillisecondArray, column, row) - } - DataType::Timestamp(unit, _) if *unit == TimeUnit::Microsecond => { - make_string!(array::TimestampMicrosecondArray, column, row) - } - DataType::Timestamp(unit, _) if *unit == TimeUnit::Nanosecond => { - make_string!(array::TimestampNanosecondArray, column, row) - } - DataType::Date32(_) => make_string!(array::Date32Array, column, row), - DataType::Date64(_) => make_string!(array::Date64Array, column, row), - 
DataType::Time32(unit) if *unit == TimeUnit::Second => { - make_string!(array::Time32SecondArray, column, row) - } - DataType::Time32(unit) if *unit == TimeUnit::Millisecond => { - make_string!(array::Time32MillisecondArray, column, row) - } - DataType::Time32(unit) if *unit == TimeUnit::Microsecond => { - make_string!(array::Time64MicrosecondArray, column, row) - } - DataType::Time64(unit) if *unit == TimeUnit::Nanosecond => { - make_string!(array::Time64NanosecondArray, column, row) - } - DataType::Dictionary(index_type, _value_type) => match **index_type { - DataType::Int8 => dict_array_value_to_string::(column, row), - DataType::Int16 => dict_array_value_to_string::(column, row), - DataType::Int32 => dict_array_value_to_string::(column, row), - DataType::Int64 => dict_array_value_to_string::(column, row), - DataType::UInt8 => dict_array_value_to_string::(column, row), - DataType::UInt16 => dict_array_value_to_string::(column, row), - DataType::UInt32 => dict_array_value_to_string::(column, row), - DataType::UInt64 => dict_array_value_to_string::(column, row), - _ => Err(ArrowError::InvalidArgumentError(format!( - "Pretty printing not supported for {:?} due to index type", - column.data_type() - ))), - }, - _ => Err(ArrowError::InvalidArgumentError(format!( - "Pretty printing not implemented for {:?} type", - column.data_type() - ))), - } -} - -/// Converts the value of the dictionary array at `row` to a String -fn dict_array_value_to_string( - colum: &array::ArrayRef, - row: usize, -) -> Result { - let dict_array = colum.as_any().downcast_ref::>().unwrap(); - - let keys_array = dict_array.keys_array(); - - if keys_array.is_null(row) { - return Ok(String::from("")); - } - - let dict_index = keys_array.value(row).to_usize().ok_or_else(|| { - ArrowError::InvalidArgumentError(format!( - "Can not convert value {:?} at index {:?} to usize for repl.", - keys_array.value(row), - row - )) - })?; - - array_value_to_string(&dict_array.values(), dict_index) -} - 
#[cfg(test)] mod tests { - use array::{PrimitiveBuilder, StringBuilder, StringDictionaryBuilder}; + use crate::array::{self, PrimitiveBuilder, StringBuilder, StringDictionaryBuilder}; use super::*; - use crate::datatypes::{Field, Schema}; + use crate::datatypes::{DataType, Field, Int32Type, Schema}; use std::sync::Arc; #[test] diff --git a/rust/datafusion/tests/sql.rs b/rust/datafusion/tests/sql.rs index 5640daa5303..1bc8bd036c4 100644 --- a/rust/datafusion/tests/sql.rs +++ b/rust/datafusion/tests/sql.rs @@ -25,7 +25,7 @@ use arrow::record_batch::RecordBatch; use arrow::{array::*, datatypes::TimeUnit}; use arrow::{ datatypes::{DataType, Field, Schema, SchemaRef}, - util::pretty::array_value_to_string, + util::display::array_value_to_string, }; use datafusion::datasource::{csv::CsvReadOptions, MemTable};