From dee18df12b26db55c0b1fece0f093906753ef293 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Florian=20M=C3=BCller?= Date: Wed, 30 Dec 2020 12:34:36 +0100 Subject: [PATCH 1/8] feat: support physical int types for decimal type --- rust/arrow/src/array/array_binary.rs | 7 ++++ rust/arrow/src/compute/kernels/cast.rs | 26 +++++++++++++++ rust/parquet/src/arrow/arrow_reader.rs | 44 +++++++++++++++----------- rust/parquet/src/arrow/schema.rs | 35 +++++++++++--------- 4 files changed, 78 insertions(+), 34 deletions(-) diff --git a/rust/arrow/src/array/array_binary.rs b/rust/arrow/src/array/array_binary.rs index db4097aee69..037c8fc1ceb 100644 --- a/rust/arrow/src/array/array_binary.rs +++ b/rust/arrow/src/array/array_binary.rs @@ -580,6 +580,13 @@ impl DecimalArray { let data = builder.build(); Self::from(data) } + pub fn precision(&self) -> usize { + self.precision + } + + pub fn scale(&self) -> usize { + self.scale + } } impl From for DecimalArray { diff --git a/rust/arrow/src/compute/kernels/cast.rs b/rust/arrow/src/compute/kernels/cast.rs index f1128769525..b175417bba4 100644 --- a/rust/arrow/src/compute/kernels/cast.rs +++ b/rust/arrow/src/compute/kernels/cast.rs @@ -522,6 +522,7 @@ pub fn cast(array: &ArrayRef, to_type: &DataType) -> Result { (Int32, Int64) => cast_numeric_arrays::(array), (Int32, Float32) => cast_numeric_arrays::(array), (Int32, Float64) => cast_numeric_arrays::(array), + (Int32, Decimal(p, s)) => int32_to_decimal_cast(array, *p, *s), (Int64, UInt8) => cast_numeric_arrays::(array), (Int64, UInt16) => cast_numeric_arrays::(array), @@ -532,6 +533,7 @@ pub fn cast(array: &ArrayRef, to_type: &DataType) -> Result { (Int64, Int32) => cast_numeric_arrays::(array), (Int64, Float32) => cast_numeric_arrays::(array), (Int64, Float64) => cast_numeric_arrays::(array), + (Int64, Decimal(p, s)) => int64_to_decimal_cast(array, *p, *s), (Float32, UInt8) => cast_numeric_arrays::(array), (Float32, UInt16) => cast_numeric_arrays::(array), @@ -946,6 +948,30 @@ where .collect() } +fn int32_to_decimal_cast(from: &ArrayRef, p: usize, s: usize) -> Result { + let mut builder = DecimalBuilder::new(from.len(), p, s); + let array = from.as_any().downcast_ref::().unwrap(); + for v in array.iter() { + match v { + Some(n) => builder.append_value(n as i128)?, + None => builder.append_null()? + }; + } + Ok(Arc::new(builder.finish()) as ArrayRef) +} + +fn int64_to_decimal_cast(from: &ArrayRef, p: usize, s: usize) -> Result { + let mut builder = DecimalBuilder::new(from.len(), p, s); + let array = from.as_any().downcast_ref::().unwrap(); + for v in array.iter() { + match v { + Some(n) => builder.append_value(n as i128)?, + None => builder.append_null()? + }; + } + Ok(Arc::new(builder.finish()) as ArrayRef) +} + /// Cast numeric types to Boolean /// /// Any zero value returns `false` while non-zero returns `true` diff --git a/rust/parquet/src/arrow/arrow_reader.rs b/rust/parquet/src/arrow/arrow_reader.rs index 304ba18bc37..1559c97e4cf 100644 --- a/rust/parquet/src/arrow/arrow_reader.rs +++ b/rust/parquet/src/arrow/arrow_reader.rs @@ -406,25 +406,31 @@ mod tests { fn test_read_decimal_file() { use arrow::array::DecimalArray; let testdata = arrow::util::test_util::parquet_test_data(); - let path = format!("{}/fixed_length_decimal.parquet", testdata); - let parquet_reader = - SerializedFileReader::try_from(File::open(&path).unwrap()).unwrap(); - let mut arrow_reader = ParquetFileArrowReader::new(Arc::new(parquet_reader)); - - let mut record_reader = arrow_reader.get_record_reader(32).unwrap(); - - let batch = record_reader.next().unwrap().unwrap(); - assert_eq!(batch.num_rows(), 24); - let col = batch - .column(0) - .as_any() - .downcast_ref::() - .unwrap(); - - let expected = 1..25; - - for (i, v) in expected.enumerate() { - assert_eq!(col.value(i), v * 100_i128); + let file_variants = vec![("fixed_length", 25), ("int32", 4), ("int64", 10)]; + for (prefix, target_precision) in file_variants { + let path = format!("{}/{}_decimal.parquet", testdata, prefix); + let parquet_reader = + SerializedFileReader::try_from(File::open(&path).unwrap()).unwrap(); + let mut arrow_reader = ParquetFileArrowReader::new(Arc::new(parquet_reader)); + + let mut record_reader = arrow_reader.get_record_reader(32).unwrap(); + + let batch = record_reader.next().unwrap().unwrap(); + assert_eq!(batch.num_rows(), 24); + let col = batch + .column(0) + .as_any() + .downcast_ref::() + .unwrap(); + + let expected = 1..25; + + assert_eq!(col.precision(), target_precision); + assert_eq!(col.scale(), 2); + + for (i, v) in expected.enumerate() { + assert_eq!(col.value(i), v * 100_i128); + } } } diff --git a/rust/parquet/src/arrow/schema.rs b/rust/parquet/src/arrow/schema.rs index 22213d4f0db..d37c1d0e5ce 100644 --- a/rust/parquet/src/arrow/schema.rs +++ b/rust/parquet/src/arrow/schema.rs @@ -591,6 +591,7 @@ impl ParquetTypeConverter<'_> { LogicalType::INT_32 => Ok(DataType::Int32), LogicalType::DATE => Ok(DataType::Date32(DateUnit::Day)), LogicalType::TIME_MILLIS => Ok(DataType::Time32(TimeUnit::Millisecond)), + LogicalType::DECIMAL => self.to_decimal(), other => Err(ArrowError(format!( "Unable to convert parquet INT32 logical type {}", other @@ -610,6 +611,7 @@ impl ParquetTypeConverter<'_> { LogicalType::TIMESTAMP_MICROS => { Ok(DataType::Timestamp(TimeUnit::Microsecond, None)) } + LogicalType::DECIMAL => self.to_decimal(), other => Err(ArrowError(format!( "Unable to convert parquet INT64 logical type {}", other @@ -619,21 +621,7 @@ impl ParquetTypeConverter<'_> { fn from_fixed_len_byte_array(&self) -> Result { match self.schema.get_basic_info().logical_type() { - LogicalType::DECIMAL => { - let (precision, scale) = match self.schema { - Type::PrimitiveType { - ref precision, - ref scale, - .. - } => (*precision, *scale), - _ => { - return Err(ArrowError( - "Expected a physical type, not a group type".to_string(), - )) - } - }; - Ok(DataType::Decimal(precision as usize, scale as usize)) - } + LogicalType::DECIMAL => self.to_decimal(), LogicalType::INTERVAL => { // There is currently no reliable way of determining which IntervalUnit // to return. Thus without the original Arrow schema, the results @@ -657,6 +645,23 @@ impl ParquetTypeConverter<'_> { } } + fn to_decimal(&self) -> Result { + let (precision, scale) = match self.schema { + Type::PrimitiveType { + ref precision, + ref scale, + .. + } => (*precision, *scale), + _ => { + return Err(ArrowError( + "Expected a physical type, not a group type".to_string(), + )) + } + }; + Ok(DataType::Decimal(precision as usize, scale as usize)) + } + + fn from_byte_array(&self) -> Result { match self.schema.get_basic_info().logical_type() { LogicalType::NONE => Ok(DataType::Binary), From d22156cd26863edc716e03b2682edbf2bb3aa982 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Florian=20M=C3=BCller?= Date: Wed, 30 Dec 2020 12:56:27 +0100 Subject: [PATCH 2/8] chore: add tests for casts --- rust/arrow/src/compute/kernels/cast.rs | 36 ++++++++++++++++++++++++-- rust/parquet/src/arrow/schema.rs | 27 ++++++++++--------- 2 files changed, 47 insertions(+), 16 deletions(-) diff --git a/rust/arrow/src/compute/kernels/cast.rs b/rust/arrow/src/compute/kernels/cast.rs index b175417bba4..ec0b9edc0fc 100644 --- a/rust/arrow/src/compute/kernels/cast.rs +++ b/rust/arrow/src/compute/kernels/cast.rs @@ -954,7 +954,7 @@ fn int32_to_decimal_cast(from: &ArrayRef, p: usize, s: usize) -> Result builder.append_value(n as i128)?, - None => builder.append_null()? + None => builder.append_null()?, }; } Ok(Arc::new(builder.finish()) as ArrayRef) @@ -966,7 +966,7 @@ fn int64_to_decimal_cast(from: &ArrayRef, p: usize, s: usize) -> Result builder.append_value(n as i128)?, - None => builder.append_null()? + None => builder.append_null()?, }; } Ok(Arc::new(builder.finish()) as ArrayRef) @@ -2325,6 +2325,23 @@ mod tests { u8_expected, get_cast_values::(&i64_array, &DataType::UInt8) ); + + let decimal_expected = vec![ + "-9223372036854775808", + "-2147483648", + "-32768", + "-128", + "0", + "127", + "32767", + "2147483647", + "9223372036854775807", + ]; + let arr = cast(&i64_array, &DataType::Decimal(25, 0)).unwrap(); + let decimal_actual = arr.as_any().downcast_ref::().unwrap(); + for (i, expected) in decimal_expected.iter().enumerate() { + assert_eq!(format!("{:?}", decimal_actual.value(i)), *expected); + } } #[test] @@ -2405,6 +2422,21 @@ mod tests { u8_expected, get_cast_values::(&i32_array, &DataType::UInt8) ); + + let decimal_expected = vec![ + "-2147483648", + "-32768", + "-128", + "0", + "127", + "32767", + "2147483647", + ]; + let arr = cast(&i32_array, &DataType::Decimal(25, 0)).unwrap(); + let decimal_actual = arr.as_any().downcast_ref::().unwrap(); + for (i, expected) in decimal_expected.iter().enumerate() { + assert_eq!(format!("{:?}", decimal_actual.value(i)), *expected); + } } #[test] diff --git a/rust/parquet/src/arrow/schema.rs b/rust/parquet/src/arrow/schema.rs index d37c1d0e5ce..7dbe9af6749 100644 --- a/rust/parquet/src/arrow/schema.rs +++ b/rust/parquet/src/arrow/schema.rs @@ -646,22 +646,21 @@ impl ParquetTypeConverter<'_> { } fn to_decimal(&self) -> Result { - let (precision, scale) = match self.schema { - Type::PrimitiveType { - ref precision, - ref scale, - .. - } => (*precision, *scale), - _ => { - return Err(ArrowError( - "Expected a physical type, not a group type".to_string(), - )) - } - }; - Ok(DataType::Decimal(precision as usize, scale as usize)) + let (precision, scale) = match self.schema { + Type::PrimitiveType { + ref precision, + ref scale, + .. + } => (*precision, *scale), + _ => { + return Err(ArrowError( + "Expected a physical type, not a group type".to_string(), + )) + } + }; + Ok(DataType::Decimal(precision as usize, scale as usize)) } - fn from_byte_array(&self) -> Result { match self.schema.get_basic_info().logical_type() { LogicalType::NONE => Ok(DataType::Binary), From 97dc01e204fb91ae63d41e6f860bf009859c7e3d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Florian=20M=C3=BCller?= Date: Thu, 31 Dec 2020 13:00:29 +0100 Subject: [PATCH 3/8] chore: address review comments --- rust/arrow/src/compute/kernels/cast.rs | 58 -------------------------- rust/parquet/src/arrow/array_reader.rs | 20 +++++++-- rust/parquet/src/arrow/schema.rs | 15 +++---- 3 files changed, 22 insertions(+), 71 deletions(-) diff --git a/rust/arrow/src/compute/kernels/cast.rs b/rust/arrow/src/compute/kernels/cast.rs index ec0b9edc0fc..f1128769525 100644 --- a/rust/arrow/src/compute/kernels/cast.rs +++ b/rust/arrow/src/compute/kernels/cast.rs @@ -522,7 +522,6 @@ pub fn cast(array: &ArrayRef, to_type: &DataType) -> Result { (Int32, Int64) => cast_numeric_arrays::(array), (Int32, Float32) => cast_numeric_arrays::(array), (Int32, Float64) => cast_numeric_arrays::(array), - (Int32, Decimal(p, s)) => int32_to_decimal_cast(array, *p, *s), (Int64, UInt8) => cast_numeric_arrays::(array), (Int64, UInt16) => cast_numeric_arrays::(array), @@ -533,7 +532,6 @@ pub fn cast(array: &ArrayRef, to_type: &DataType) -> Result { (Int64, Int32) => cast_numeric_arrays::(array), (Int64, Float32) => cast_numeric_arrays::(array), (Int64, Float64) => cast_numeric_arrays::(array), - (Int64, Decimal(p, s)) => int64_to_decimal_cast(array, *p, *s), (Float32, UInt8) => cast_numeric_arrays::(array), (Float32, UInt16) => cast_numeric_arrays::(array), @@ -948,30 +946,6 @@ where .collect() } -fn int32_to_decimal_cast(from: &ArrayRef, p: usize, s: usize) -> Result { - let mut builder = DecimalBuilder::new(from.len(), p, s); - let array = from.as_any().downcast_ref::().unwrap(); - for v in array.iter() { - match v { - Some(n) => builder.append_value(n as i128)?, - None => builder.append_null()?, - }; - } - Ok(Arc::new(builder.finish()) as ArrayRef) -} - -fn int64_to_decimal_cast(from: &ArrayRef, p: usize, s: usize) -> Result { - let mut builder = DecimalBuilder::new(from.len(), p, s); - let array = from.as_any().downcast_ref::().unwrap(); - for v in array.iter() { - match v { - Some(n) => builder.append_value(n as i128)?, - None => builder.append_null()?, - }; - } - Ok(Arc::new(builder.finish()) as ArrayRef) -} - /// Cast numeric types to Boolean /// /// Any zero value returns `false` while non-zero returns `true` @@ -2325,23 +2299,6 @@ mod tests { u8_expected, get_cast_values::(&i64_array, &DataType::UInt8) ); - - let decimal_expected = vec![ - "-9223372036854775808", - "-2147483648", - "-32768", - "-128", - "0", - "127", - "32767", - "2147483647", - "9223372036854775807", - ]; - let arr = cast(&i64_array, &DataType::Decimal(25, 0)).unwrap(); - let decimal_actual = arr.as_any().downcast_ref::().unwrap(); - for (i, expected) in decimal_expected.iter().enumerate() { - assert_eq!(format!("{:?}", decimal_actual.value(i)), *expected); - } } #[test] @@ -2422,21 +2379,6 @@ mod tests { u8_expected, get_cast_values::(&i32_array, &DataType::UInt8) ); - - let decimal_expected = vec![ - "-2147483648", - "-32768", - "-128", - "0", - "127", - "32767", - "2147483647", - ]; - let arr = cast(&i32_array, &DataType::Decimal(25, 0)).unwrap(); - let decimal_actual = arr.as_any().downcast_ref::().unwrap(); - for (i, expected) in decimal_expected.iter().enumerate() { - assert_eq!(format!("{:?}", decimal_actual.value(i)), *expected); - } } #[test] diff --git a/rust/parquet/src/arrow/array_reader.rs b/rust/parquet/src/arrow/array_reader.rs index eb9548a8622..bdda9dbde03 100644 --- a/rust/parquet/src/arrow/array_reader.rs +++ b/rust/parquet/src/arrow/array_reader.rs @@ -25,10 +25,10 @@ use std::vec::Vec; use arrow::array::{ Array, ArrayData, ArrayDataBuilder, ArrayDataRef, ArrayRef, BinaryArray, - BinaryBuilder, BooleanArray, BooleanBufferBuilder, FixedSizeBinaryArray, - FixedSizeBinaryBuilder, GenericListArray, Int16BufferBuilder, ListBuilder, - OffsetSizeTrait, PrimitiveArray, PrimitiveBuilder, StringArray, StringBuilder, - StructArray, + BinaryBuilder, BooleanArray, BooleanBufferBuilder, DecimalBuilder, + FixedSizeBinaryArray, FixedSizeBinaryBuilder, GenericListArray, Int16BufferBuilder, + Int64Array, ListBuilder, OffsetSizeTrait, PrimitiveArray, PrimitiveBuilder, + StringArray, StringBuilder, StructArray, }; use arrow::buffer::{Buffer, MutableBuffer}; use arrow::datatypes::{ @@ -350,6 +350,18 @@ impl ArrayReader for PrimitiveArrayReader { let a = arrow::compute::cast(&array, &ArrowType::Date32(DateUnit::Day))?; arrow::compute::cast(&a, target_type)? } + ArrowType::Decimal(p, s) => { + let to_int64 = arrow::compute::cast(&array, &ArrowType::Int64)?; + let mut builder = DecimalBuilder::new(to_int64.len(), *p, *s); + let values = to_int64.as_any().downcast_ref::().unwrap(); + for maybe_value in values.iter() { + match maybe_value { + Some(value) => builder.append_value(value as i128)?, + None => builder.append_null()?, + } + } + Arc::new(builder.finish()) as ArrayRef + } _ => arrow::compute::cast(&array, target_type)?, }; diff --git a/rust/parquet/src/arrow/schema.rs b/rust/parquet/src/arrow/schema.rs index 7dbe9af6749..ee8bf0f0f9a 100644 --- a/rust/parquet/src/arrow/schema.rs +++ b/rust/parquet/src/arrow/schema.rs @@ -646,19 +646,16 @@ impl ParquetTypeConverter<'_> { } fn to_decimal(&self) -> Result { - let (precision, scale) = match self.schema { + match self.schema { Type::PrimitiveType { ref precision, ref scale, .. - } => (*precision, *scale), - _ => { - return Err(ArrowError( - "Expected a physical type, not a group type".to_string(), - )) - } - }; - Ok(DataType::Decimal(precision as usize, scale as usize)) + } => Ok(DataType::Decimal(*precision as usize, *scale as usize)), + _ => Err(ArrowError( + "Expected a physical type, not a group type".to_string(), + )), + } } fn from_byte_array(&self) -> Result { From 366618022b7b4cc80226f4e8f7170bf1ed63c25a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Florian=20M=C3=BCller?= Date: Fri, 1 Jan 2021 12:38:37 +0100 Subject: [PATCH 4/8] refactor: assertion instead of result return --- rust/parquet/src/arrow/schema.rs | 25 ++++++++++++------------- 1 file changed, 12 insertions(+), 13 deletions(-) diff --git a/rust/parquet/src/arrow/schema.rs b/rust/parquet/src/arrow/schema.rs index ee8bf0f0f9a..0522670918f 100644 --- a/rust/parquet/src/arrow/schema.rs +++ b/rust/parquet/src/arrow/schema.rs @@ -591,7 +591,7 @@ impl ParquetTypeConverter<'_> { LogicalType::INT_32 => Ok(DataType::Int32), LogicalType::DATE => Ok(DataType::Date32(DateUnit::Day)), LogicalType::TIME_MILLIS => Ok(DataType::Time32(TimeUnit::Millisecond)), - LogicalType::DECIMAL => self.to_decimal(), + LogicalType::DECIMAL => Ok(self.to_decimal()), other => Err(ArrowError(format!( "Unable to convert parquet INT32 logical type {}", other @@ -611,7 +611,7 @@ impl ParquetTypeConverter<'_> { LogicalType::TIMESTAMP_MICROS => { Ok(DataType::Timestamp(TimeUnit::Microsecond, None)) } - LogicalType::DECIMAL => self.to_decimal(), + LogicalType::DECIMAL => Ok(self.to_decimal()), other => Err(ArrowError(format!( "Unable to convert parquet INT64 logical type {}", other @@ -621,7 +621,7 @@ impl ParquetTypeConverter<'_> { fn from_fixed_len_byte_array(&self) -> Result { match self.schema.get_basic_info().logical_type() { - LogicalType::DECIMAL => self.to_decimal(), + LogicalType::DECIMAL => Ok(self.to_decimal()), LogicalType::INTERVAL => { // There is currently no reliable way of determining which IntervalUnit // to return. Thus without the original Arrow schema, the results @@ -645,16 +645,15 @@ impl ParquetTypeConverter<'_> { } } - fn to_decimal(&self) -> Result { - match self.schema { - Type::PrimitiveType { - ref precision, - ref scale, - .. - } => Ok(DataType::Decimal(*precision as usize, *scale as usize)), - _ => Err(ArrowError( - "Expected a physical type, not a group type".to_string(), - )), + fn to_decimal(&self) -> DataType { + assert!(self.schema.is_primitive()); + if let Type::PrimitiveType { + precision, scale, .. + } = self.schema + { + DataType::Decimal(*precision as usize, *scale as usize) + } else { + unreachable!() } } From 6a630888d3d33a4b662cc4698af13819b19a1ccc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Florian=20M=C3=BCller?= Date: Sat, 2 Jan 2021 13:37:09 +0100 Subject: [PATCH 5/8] feat: add getter for precision and scale --- rust/parquet/src/arrow/schema.rs | 9 +-------- rust/parquet/src/schema/types.rs | 18 ++++++++++++++++++ 2 files changed, 19 insertions(+), 8 deletions(-) diff --git a/rust/parquet/src/arrow/schema.rs b/rust/parquet/src/arrow/schema.rs index 0522670918f..93d5dabf6a9 100644 --- a/rust/parquet/src/arrow/schema.rs +++ b/rust/parquet/src/arrow/schema.rs @@ -647,14 +647,7 @@ impl ParquetTypeConverter<'_> { fn to_decimal(&self) -> DataType { assert!(self.schema.is_primitive()); - if let Type::PrimitiveType { - precision, scale, .. - } = self.schema - { - DataType::Decimal(*precision as usize, *scale as usize) - } else { - unreachable!() - } + DataType::Decimal(self.schema.get_precision() as usize, self.schema.get_scale() as usize) } fn from_byte_array(&self) -> Result { diff --git a/rust/parquet/src/schema/types.rs b/rust/parquet/src/schema/types.rs index c9eeaa0f901..9f873974cc4 100644 --- a/rust/parquet/src/schema/types.rs +++ b/rust/parquet/src/schema/types.rs @@ -103,6 +103,24 @@ impl Type { } } + /// Gets precision of this primitive type. + /// Note that this will panic if called on a non-primitive type. + pub fn get_precision(&self) -> i32 { + match *self { + Type::PrimitiveType { precision, ..} => precision, + _ => panic!("Cannot call get_precision() on non-primitive type") + } + } + + /// Gets scale of this primitive type. + /// Note that this will panic if called on a non-primitive type. + pub fn get_scale(&self) -> i32 { + match *self { + Type::PrimitiveType { scale, ..} => scale, + _ => panic!("Cannot call get_scale() on non-primitive type") + } + } + /// Checks if `sub_type` schema is part of current schema. /// This method can be used to check if projected columns are part of the root schema. pub fn check_contains(&self, sub_type: &Type) -> bool { From 5e8645c7fdfd3d30e97200e473cd61e42c2c5166 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Florian=20M=C3=BCller?= Date: Sat, 2 Jan 2021 14:19:51 +0100 Subject: [PATCH 6/8] chore: formatting --- rust/parquet/src/arrow/schema.rs | 5 ++++- rust/parquet/src/schema/types.rs | 8 ++++---- 2 files changed, 8 insertions(+), 5 deletions(-) diff --git a/rust/parquet/src/arrow/schema.rs b/rust/parquet/src/arrow/schema.rs index 93d5dabf6a9..fc5861af1b2 100644 --- a/rust/parquet/src/arrow/schema.rs +++ b/rust/parquet/src/arrow/schema.rs @@ -647,7 +647,10 @@ impl ParquetTypeConverter<'_> { fn to_decimal(&self) -> DataType { assert!(self.schema.is_primitive()); - DataType::Decimal(self.schema.get_precision() as usize, self.schema.get_scale() as usize) + DataType::Decimal( + self.schema.get_precision() as usize, + self.schema.get_scale() as usize, + ) } fn from_byte_array(&self) -> Result { diff --git a/rust/parquet/src/schema/types.rs b/rust/parquet/src/schema/types.rs index 9f873974cc4..27768fbb63e 100644 --- a/rust/parquet/src/schema/types.rs +++ b/rust/parquet/src/schema/types.rs @@ -107,8 +107,8 @@ impl Type { /// Note that this will panic if called on a non-primitive type. pub fn get_precision(&self) -> i32 { match *self { - Type::PrimitiveType { precision, ..} => precision, - _ => panic!("Cannot call get_precision() on non-primitive type") + Type::PrimitiveType { precision, .. } => precision, + _ => panic!("Cannot call get_precision() on non-primitive type"), } } @@ -116,8 +116,8 @@ impl Type { /// Note that this will panic if called on a non-primitive type. pub fn get_scale(&self) -> i32 { match *self { - Type::PrimitiveType { scale, ..} => scale, - _ => panic!("Cannot call get_scale() on non-primitive type") + Type::PrimitiveType { scale, .. } => scale, + _ => panic!("Cannot call get_scale() on non-primitive type"), } } From 7ee5fe69f61e995be909feb273293f839cbe822d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Florian=20M=C3=BCller?= Date: Sat, 2 Jan 2021 14:28:24 +0100 Subject: [PATCH 7/8] refactor: use precision and scale getter --- rust/parquet/src/arrow/array_reader.rs | 18 ++++-------------- 1 file changed, 4 insertions(+), 14 deletions(-) diff --git a/rust/parquet/src/arrow/array_reader.rs b/rust/parquet/src/arrow/array_reader.rs index 0ae6be504f5..690e0f1adb2 100644 --- a/rust/parquet/src/arrow/array_reader.rs +++ b/rust/parquet/src/arrow/array_reader.rs @@ -1562,20 +1562,10 @@ impl<'a> ArrayReaderBuilder { PhysicalType::FIXED_LEN_BYTE_ARRAY if cur_type.get_basic_info().logical_type() == LogicalType::DECIMAL => { - let (precision, scale) = match *cur_type { - Type::PrimitiveType { - ref precision, - ref scale, - .. - } => (*precision, *scale), - _ => { - return Err(ArrowError( - "Expected a physical type, not a group type".to_string(), - )) - } - }; - let converter = - DecimalConverter::new(DecimalArrayConverter::new(precision, scale)); + let converter = DecimalConverter::new(DecimalArrayConverter::new( + cur_type.get_precision(), + cur_type.get_scale(), + )); Ok(Box::new(ComplexObjectArrayReader::< FixedLenByteArrayType, DecimalConverter, From ba1c6aa7f2806fe79068945d45d941e8e5f32784 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Florian=20M=C3=BCller?= Date: Sun, 3 Jan 2021 09:30:56 +0100 Subject: [PATCH 8/8] refactor: handle i32 and i64 differently without cast --- rust/parquet/src/arrow/array_reader.rs | 36 +++++++++++++++++++------- 1 file changed, 27 insertions(+), 9 deletions(-) diff --git a/rust/parquet/src/arrow/array_reader.rs b/rust/parquet/src/arrow/array_reader.rs index 690e0f1adb2..18fb478b70c 100644 --- a/rust/parquet/src/arrow/array_reader.rs +++ b/rust/parquet/src/arrow/array_reader.rs @@ -27,8 +27,8 @@ use arrow::array::{ Array, ArrayData, ArrayDataBuilder, ArrayDataRef, ArrayRef, BinaryArray, BinaryBuilder, BooleanArray, BooleanBufferBuilder, DecimalBuilder, FixedSizeBinaryArray, FixedSizeBinaryBuilder, GenericListArray, Int16BufferBuilder, - Int64Array, ListBuilder, OffsetSizeTrait, PrimitiveArray, PrimitiveBuilder, - StringArray, StringBuilder, StructArray, + Int32Array, Int64Array, ListBuilder, OffsetSizeTrait, PrimitiveArray, + PrimitiveBuilder, StringArray, StringBuilder, StructArray, }; use arrow::buffer::{Buffer, MutableBuffer}; use arrow::datatypes::{ @@ -351,13 +351,31 @@ impl ArrayReader for PrimitiveArrayReader { arrow::compute::cast(&a, target_type)? } ArrowType::Decimal(p, s) => { - let to_int64 = arrow::compute::cast(&array, &ArrowType::Int64)?; - let mut builder = DecimalBuilder::new(to_int64.len(), *p, *s); - let values = to_int64.as_any().downcast_ref::().unwrap(); - for maybe_value in values.iter() { - match maybe_value { - Some(value) => builder.append_value(value as i128)?, - None => builder.append_null()?, + let mut builder = DecimalBuilder::new(array.len(), *p, *s); + match array.data_type() { + ArrowType::Int32 => { + let values = array.as_any().downcast_ref::().unwrap(); + for maybe_value in values.iter() { + match maybe_value { + Some(value) => builder.append_value(value as i128)?, + None => builder.append_null()?, + } + } + } + ArrowType::Int64 => { + let values = array.as_any().downcast_ref::().unwrap(); + for maybe_value in values.iter() { + match maybe_value { + Some(value) => builder.append_value(value as i128)?, + None => builder.append_null()?, + } + } + } + _ => { + return Err(ArrowError(format!( + "Cannot convert {:?} to decimal", + array.data_type() + ))) } } Arc::new(builder.finish()) as ArrayRef