diff --git a/rust/arrow/src/array/array.rs b/rust/arrow/src/array/array.rs index ad80881a88f..4ff5a8dd2b2 100644 --- a/rust/arrow/src/array/array.rs +++ b/rust/arrow/src/array/array.rs @@ -307,6 +307,7 @@ pub fn make_array(data: ArrayDataRef) -> ArrayRef { dt => panic!("Unexpected dictionary key type {:?}", dt), }, DataType::Null => Arc::new(NullArray::from(data)) as ArrayRef, + DataType::Decimal(_, _) => Arc::new(DecimalArray::from(data)) as ArrayRef, dt => panic!("Unexpected data type {:?}", dt), } } diff --git a/rust/arrow/src/array/array_binary.rs b/rust/arrow/src/array/array_binary.rs index d7a3eb7217a..15d6ccd0045 100644 --- a/rust/arrow/src/array/array_binary.rs +++ b/rust/arrow/src/array/array_binary.rs @@ -441,6 +441,159 @@ impl Array for FixedSizeBinaryArray { } } +/// A type of `DecimalArray` whose elements are binaries. +pub struct DecimalArray { + data: ArrayDataRef, + value_data: RawPtrBox, + precision: usize, + scale: usize, + length: i32, +} + +impl DecimalArray { + /// Returns the element at index `i` as i128. + pub fn value(&self, i: usize) -> i128 { + assert!(i < self.data.len(), "DecimalArray out of bounds access"); + let offset = i.checked_add(self.data.offset()).unwrap(); + let raw_val = unsafe { + let pos = self.value_offset_at(offset); + std::slice::from_raw_parts( + self.value_data.get().offset(pos as isize), + (self.value_offset_at(offset + 1) - pos) as usize, + ) + }; + Self::from_bytes_to_i128(raw_val) + } + + fn from_bytes_to_i128(b: &[u8]) -> i128 { + assert!(b.len() <= 16, "DecimalArray supports only up to size 16"); + let first_bit = b[0] & 128u8 == 128u8; + let mut result = if first_bit { [255u8; 16] } else { [0u8; 16] }; + for (i, v) in b.iter().enumerate() { + result[i + (16 - b.len())] = *v; + } + i128::from_be_bytes(result) + } + + /// Returns the byte size per value for Decimal arrays with a given precision + pub fn calc_fixed_byte_size(precision: usize) -> i32 { + (10.0_f64.powi(precision as i32).log2() / 8.0).ceil() as i32 + } + + /// Returns the offset for the element at index `i`. + /// + /// Note this doesn't do any bound checking, for performance reason. + #[inline] + pub fn value_offset(&self, i: usize) -> i32 { + self.value_offset_at(self.data.offset() + i) + } + + /// Returns the length for an element. + /// + /// All elements have the same length as the array is a fixed size. + #[inline] + pub fn value_length(&self) -> i32 { + self.length + } + + /// Returns a clone of the value data buffer + pub fn value_data(&self) -> Buffer { + self.data.buffers()[0].clone() + } + + #[inline] + fn value_offset_at(&self, i: usize) -> i32 { + self.length * i as i32 + } + + pub fn from_fixed_size_list_array( + v: FixedSizeListArray, + precision: usize, + scale: usize, + ) -> Self { + assert_eq!( + v.data_ref().child_data()[0].child_data().len(), + 0, + "DecimalArray can only be created from list array of u8 values \ + (i.e. FixedSizeList>)." + ); + assert_eq!( + v.data_ref().child_data()[0].data_type(), + &DataType::UInt8, + "DecimalArray can only be created from FixedSizeList arrays, mismatched data types." + ); + + let mut builder = ArrayData::builder(DataType::Decimal(precision, scale)) + .len(v.len()) + .add_buffer(v.data_ref().child_data()[0].buffers()[0].clone()); + if let Some(bitmap) = v.data_ref().null_bitmap() { + builder = builder + .null_count(v.data_ref().null_count()) + .null_bit_buffer(bitmap.bits.clone()) + } + + let data = builder.build(); + Self::from(data) + } +} + +impl From for DecimalArray { + fn from(data: ArrayDataRef) -> Self { + assert_eq!( + data.buffers().len(), + 1, + "DecimalArray data should contain 1 buffer only (values)" + ); + let value_data = data.buffers()[0].raw_data(); + let (precision, scale) = match data.data_type() { + DataType::Decimal(precision, scale) => (*precision, *scale), + _ => panic!("Expected data type to be Decimal"), + }; + let length = Self::calc_fixed_byte_size(precision); + Self { + data, + value_data: RawPtrBox::new(value_data), + precision, + scale, + length, + } + } +} + +impl fmt::Debug for DecimalArray { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + write!(f, "DecimalArray<{}, {}>\n[\n", self.precision, self.scale)?; + print_long_array(self, f, |array, index, f| { + fmt::Debug::fmt(&array.value(index), f) + })?; + write!(f, "]") + } +} + +impl Array for DecimalArray { + fn as_any(&self) -> &Any { + self + } + + fn data(&self) -> ArrayDataRef { + self.data.clone() + } + + fn data_ref(&self) -> &ArrayDataRef { + &self.data + } + + /// Returns the total number of bytes of memory occupied by the buffers owned by this [DecimalArray]. + fn get_buffer_memory_size(&self) -> usize { + self.data.get_buffer_memory_size() + } + + /// Returns the total number of bytes of memory occupied physically by this [DecimalArray]. + fn get_array_memory_size(&self) -> usize { + self.data.get_array_memory_size() + mem::size_of_val(self) + } +} + #[cfg(test)] mod tests { use crate::datatypes::Field; @@ -794,4 +947,38 @@ mod tests { format!("{:?}", arr) ); } + + #[test] + fn test_decimal_array() { + let values: [u8; 20] = [ + 0, 0, 0, 0, 0, 2, 17, 180, 219, 192, 255, 255, 255, 255, 255, 253, 238, 75, + 36, 64, + ]; + + let array_data = ArrayData::builder(DataType::Decimal(23, 6)) + .len(2) + .add_buffer(Buffer::from(&values[..])) + .build(); + let decimal_array = DecimalArray::from(array_data); + assert_eq!(8_887_000_000, decimal_array.value(0)); + assert_eq!(-8_887_000_000, decimal_array.value(1)); + assert_eq!(10, decimal_array.value_length()); + } + + #[test] + fn test_decimal_array_fmt_debug() { + let values: [u8; 20] = [ + 0, 0, 0, 0, 0, 2, 17, 180, 219, 192, 255, 255, 255, 255, 255, 253, 238, 75, + 36, 64, + ]; + let array_data = ArrayData::builder(DataType::Decimal(23, 6)) + .len(2) + .add_buffer(Buffer::from(&values[..])) + .build(); + let arr = DecimalArray::from(array_data); + assert_eq!( + "DecimalArray<23, 6>\n[\n 8887000000,\n -8887000000,\n]", + format!("{:?}", arr) + ); + } } diff --git a/rust/arrow/src/array/builder.rs b/rust/arrow/src/array/builder.rs index c0300eeb510..64d3e5d1fb6 100644 --- a/rust/arrow/src/array/builder.rs +++ b/rust/arrow/src/array/builder.rs @@ -1285,6 +1285,13 @@ pub struct FixedSizeBinaryBuilder { builder: FixedSizeListBuilder, } +#[derive(Debug)] +pub struct DecimalBuilder { + builder: FixedSizeListBuilder, + precision: usize, + scale: usize, +} + impl ArrayBuilder for BinaryBuilder { /// Returns the builder as a non-mutable `Any` reference. fn as_any(&self) -> &Any { @@ -1663,6 +1670,92 @@ impl ArrayBuilder for FixedSizeBinaryBuilder { } } +impl ArrayBuilder for DecimalBuilder { + /// Returns the builder as a non-mutable `Any` reference. + fn as_any(&self) -> &Any { + self + } + + /// Appends data from other arrays into the builder + /// + /// This is most useful when concatenating arrays of the same type into a builder. + fn append_data(&mut self, data: &[ArrayDataRef]) -> Result<()> { + // validate arraydata and reserve memory + for array in data { + if array.data_type() != &self.data_type() { + return Err(ArrowError::InvalidArgumentError( + "Cannot append data to builder if data types are different" + .to_string(), + )); + } + if array.buffers().len() != 1 { + return Err(ArrowError::InvalidArgumentError( + "Decimal arrays should have 1 buffer".to_string(), + )); + } + } + for array in data { + // convert string to FixedSizeList to reuse list's append + let int_data = &array.buffers()[0]; + let int_data = Arc::new(ArrayData::new( + DataType::UInt8, + int_data.len(), + None, + None, + 0, + vec![int_data.clone()], + vec![], + )) as ArrayDataRef; + let list_data = Arc::new(ArrayData::new( + DataType::FixedSizeList( + Box::new(Field::new("item", DataType::UInt8, true)), + self.builder.list_len, + ), + array.len(), + None, + array.null_buffer().cloned(), + array.offset(), + vec![], + vec![int_data], + )); + self.builder.append_data(&[list_data])?; + } + Ok(()) + } + + /// Returns the data type of the builder + /// + /// This is used for validating array data types in `append_data` + fn data_type(&self) -> DataType { + DataType::Decimal(self.precision, self.scale) + } + + /// Returns the builder as a mutable `Any` reference. + fn as_any_mut(&mut self) -> &mut Any { + self + } + + /// Returns the boxed builder as a box of `Any`. + fn into_box_any(self: Box) -> Box { + self + } + + /// Returns the number of array slots in the builder + fn len(&self) -> usize { + self.builder.len() + } + + /// Returns whether the number of array slots is zero + fn is_empty(&self) -> bool { + self.builder.is_empty() + } + + /// Builds the array and reset this builder. + fn finish(&mut self) -> ArrayRef { + Arc::new(self.finish()) + } +} + impl BinaryBuilder { /// Creates a new `BinaryBuilder`, `capacity` is the number of bytes in the values /// array @@ -1882,6 +1975,67 @@ impl FixedSizeBinaryBuilder { } } +impl DecimalBuilder { + /// Creates a new `BinaryBuilder`, `capacity` is the number of bytes in the values + /// array + pub fn new(capacity: usize, precision: usize, scale: usize) -> Self { + let values_builder = UInt8Builder::new(capacity); + let byte_width = DecimalArray::calc_fixed_byte_size(precision); + Self { + builder: FixedSizeListBuilder::new(values_builder, byte_width), + precision, + scale, + } + } + + /// Appends a byte slice into the builder. + /// + /// Automatically calls the `append` method to delimit the slice appended in as a + /// distinct array element. + pub fn append_value(&mut self, value: i128) -> Result<()> { + let value_as_bytes = Self::from_i128_to_fixed_size_bytes( + value, + self.builder.value_length() as usize, + )?; + if self.builder.value_length() != value_as_bytes.len() as i32 { + return Err(ArrowError::InvalidArgumentError( + "Byte slice does not have the same length as DecimalBuilder value lengths".to_string() + )); + } + self.builder + .values() + .append_slice(value_as_bytes.as_slice())?; + self.builder.append(true) + } + + fn from_i128_to_fixed_size_bytes(v: i128, size: usize) -> Result> { + if size > 16 { + return Err(ArrowError::InvalidArgumentError( + "DecimalBuilder only supports values up to 16 bytes.".to_string(), + )); + } + let res = v.to_be_bytes(); + let start_byte = 16 - size; + Ok(res[start_byte..16].to_vec()) + } + + /// Append a null value to the array. + pub fn append_null(&mut self) -> Result<()> { + let length: usize = self.builder.value_length() as usize; + self.builder.values().append_slice(&vec![0u8; length][..])?; + self.builder.append(false) + } + + /// Builds the `DecimalArray` and reset this builder. + pub fn finish(&mut self) -> DecimalArray { + DecimalArray::from_fixed_size_list_array( + self.builder.finish(), + self.precision, + self.scale, + ) + } +} + /// Array builder for Struct types. /// /// Note that callers should make sure that methods of all the child field builders are @@ -2024,6 +2178,9 @@ pub fn make_builder(datatype: &DataType, capacity: usize) -> Box { DataType::FixedSizeBinary(len) => { Box::new(FixedSizeBinaryBuilder::new(capacity, *len)) } + DataType::Decimal(precision, scale) => { + Box::new(DecimalBuilder::new(capacity, *precision, *scale)) + } DataType::Utf8 => Box::new(StringBuilder::new(capacity)), DataType::Date32(DateUnit::Day) => Box::new(Date32Builder::new(capacity)), DataType::Date64(DateUnit::Millisecond) => Box::new(Date64Builder::new(capacity)), @@ -3186,6 +3343,22 @@ mod tests { assert_eq!(5, fixed_size_binary_array.value_length()); } + #[test] + fn test_decimal_builder() { + let mut builder = DecimalBuilder::new(30, 23, 6); + + builder.append_value(8_887_000_000).unwrap(); + builder.append_null().unwrap(); + builder.append_value(-8_887_000_000).unwrap(); + let decimal_array: DecimalArray = builder.finish(); + + assert_eq!(&DataType::Decimal(23, 6), decimal_array.data_type()); + assert_eq!(3, decimal_array.len()); + assert_eq!(1, decimal_array.null_count()); + assert_eq!(20, decimal_array.value_offset(2)); + assert_eq!(10, decimal_array.value_length()); + } + #[test] fn test_string_array_builder_finish() { let mut builder = StringBuilder::new(10); diff --git a/rust/arrow/src/array/equal/decimal.rs b/rust/arrow/src/array/equal/decimal.rs new file mode 100644 index 00000000000..d8534b825ac --- /dev/null +++ b/rust/arrow/src/array/equal/decimal.rs @@ -0,0 +1,67 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +use crate::{array::ArrayData, array::DecimalArray, datatypes::DataType}; + +use super::utils::equal_len; + +pub(super) fn decimal_equal( + lhs: &ArrayData, + rhs: &ArrayData, + lhs_start: usize, + rhs_start: usize, + len: usize, +) -> bool { + let size = match lhs.data_type() { + DataType::Decimal(precision, _) => { + DecimalArray::calc_fixed_byte_size(*precision) as usize + } + _ => unreachable!(), + }; + + let lhs_values = &lhs.buffers()[0].data()[lhs.offset() * size..]; + let rhs_values = &rhs.buffers()[0].data()[rhs.offset() * size..]; + + if lhs.null_count() == 0 && rhs.null_count() == 0 { + equal_len( + lhs_values, + rhs_values, + size * lhs_start, + size * rhs_start, + size * len, + ) + } else { + // with nulls, we need to compare item by item whenever it is not null + (0..len).all(|i| { + let lhs_pos = lhs_start + i; + let rhs_pos = rhs_start + i; + + let lhs_is_null = lhs.is_null(lhs_pos); + let rhs_is_null = rhs.is_null(rhs_pos); + + lhs_is_null + || (lhs_is_null == rhs_is_null) + && equal_len( + lhs_values, + rhs_values, + lhs_pos * size, + rhs_pos * size, + size, // 1 * size since we are comparing a single entry + ) + }) + } +} diff --git a/rust/arrow/src/array/equal/mod.rs b/rust/arrow/src/array/equal/mod.rs index ab063754051..920235802c6 100644 --- a/rust/arrow/src/array/equal/mod.rs +++ b/rust/arrow/src/array/equal/mod.rs @@ -20,14 +20,15 @@ //! depend on dynamic casting of `Array`. use super::{ - Array, ArrayData, BinaryOffsetSizeTrait, FixedSizeBinaryArray, GenericBinaryArray, - GenericListArray, GenericStringArray, OffsetSizeTrait, PrimitiveArray, - StringOffsetSizeTrait, StructArray, + Array, ArrayData, BinaryOffsetSizeTrait, DecimalArray, FixedSizeBinaryArray, + GenericBinaryArray, GenericListArray, GenericStringArray, OffsetSizeTrait, + PrimitiveArray, StringOffsetSizeTrait, StructArray, }; use crate::datatypes::{ArrowPrimitiveType, DataType, IntervalUnit}; mod boolean; +mod decimal; mod dictionary; mod fixed_binary; mod fixed_list; @@ -42,6 +43,7 @@ mod variable_size; // For this reason, they are not exposed and are instead used // to build the generic functions below (`equal_range` and `equal`). use boolean::boolean_equal; +use decimal::decimal_equal; use dictionary::dictionary_equal; use fixed_binary::fixed_binary_equal; use fixed_list::fixed_list_equal; @@ -87,6 +89,12 @@ impl PartialEq for FixedSizeBinaryArray { } } +impl PartialEq for DecimalArray { + fn eq(&self, other: &Self) -> bool { + equal(self.data().as_ref(), other.data().as_ref()) + } +} + impl PartialEq for GenericListArray { fn eq(&self, other: &Self) -> bool { equal(self.data().as_ref(), other.data().as_ref()) @@ -143,6 +151,7 @@ fn equal_values( DataType::FixedSizeBinary(_) => { fixed_binary_equal(lhs, rhs, lhs_start, rhs_start, len) } + DataType::Decimal(_, _) => decimal_equal(lhs, rhs, lhs_start, rhs_start, len), DataType::List(_) => list_equal::(lhs, rhs, lhs_start, rhs_start, len), DataType::LargeList(_) => list_equal::(lhs, rhs, lhs_start, rhs_start, len), DataType::FixedSizeList(_, _) => { @@ -217,9 +226,9 @@ mod tests { use crate::array::{ array::Array, ArrayDataRef, ArrayRef, BinaryOffsetSizeTrait, BooleanArray, - FixedSizeBinaryBuilder, FixedSizeListBuilder, GenericBinaryArray, Int32Builder, - ListBuilder, NullArray, PrimitiveBuilder, StringArray, StringDictionaryBuilder, - StringOffsetSizeTrait, StructArray, + DecimalBuilder, FixedSizeBinaryBuilder, FixedSizeListBuilder, GenericBinaryArray, + Int32Builder, ListBuilder, NullArray, PrimitiveBuilder, StringArray, + StringDictionaryBuilder, StringOffsetSizeTrait, StructArray, }; use crate::array::{GenericStringArray, Int32Array}; use crate::datatypes::Int16Type; @@ -604,6 +613,96 @@ mod tests { test_equal(&a_slice, &b_slice, true); } + fn create_decimal_array(data: &[Option]) -> ArrayDataRef { + let mut builder = DecimalBuilder::new(20, 23, 6); + + for d in data { + if let Some(v) = d { + builder.append_value(*v).unwrap(); + } else { + builder.append_null().unwrap(); + } + } + builder.finish().data() + } + + #[test] + fn test_decimal_equal() { + let a = create_decimal_array(&[Some(8_887_000_000), Some(-8_887_000_000)]); + let b = create_decimal_array(&[Some(8_887_000_000), Some(-8_887_000_000)]); + test_equal(a.as_ref(), b.as_ref(), true); + + let b = create_decimal_array(&[Some(15_887_000_000), Some(-8_887_000_000)]); + test_equal(a.as_ref(), b.as_ref(), false); + } + + // Test the case where null_count > 0 + #[test] + fn test_decimal_null() { + let a = create_decimal_array(&[Some(8_887_000_000), None, Some(-8_887_000_000)]); + let b = create_decimal_array(&[Some(8_887_000_000), None, Some(-8_887_000_000)]); + test_equal(a.as_ref(), b.as_ref(), true); + + let b = create_decimal_array(&[Some(8_887_000_000), Some(-8_887_000_000), None]); + test_equal(a.as_ref(), b.as_ref(), false); + + let b = create_decimal_array(&[Some(15_887_000_000), None, Some(-8_887_000_000)]); + test_equal(a.as_ref(), b.as_ref(), false); + } + + #[test] + fn test_decimal_offsets() { + // Test the case where offset != 0 + let a = create_decimal_array(&[ + Some(8_887_000_000), + None, + None, + Some(-8_887_000_000), + None, + None, + ]); + let b = create_decimal_array(&[ + Some(8_887_000_000), + None, + None, + Some(15_887_000_000), + None, + None, + ]); + + let a_slice = a.slice(0, 3); + let b_slice = b.slice(0, 3); + test_equal(&a_slice, &b_slice, true); + + let a_slice = a.slice(0, 5); + let b_slice = b.slice(0, 5); + test_equal(&a_slice, &b_slice, false); + + let a_slice = a.slice(4, 1); + let b_slice = b.slice(4, 1); + test_equal(&a_slice, &b_slice, true); + + let a_slice = a.slice(3, 3); + let b_slice = b.slice(3, 3); + test_equal(&a_slice, &b_slice, false); + + let a_slice = a.slice(1, 3); + let b_slice = b.slice(1, 3); + test_equal(&a_slice, &b_slice, false); + + let b = create_decimal_array(&[ + None, + None, + None, + Some(-8_887_000_000), + Some(-3_000), + None, + ]); + let a_slice = a.slice(1, 3); + let b_slice = b.slice(1, 3); + test_equal(&a_slice, &b_slice, true); + } + /// Create a fixed size list of 2 value lengths fn create_fixed_size_list_array, T: AsRef<[Option]>>( data: T, diff --git a/rust/arrow/src/array/equal_json.rs b/rust/arrow/src/array/equal_json.rs index 42c0964c68b..808cb5bb772 100644 --- a/rust/arrow/src/array/equal_json.rs +++ b/rust/arrow/src/array/equal_json.rs @@ -324,6 +324,43 @@ impl PartialEq for Value { } } +impl JsonEqual for DecimalArray { + fn equals_json(&self, json: &[&Value]) -> bool { + if self.len() != json.len() { + return false; + } + + (0..self.len()).all(|i| match json[i] { + JString(s) => { + self.is_valid(i) + && (s + .parse::() + .map_or_else(|_| false, |v| v == self.value(i))) + } + JNull => self.is_null(i), + _ => false, + }) + } +} + +impl PartialEq for DecimalArray { + fn eq(&self, json: &Value) -> bool { + match json { + Value::Array(json_array) => self.equals_json_values(&json_array), + _ => false, + } + } +} + +impl PartialEq for Value { + fn eq(&self, arrow: &DecimalArray) -> bool { + match self { + Value::Array(json_array) => arrow.equals_json_values(&json_array), + _ => false, + } + } +} + impl JsonEqual for UnionArray { fn equals_json(&self, _json: &[&Value]) -> bool { unimplemented!( @@ -846,6 +883,88 @@ mod tests { assert!(json_array.ne(&arrow_array)); } + #[test] + fn test_decimal_json_equal() { + // Test the equal case + let mut builder = DecimalBuilder::new(30, 23, 6); + builder.append_value(1_000).unwrap(); + builder.append_null().unwrap(); + builder.append_value(-250).unwrap(); + let arrow_array: DecimalArray = builder.finish(); + let json_array: Value = serde_json::from_str( + r#" + [ + "1000", + null, + "-250" + ] + "#, + ) + .unwrap(); + println!("{:?}", arrow_array); + assert!(arrow_array.eq(&json_array)); + assert!(json_array.eq(&arrow_array)); + + // Test unequal case + builder.append_value(1_000).unwrap(); + builder.append_null().unwrap(); + builder.append_value(55).unwrap(); + let arrow_array: DecimalArray = builder.finish(); + let json_array: Value = serde_json::from_str( + r#" + [ + "1000", + null, + "-250" + ] + "#, + ) + .unwrap(); + assert!(arrow_array.ne(&json_array)); + assert!(json_array.ne(&arrow_array)); + + // Test unequal length case + let json_array: Value = serde_json::from_str( + r#" + [ + "1000", + null, + null, + "55" + ] + "#, + ) + .unwrap(); + assert!(arrow_array.ne(&json_array)); + assert!(json_array.ne(&arrow_array)); + + // Test incorrect type case + let json_array: Value = serde_json::from_str( + r#" + { + "a": 1 + } + "#, + ) + .unwrap(); + assert!(arrow_array.ne(&json_array)); + assert!(json_array.ne(&arrow_array)); + + // Test incorrect value type case + let json_array: Value = serde_json::from_str( + r#" + [ + "hello", + null, + 1 + ] + "#, + ) + .unwrap(); + assert!(arrow_array.ne(&json_array)); + assert!(json_array.ne(&arrow_array)); + } + #[test] fn test_struct_json_equal() { let strings: ArrayRef = Arc::new(StringArray::from(vec![ diff --git a/rust/arrow/src/array/mod.rs b/rust/arrow/src/array/mod.rs index bbe454c9036..0fc227aff73 100644 --- a/rust/arrow/src/array/mod.rs +++ b/rust/arrow/src/array/mod.rs @@ -111,6 +111,7 @@ pub use self::data::ArrayDataBuilder; pub use self::data::ArrayDataRef; pub use self::array_binary::BinaryArray; +pub use self::array_binary::DecimalArray; pub use self::array_binary::FixedSizeBinaryArray; pub use self::array_binary::LargeBinaryArray; pub use self::array_dictionary::DictionaryArray; @@ -207,6 +208,7 @@ pub type DurationNanosecondBufferBuilder = BufferBuilder pub use self::builder::ArrayBuilder; pub use self::builder::BinaryBuilder; +pub use self::builder::DecimalBuilder; pub use self::builder::FixedSizeBinaryBuilder; pub use self::builder::FixedSizeListBuilder; pub use self::builder::LargeBinaryBuilder; diff --git a/rust/arrow/src/datatypes.rs b/rust/arrow/src/datatypes.rs index d6dd7a544cd..d051a430075 100644 --- a/rust/arrow/src/datatypes.rs +++ b/rust/arrow/src/datatypes.rs @@ -145,6 +145,8 @@ pub enum DataType { /// This type mostly used to represent low cardinality string /// arrays or a limited set of primitive types as integers. Dictionary(Box, Box), + /// Decimal value with precision and scale + Decimal(usize, usize), } /// Date is either a 32-bit or 64-bit type representing elapsed time since UNIX @@ -1122,6 +1124,9 @@ impl DataType { TimeUnit::Nanosecond => "NANOSECOND", }}), DataType::Dictionary(_, _) => json!({ "name": "dictionary"}), + DataType::Decimal(precision, scale) => { + json!({"name": "decimal", "precision": precision, "scale": scale}) + } } } @@ -1458,7 +1463,8 @@ impl Field { | DataType::FixedSizeList(_, _) | DataType::FixedSizeBinary(_) | DataType::Utf8 - | DataType::LargeUtf8 => { + | DataType::LargeUtf8 + | DataType::Decimal(_, _) => { if self.data_type != from.data_type { return Err(ArrowError::SchemaError( "Fail to merge schema Field due to conflicting datatype" diff --git a/rust/parquet/src/arrow/arrow_writer.rs b/rust/parquet/src/arrow/arrow_writer.rs index f2c907baa61..31864a79f1e 100644 --- a/rust/parquet/src/arrow/arrow_writer.rs +++ b/rust/parquet/src/arrow/arrow_writer.rs @@ -267,6 +267,7 @@ fn write_leaves( ArrowDataType::FixedSizeList(_, _) | ArrowDataType::Boolean | ArrowDataType::FixedSizeBinary(_) + | ArrowDataType::Decimal(_, _) | ArrowDataType::Union(_) => Err(ParquetError::NYI( "Attempting to write an Arrow type that is not yet implemented".to_string(), )), @@ -471,6 +472,7 @@ fn get_levels( repetition: None, }], ArrowDataType::FixedSizeBinary(_) => unimplemented!(), + ArrowDataType::Decimal(_, _) => unimplemented!(), ArrowDataType::List(_) | ArrowDataType::LargeList(_) => { let array_data = array.data(); let child_data = array_data.child_data().get(0).unwrap(); @@ -555,6 +557,7 @@ fn get_levels( | ArrowDataType::Utf8 | ArrowDataType::LargeUtf8 => unimplemented!(), ArrowDataType::FixedSizeBinary(_) => unimplemented!(), + ArrowDataType::Decimal(_, _) => unimplemented!(), ArrowDataType::LargeBinary => unimplemented!(), ArrowDataType::List(_) | ArrowDataType::LargeList(_) => { // nested list diff --git a/rust/parquet/src/arrow/schema.rs b/rust/parquet/src/arrow/schema.rs index c84e8955501..cff3800bcfd 100644 --- a/rust/parquet/src/arrow/schema.rs +++ b/rust/parquet/src/arrow/schema.rs @@ -400,6 +400,12 @@ fn arrow_to_parquet_type(field: &Field) -> Result { .with_length(*length) .build() } + DataType::Decimal(_, _) => { + Type::primitive_type_builder(name, PhysicalType::FIXED_LEN_BYTE_ARRAY) + .with_repetition(repetition) + .with_length(10) + .build() + } DataType::Utf8 | DataType::LargeUtf8 => { Type::primitive_type_builder(name, PhysicalType::BYTE_ARRAY) .with_logical_type(LogicalType::UTF8)