Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 12 additions & 0 deletions datafusion/core/src/datasource/physical_plan/parquet/statistics.rs
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,15 @@ macro_rules! get_statistic {
Some(DataType::Int16) => {
Some(ScalarValue::Int16(Some((*s.$func()).try_into().unwrap())))
}
Some(DataType::UInt8) => {
Some(ScalarValue::UInt8(Some((*s.$func()).try_into().unwrap())))
}
Some(DataType::UInt16) => {
Some(ScalarValue::UInt16(Some((*s.$func()).try_into().unwrap())))
}
Some(DataType::UInt32) => {
Some(ScalarValue::UInt32(Some((*s.$func()) as u32)))
}
Some(DataType::Date32) => {
Some(ScalarValue::Date32(Some(*s.$func())))
}
Expand All @@ -100,6 +109,9 @@ macro_rules! get_statistic {
*scale,
))
}
Some(DataType::UInt64) => {
Some(ScalarValue::UInt64(Some((*s.$func()) as u64)))
}
_ => Some(ScalarValue::Int64(Some(*s.$func()))),
}
}
Expand Down
60 changes: 25 additions & 35 deletions datafusion/core/tests/parquet/arrow_statistics.rs
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,8 @@ use arrow::datatypes::{Date32Type, Date64Type};
use arrow_array::{
make_array, Array, ArrayRef, BinaryArray, BooleanArray, Date32Array, Date64Array,
Decimal128Array, FixedSizeBinaryArray, Float32Array, Float64Array, Int16Array,
Int32Array, Int64Array, Int8Array, RecordBatch, StringArray, UInt64Array,
Int32Array, Int64Array, Int8Array, RecordBatch, StringArray, UInt16Array,
UInt32Array, UInt64Array, UInt8Array,
};
use arrow_schema::{DataType, Field, Schema};
use datafusion::datasource::physical_plan::parquet::{
Expand Down Expand Up @@ -703,8 +704,6 @@ async fn test_dates_64_diff_rg_sizes() {
.run();
}

// BUG:
// https://github.com/apache/datafusion/issues/10604
#[tokio::test]
async fn test_uint() {
// This creates a parquet file with 4 columns named "u8", "u16", "u32", "u64"
Expand All @@ -719,48 +718,40 @@ async fn test_uint() {
row_per_group: 4,
};

// u8
// BUG: expect UInt8Array but returns Int32Array
Test {
reader: reader.build().await,
expected_min: Arc::new(Int32Array::from(vec![0, 1, 4, 7, 251])), // should be UInt8Array
expected_max: Arc::new(Int32Array::from(vec![3, 4, 6, 250, 254])), // should be UInt8Array
expected_min: Arc::new(UInt8Array::from(vec![0, 1, 4, 7, 251])),
expected_max: Arc::new(UInt8Array::from(vec![3, 4, 6, 250, 254])),
expected_null_counts: UInt64Array::from(vec![0, 0, 0, 0, 0]),
expected_row_counts: UInt64Array::from(vec![4, 4, 4, 4, 4]),
column_name: "u8",
}
.run();

// u16
// BUG: expect UInt16Array but returns Int32Array
Test {
reader: reader.build().await,
expected_min: Arc::new(Int32Array::from(vec![0, 1, 4, 7, 251])), // should be UInt16Array
expected_max: Arc::new(Int32Array::from(vec![3, 4, 6, 250, 254])), // should be UInt16Array
expected_min: Arc::new(UInt16Array::from(vec![0, 1, 4, 7, 251])),
expected_max: Arc::new(UInt16Array::from(vec![3, 4, 6, 250, 254])),
expected_null_counts: UInt64Array::from(vec![0, 0, 0, 0, 0]),
expected_row_counts: UInt64Array::from(vec![4, 4, 4, 4, 4]),
column_name: "u16",
}
.run();

// u32
// BUG: expect UInt32Array but returns Int32Array
Test {
reader: reader.build().await,
expected_min: Arc::new(Int32Array::from(vec![0, 1, 4, 7, 251])), // should be UInt32Array
expected_max: Arc::new(Int32Array::from(vec![3, 4, 6, 250, 254])), // should be UInt32Array
expected_min: Arc::new(UInt32Array::from(vec![0, 1, 4, 7, 251])),
expected_max: Arc::new(UInt32Array::from(vec![3, 4, 6, 250, 254])),
expected_null_counts: UInt64Array::from(vec![0, 0, 0, 0, 0]),
expected_row_counts: UInt64Array::from(vec![4, 4, 4, 4, 4]),
column_name: "u32",
}
.run();

// u64
// BUG: expect UInt64Array but returns Int64Array
Test {
reader: reader.build().await,
expected_min: Arc::new(Int64Array::from(vec![0, 1, 4, 7, 251])), // should be UInt64Array
expected_max: Arc::new(Int64Array::from(vec![3, 4, 6, 250, 254])), // should be UInt64Array
expected_min: Arc::new(UInt64Array::from(vec![0, 1, 4, 7, 251])),
expected_max: Arc::new(UInt64Array::from(vec![3, 4, 6, 250, 254])),
expected_null_counts: UInt64Array::from(vec![0, 0, 0, 0, 0]),
expected_row_counts: UInt64Array::from(vec![4, 4, 4, 4, 4]),
column_name: "u64",
Expand Down Expand Up @@ -788,8 +779,6 @@ async fn test_int32_range() {
.run();
}

// BUG: returns Int32Array instead of UInt32Array
// https://github.com/apache/datafusion/issues/10604
#[tokio::test]
async fn test_uint32_range() {
// This creates a parquet file of 1 column "u"
Expand All @@ -801,8 +790,8 @@ async fn test_uint32_range() {

Test {
reader: reader.build().await,
expected_min: Arc::new(Int32Array::from(vec![0])), // should be UInt32Array
expected_max: Arc::new(Int32Array::from(vec![300000])), // should be UInt32Array
expected_min: Arc::new(UInt32Array::from(vec![0])),
expected_max: Arc::new(UInt32Array::from(vec![300000])),
expected_null_counts: UInt64Array::from(vec![0]),
expected_row_counts: UInt64Array::from(vec![4]),
column_name: "u",
Expand All @@ -820,44 +809,45 @@ async fn test_numeric_limits_unsigned() {

Test {
reader: reader.build().await,
expected_min: Arc::new(Int8Array::from(vec![i8::MIN, -100])),
expected_max: Arc::new(Int8Array::from(vec![100, i8::MAX])),
expected_min: Arc::new(UInt8Array::from(vec![u8::MIN, 100])),
expected_max: Arc::new(UInt8Array::from(vec![100, u8::MAX])),
expected_null_counts: UInt64Array::from(vec![0, 0]),
expected_row_counts: UInt64Array::from(vec![5, 2]),
column_name: "i8",
column_name: "u8",
}
.run();

Test {
reader: reader.build().await,
expected_min: Arc::new(Int16Array::from(vec![i16::MIN, -100])),
expected_max: Arc::new(Int16Array::from(vec![100, i16::MAX])),
expected_min: Arc::new(UInt16Array::from(vec![u16::MIN, 100])),
expected_max: Arc::new(UInt16Array::from(vec![100, u16::MAX])),
expected_null_counts: UInt64Array::from(vec![0, 0]),
expected_row_counts: UInt64Array::from(vec![5, 2]),
column_name: "i16",
column_name: "u16",
}
.run();

Test {
reader: reader.build().await,
expected_min: Arc::new(Int32Array::from(vec![i32::MIN, -100])),
expected_max: Arc::new(Int32Array::from(vec![100, i32::MAX])),
expected_min: Arc::new(UInt32Array::from(vec![u32::MIN, 100])),
expected_max: Arc::new(UInt32Array::from(vec![100, u32::MAX])),
expected_null_counts: UInt64Array::from(vec![0, 0]),
expected_row_counts: UInt64Array::from(vec![5, 2]),
column_name: "i32",
column_name: "u32",
}
.run();

Test {
reader: reader.build().await,
expected_min: Arc::new(Int64Array::from(vec![i64::MIN, -100])),
expected_max: Arc::new(Int64Array::from(vec![100, i64::MAX])),
expected_min: Arc::new(UInt64Array::from(vec![u64::MIN, 100])),
expected_max: Arc::new(UInt64Array::from(vec![100, u64::MAX])),
expected_null_counts: UInt64Array::from(vec![0, 0]),
expected_row_counts: UInt64Array::from(vec![5, 2]),
column_name: "i64",
column_name: "u64",
}
.run();
}

#[tokio::test]
async fn test_numeric_limits_signed() {
// file has 7 rows, 2 row groups: one with 5 rows, one with 2 rows.
Expand Down