diff --git a/datafusion/core/src/datasource/physical_plan/parquet/statistics.rs b/datafusion/core/src/datasource/physical_plan/parquet/statistics.rs index 8d17354839a8a..bbdd46af5d01f 100644 --- a/datafusion/core/src/datasource/physical_plan/parquet/statistics.rs +++ b/datafusion/core/src/datasource/physical_plan/parquet/statistics.rs @@ -117,6 +117,9 @@ macro_rules! get_statistic { *scale, )) } + Some(DataType::Binary) => { + Some(ScalarValue::Binary(Some(s.$bytes_func().to_vec()))) + } _ => { let s = std::str::from_utf8(s.$bytes_func()) .map(|s| s.to_string()) @@ -644,10 +647,6 @@ mod test { } #[test] - #[should_panic( - expected = "Inconsistent types in ScalarValue::iter_to_array. Expected Utf8, got Binary(NULL)" - )] - // Due to https://github.com/apache/datafusion/issues/8295 fn roundtrip_binary() { Test { input: Arc::new(BinaryArray::from_opt_vec(vec![ diff --git a/datafusion/core/tests/parquet/arrow_statistics.rs b/datafusion/core/tests/parquet/arrow_statistics.rs index db687a3777a4f..e5aadf21310e2 100644 --- a/datafusion/core/tests/parquet/arrow_statistics.rs +++ b/datafusion/core/tests/parquet/arrow_statistics.rs @@ -24,9 +24,9 @@ use std::sync::Arc; use arrow::compute::kernels::cast_utils::Parser; use arrow::datatypes::{Date32Type, Date64Type}; use arrow_array::{ - make_array, Array, ArrayRef, BooleanArray, Date32Array, Date64Array, Decimal128Array, - FixedSizeBinaryArray, Float64Array, Int16Array, Int32Array, Int64Array, Int8Array, - RecordBatch, StringArray, UInt64Array, + make_array, Array, ArrayRef, BinaryArray, BooleanArray, Date32Array, Date64Array, + Decimal128Array, FixedSizeBinaryArray, Float64Array, Int16Array, Int32Array, + Int64Array, Int8Array, RecordBatch, StringArray, UInt64Array, }; use arrow_schema::{DataType, Field, Schema}; use datafusion::datasource::physical_plan::parquet::{ @@ -905,18 +905,17 @@ async fn test_byte() { .run(); // column "service_binary" + + let expected_service_binary_min_values: Vec<&[u8]> = + vec![b"frontend five", 
b"backend one", b"backend eight"]; + + let expected_service_binary_max_values: Vec<&[u8]> = + vec![b"frontend two", b"frontend six", b"backend six"]; + Test { reader: reader.build().await, - expected_min: Arc::new(StringArray::from(vec![ - "frontend five", - "backend one", - "backend eight", - ])), // Shuld be BinaryArray - expected_max: Arc::new(StringArray::from(vec![ - "frontend two", - "frontend six", - "backend six", - ])), // Shuld be BinaryArray + expected_min: Arc::new(BinaryArray::from(expected_service_binary_min_values)), // Should be BinaryArray + expected_max: Arc::new(BinaryArray::from(expected_service_binary_max_values)), // Should be BinaryArray expected_null_counts: UInt64Array::from(vec![0, 0, 0]), expected_row_counts: UInt64Array::from(vec![5, 5, 5]), column_name: "service_binary",