diff --git a/datafusion/core/src/datasource/physical_plan/parquet/statistics.rs b/datafusion/core/src/datasource/physical_plan/parquet/statistics.rs
index 6c738cfe03a95..9d59808508604 100644
--- a/datafusion/core/src/datasource/physical_plan/parquet/statistics.rs
+++ b/datafusion/core/src/datasource/physical_plan/parquet/statistics.rs
@@ -160,6 +160,9 @@ macro_rules! get_statistic {
Some(DataType::Binary) => {
Some(ScalarValue::Binary(Some(s.$bytes_func().to_vec())))
}
+ Some(DataType::LargeBinary) => {
+ Some(ScalarValue::LargeBinary(Some(s.$bytes_func().to_vec())))
+ }
Some(DataType::LargeUtf8) | _ => {
let utf8_value = std::str::from_utf8(s.$bytes_func())
.map(|s| s.to_string())
@@ -427,8 +430,9 @@ mod test {
use arrow_array::{
new_null_array, Array, BinaryArray, BooleanArray, Date32Array, Date64Array,
Decimal128Array, Float32Array, Float64Array, Int16Array, Int32Array, Int64Array,
- Int8Array, RecordBatch, StringArray, StructArray, TimestampMicrosecondArray,
- TimestampMillisecondArray, TimestampNanosecondArray, TimestampSecondArray,
+ Int8Array, LargeBinaryArray, RecordBatch, StringArray, StructArray,
+ TimestampMicrosecondArray, TimestampMillisecondArray, TimestampNanosecondArray,
+ TimestampSecondArray,
};
use arrow_schema::{Field, SchemaRef};
use bytes::Bytes;
@@ -965,6 +969,34 @@ mod test {
.run()
}
+ #[test]
+ fn roundtrip_large_binary_array() {
+ let input: Vec