From 75c96b399aedc76626759ddf7801d9cc93fe6109 Mon Sep 17 00:00:00 2001 From: "Carol (Nichols || Goulding)" Date: Fri, 25 Sep 2020 11:05:17 -0400 Subject: [PATCH] [Rust] Update arrow schema encoding with 0.15 IPC alignment changes In 75f804efbfe367175fef5a2238d9cd2d30ed3afe, schema_to_bytes was changed to take IpcWriteOptions and to return EncodedData. This updates encode_arrow_schema to use those changes, which should get the rust-parquet-arrow-writer branch compiling and passing tests again. --- rust/parquet/src/arrow/schema.rs | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/rust/parquet/src/arrow/schema.rs b/rust/parquet/src/arrow/schema.rs index d4cfe1f4772..d5a0ff9ca08 100644 --- a/rust/parquet/src/arrow/schema.rs +++ b/rust/parquet/src/arrow/schema.rs @@ -27,6 +27,7 @@ use std::collections::{HashMap, HashSet}; use std::rc::Rc; use arrow::datatypes::{DataType, DateUnit, Field, Schema, TimeUnit}; +use arrow::ipc::writer; use crate::basic::{LogicalType, Repetition, Type as PhysicalType}; use crate::errors::{ParquetError::ArrowError, Result}; @@ -120,15 +121,16 @@ fn get_arrow_schema_from_metadata(encoded_meta: &str) -> Option<Schema> { /// Encodes the Arrow schema into the IPC format, and base64 encodes it fn encode_arrow_schema(schema: &Schema) -> String { - let mut serialized_schema = arrow::ipc::writer::schema_to_bytes(&schema); + let options = writer::IpcWriteOptions::default(); + let mut serialized_schema = arrow::ipc::writer::schema_to_bytes(&schema, &options); // manually prepending the length to the schema as arrow uses the legacy IPC format // TODO: change after addressing ARROW-9777 - let schema_len = serialized_schema.len(); + let schema_len = serialized_schema.ipc_message.len(); let mut len_prefix_schema = Vec::with_capacity(schema_len + 8); len_prefix_schema.append(&mut vec![255u8, 255, 255, 255]); len_prefix_schema.append((schema_len as u32).to_le_bytes().to_vec().as_mut()); - len_prefix_schema.append(&mut serialized_schema); +
len_prefix_schema.append(&mut serialized_schema.ipc_message); base64::encode(&len_prefix_schema) }