From c904ee9c09066eb95fa875cb5ea3ca11bf2347ae Mon Sep 17 00:00:00 2001 From: mqy Date: Thu, 31 Dec 2020 18:49:27 +0800 Subject: [PATCH 1/4] ARROW-10996: [Rust] Return for get_arrow_schema_from_metadata --- rust/parquet/src/arrow/schema.rs | 29 +++++++++++++++-------------- 1 file changed, 15 insertions(+), 14 deletions(-) diff --git a/rust/parquet/src/arrow/schema.rs b/rust/parquet/src/arrow/schema.rs index 22213d4f0db..7dd4983768f 100644 --- a/rust/parquet/src/arrow/schema.rs +++ b/rust/parquet/src/arrow/schema.rs @@ -47,7 +47,7 @@ pub fn parquet_to_arrow_schema( .map(|encoded| get_arrow_schema_from_metadata(&encoded)); match arrow_schema_metadata { - Some(Some(schema)) => Ok(schema), + Some(Ok(schema)) => Ok(schema), _ => parquet_to_arrow_schema_by_columns( parquet_schema, 0..parquet_schema.columns().len(), @@ -120,10 +120,14 @@ where T: IntoIterator, { let mut metadata = parse_key_value_metadata(key_value_metadata).unwrap_or_default(); - let arrow_schema_metadata = metadata + let maybe_schema = metadata .remove(super::ARROW_SCHEMA_META_KEY) - .map(|encoded| get_arrow_schema_from_metadata(&encoded)) - .unwrap_or_default(); + .map(|encoded| get_arrow_schema_from_metadata(&encoded)); + + let arrow_schema_metadata = match maybe_schema { + Some(v) => Some(v?), + _ => None, + }; // add the Arrow metadata to the Parquet metadata if let Some(arrow_schema) = &arrow_schema_metadata { @@ -175,7 +179,7 @@ where } /// Try to convert Arrow schema metadata into a schema -fn get_arrow_schema_from_metadata(encoded_meta: &str) -> Option { +fn get_arrow_schema_from_metadata(encoded_meta: &str) -> Result { let decoded = base64::decode(encoded_meta); match decoded { Ok(bytes) => { @@ -187,28 +191,25 @@ fn get_arrow_schema_from_metadata(encoded_meta: &str) -> Option { match arrow::ipc::root_as_message(slice) { Ok(message) => message .header_as_schema() - .map(arrow::ipc::convert::fb_to_schema), + .map(arrow::ipc::convert::fb_to_schema) + .ok_or(ArrowError("the message is not Arrow Schema".to_string())), Err(err) => { // The flatbuffers implementation returns an error on verification error. - // TODO: return error to caller? - eprintln!( + Err(ArrowError(format!( "Unable to get root as message stored in {}: {:?}", super::ARROW_SCHEMA_META_KEY, err - ); - None + ))) } } } Err(err) => { // The C++ implementation returns an error if the schema can't be parsed. - // To prevent this, we explicitly log this, then compute the schema without the metadata - eprintln!( + Err(ArrowError(format!( "Unable to decode the encoded schema stored in {}, {:?}", super::ARROW_SCHEMA_META_KEY, err - ); - None + ))) } } } From 1e6082cb4eaff295f0e311ec2f3ffec2c4cdd99b Mon Sep 17 00:00:00 2001 From: mqy Date: Fri, 1 Jan 2021 15:40:08 +0800 Subject: [PATCH 2/4] Fix parquet_to_arrow_schema() --- rust/parquet/src/arrow/schema.rs | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/rust/parquet/src/arrow/schema.rs b/rust/parquet/src/arrow/schema.rs index 7dd4983768f..f6c9107a6a7 100644 --- a/rust/parquet/src/arrow/schema.rs +++ b/rust/parquet/src/arrow/schema.rs @@ -42,12 +42,17 @@ pub fn parquet_to_arrow_schema( key_value_metadata: &Option>, ) -> Result { let mut metadata = parse_key_value_metadata(key_value_metadata).unwrap_or_default(); - let arrow_schema_metadata = metadata + let maybe_schema = metadata .remove(super::ARROW_SCHEMA_META_KEY) .map(|encoded| get_arrow_schema_from_metadata(&encoded)); + let arrow_schema_metadata = match maybe_schema { + Some(v) => Some(v?), + _ => None, + }; + match arrow_schema_metadata { - Some(Ok(schema)) => Ok(schema), + Some(schema) => Ok(schema), _ => parquet_to_arrow_schema_by_columns( parquet_schema, 0..parquet_schema.columns().len(), From 58ac5de3d301f448872d32fd33eba2cc8e30a9f7 Mon Sep 17 00:00:00 2001 From: mqy Date: Fri, 1 Jan 2021 17:30:23 +0800 Subject: [PATCH 3/4] use map_or() --- rust/parquet/src/arrow/schema.rs | 20 ++++++-------------- 1 file changed, 6 insertions(+), 14 deletions(-) diff --git a/rust/parquet/src/arrow/schema.rs b/rust/parquet/src/arrow/schema.rs index f6c9107a6a7..d5a52beca81 100644 --- a/rust/parquet/src/arrow/schema.rs +++ b/rust/parquet/src/arrow/schema.rs @@ -42,14 +42,10 @@ pub fn parquet_to_arrow_schema( key_value_metadata: &Option>, ) -> Result { let mut metadata = parse_key_value_metadata(key_value_metadata).unwrap_or_default(); - let maybe_schema = metadata + let arrow_schema_metadata = metadata .remove(super::ARROW_SCHEMA_META_KEY) - .map(|encoded| get_arrow_schema_from_metadata(&encoded)); - - let arrow_schema_metadata = match maybe_schema { - Some(v) => Some(v?), - _ => None, - }; + .map(|encoded| get_arrow_schema_from_metadata(&encoded)) + .map_or(Ok(None), |v| v.map(Some))?; match arrow_schema_metadata { Some(schema) => Ok(schema), @@ -125,14 +121,10 @@ where T: IntoIterator, { let mut metadata = parse_key_value_metadata(key_value_metadata).unwrap_or_default(); - let maybe_schema = metadata + let arrow_schema_metadata = metadata .remove(super::ARROW_SCHEMA_META_KEY) - .map(|encoded| get_arrow_schema_from_metadata(&encoded)); - - let arrow_schema_metadata = match maybe_schema { - Some(v) => Some(v?), - _ => None, - }; + .map(|encoded| get_arrow_schema_from_metadata(&encoded)) + .map_or(Ok(None), |v| v.map(Some))?; // add the Arrow metadata to the Parquet metadata if let Some(arrow_schema) = &arrow_schema_metadata { From 56c24ed8a73270e5af2f4ac34840eeab2bf79057 Mon Sep 17 00:00:00 2001 From: mqy Date: Fri, 1 Jan 2021 18:36:25 +0800 Subject: [PATCH 4/4] unwrap_or --- rust/parquet/src/arrow/schema.rs | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) diff --git a/rust/parquet/src/arrow/schema.rs b/rust/parquet/src/arrow/schema.rs index d5a52beca81..eb5e94d2682 100644 --- a/rust/parquet/src/arrow/schema.rs +++ b/rust/parquet/src/arrow/schema.rs @@ -42,19 +42,14 @@ pub fn parquet_to_arrow_schema( key_value_metadata: &Option>, ) -> Result { let mut metadata = parse_key_value_metadata(key_value_metadata).unwrap_or_default(); - let arrow_schema_metadata = metadata + metadata .remove(super::ARROW_SCHEMA_META_KEY) .map(|encoded| get_arrow_schema_from_metadata(&encoded)) - .map_or(Ok(None), |v| v.map(Some))?; - - match arrow_schema_metadata { - Some(schema) => Ok(schema), - _ => parquet_to_arrow_schema_by_columns( + .unwrap_or(parquet_to_arrow_schema_by_columns( parquet_schema, 0..parquet_schema.columns().len(), key_value_metadata, - ), - } + )) } /// Convert parquet schema to arrow schema including optional metadata,