From 3fc5b508883a5e255927baf9f881697f0227d66a Mon Sep 17 00:00:00 2001 From: alamb Date: Mon, 26 Oct 2020 11:14:28 -0400 Subject: [PATCH 1/4] ARROW-10390: [Rust][Parquet] Ensure it is possible to create custom parquet writers --- rust/parquet/tests/custom_writer.rs | 78 +++++++++++++++++++++++++++++ 1 file changed, 78 insertions(+) create mode 100644 rust/parquet/tests/custom_writer.rs diff --git a/rust/parquet/tests/custom_writer.rs b/rust/parquet/tests/custom_writer.rs new file mode 100644 index 00000000000..c01c148e248 --- /dev/null +++ b/rust/parquet/tests/custom_writer.rs @@ -0,0 +1,78 @@ +use std::{io::{SeekFrom, prelude::*}, rc::Rc, fs}; +use std::fs::File; + +use std::env; +use parquet::{schema::types, basic::Repetition, file::properties::WriterProperties, file::writer::SerializedFileWriter}; +use parquet::util::io::TryClone; + + +// Test creating some sort of custom writer to ensure the +// appropriate traits are exposed +struct CustomWriter { + file: File, +} + +impl Write for CustomWriter{ + fn write(&mut self, buf: &[u8]) -> std::io::Result { + self.file.write(buf) + } + fn flush(&mut self) -> std::io::Result<()> { + self.file.flush() + } + +} + +impl Seek for CustomWriter { + fn seek(&mut self, pos: SeekFrom) -> std::io::Result { + self.file.seek(pos) + } + +} + +impl TryClone for CustomWriter { + fn try_clone(&self) -> std::io::Result { + use std::io::{Error, ErrorKind}; + Err(Error::new(ErrorKind::Other, "Clone not supported")) + } + +} + +#[test] +fn test_custom_writer() { + let schema = Rc::new( + types::Type::group_type_builder("schema") + .with_fields(&mut vec![Rc::new( + types::Type::primitive_type_builder("col1", types::Type::INT32) + .with_repetition(Repetition::REQUIRED) + .build() + .unwrap(), + )]) + .build() + .unwrap(), + ); + let props = Rc::new(WriterProperties::builder().build()); + + + + let file = get_temp_file("test_custom_file_writer", &[]); + let writer = CustomWriter { file }; + + // test is that this file can be created + let mut file_writer = + SerializedFileWriter::new(writer, schema, props).unwrap(); + file_writer.close().unwrap(); +} + + +/// Returns file handle for a temp file in 'target' directory with a provided content +pub fn get_temp_file(file_name: &str, content: &[u8]) -> fs::File { + // build tmp path to a file in "target/debug/testdata" + let mut path_buf = env::current_dir().unwrap(); + path_buf.push("target"); + path_buf.push("debug"); + path_buf.push("testdata"); + fs::create_dir_all(&path_buf).unwrap(); + path_buf.push(file_name); + + File::create(path_buf).unwrap() +} From 65862b9dfba31393153a27372205e2db8561deb9 Mon Sep 17 00:00:00 2001 From: alamb Date: Mon, 26 Oct 2020 11:22:18 -0400 Subject: [PATCH 2/4] Expose TryClone --- rust/parquet/src/file/writer.rs | 3 ++- rust/parquet/tests/custom_writer.rs | 39 ++++++++++++++++------------- 2 files changed, 24 insertions(+), 18 deletions(-) diff --git a/rust/parquet/src/file/writer.rs b/rust/parquet/src/file/writer.rs index 3509a23ce1d..c6535865625 100644 --- a/rust/parquet/src/file/writer.rs +++ b/rust/parquet/src/file/writer.rs @@ -38,7 +38,7 @@ use crate::file::{ statistics::to_thrift as statistics_to_thrift, FOOTER_SIZE, PARQUET_MAGIC, }; use crate::schema::types::{self, SchemaDescPtr, SchemaDescriptor, TypePtr}; -use crate::util::io::{FileSink, Position, TryClone}; +use crate::util::io::{FileSink, Position}; // ---------------------------------------------------------------------- // APIs for file & row group writers @@ -114,6 +114,7 @@ pub trait RowGroupWriter { // ---------------------------------------------------------------------- // Serialized impl for file & row group writers +pub use crate::util::io::TryClone; pub trait ParquetWriter: Write + Seek + TryClone {} impl ParquetWriter for T {} diff --git a/rust/parquet/tests/custom_writer.rs b/rust/parquet/tests/custom_writer.rs index c01c148e248..4c70c7cdb7d 100644 --- a/rust/parquet/tests/custom_writer.rs +++ b/rust/parquet/tests/custom_writer.rs @@ -1,10 +1,16 @@ -use std::{io::{SeekFrom, prelude::*}, rc::Rc, fs}; use std::fs::File; - +use std::{ + fs, + io::{prelude::*, SeekFrom}, + rc::Rc, +}; + +use parquet::file::writer::TryClone; +use parquet::{ + basic::Repetition, basic::Type, file::properties::WriterProperties, + file::writer::SerializedFileWriter, schema::types, +}; use std::env; -use parquet::{schema::types, basic::Repetition, file::properties::WriterProperties, file::writer::SerializedFileWriter}; -use parquet::util::io::TryClone; - // Test creating some sort of custom writer to ensure the // appropriate traits are exposed @@ -12,21 +18,19 @@ struct CustomWriter { file: File, } -impl Write for CustomWriter{ +impl Write for CustomWriter { fn write(&mut self, buf: &[u8]) -> std::io::Result { self.file.write(buf) } fn flush(&mut self) -> std::io::Result<()> { self.file.flush() } - } impl Seek for CustomWriter { fn seek(&mut self, pos: SeekFrom) -> std::io::Result { self.file.seek(pos) } - } impl TryClone for CustomWriter { @@ -34,7 +38,6 @@ impl TryClone for CustomWriter { use std::io::{Error, ErrorKind}; Err(Error::new(ErrorKind::Other, "Clone not supported")) } - } #[test] @@ -42,7 +45,7 @@ fn test_custom_writer() { let schema = Rc::new( types::Type::group_type_builder("schema") .with_fields(&mut vec![Rc::new( - types::Type::primitive_type_builder("col1", types::Type::INT32) + types::Type::primitive_type_builder("col1", Type::INT32) .with_repetition(Repetition::REQUIRED) .build() .unwrap(), @@ -52,20 +55,22 @@ fn test_custom_writer() { ); let props = Rc::new(WriterProperties::builder().build()); + let file = get_temp_file("test_custom_file_writer"); + let test_file = file.try_clone().unwrap(); - - let file = get_temp_file("test_custom_file_writer", &[]); let writer = CustomWriter { file }; // test is that this file can be created - let mut file_writer = - SerializedFileWriter::new(writer, schema, props).unwrap(); - file_writer.close().unwrap(); -} + let file_writer = SerializedFileWriter::new(writer, schema, props).unwrap(); + std::mem::drop(file_writer); + // ensure the file now exists and has non zero size + let metadata = test_file.metadata().unwrap(); + assert!(metadata.len() > 0); +} /// Returns file handle for a temp file in 'target' directory with a provided content -pub fn get_temp_file(file_name: &str, content: &[u8]) -> fs::File { +fn get_temp_file(file_name: &str) -> fs::File { // build tmp path to a file in "target/debug/testdata" let mut path_buf = env::current_dir().unwrap(); path_buf.push("target"); From 7579d8c82068460dc6b75986d206ff58321e710f Mon Sep 17 00:00:00 2001 From: alamb Date: Mon, 26 Oct 2020 11:51:47 -0400 Subject: [PATCH 3/4] Move import location --- rust/parquet/src/file/writer.rs | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/rust/parquet/src/file/writer.rs b/rust/parquet/src/file/writer.rs index c6535865625..c06fef4dc3f 100644 --- a/rust/parquet/src/file/writer.rs +++ b/rust/parquet/src/file/writer.rs @@ -40,6 +40,9 @@ use crate::file::{ use crate::schema::types::{self, SchemaDescPtr, SchemaDescriptor, TypePtr}; use crate::util::io::{FileSink, Position}; +// Exposed publically so client code can implement ParquetWriter +pub use crate::util::io::TryClone; + // ---------------------------------------------------------------------- // APIs for file & row group writers @@ -114,7 +117,6 @@ pub trait RowGroupWriter { // ---------------------------------------------------------------------- // Serialized impl for file & row group writers -pub use crate::util::io::TryClone; pub trait ParquetWriter: Write + Seek + TryClone {} impl ParquetWriter for T {} From 45cb23f14e169ed0ac5ddd83f45ff5c0b86bceee Mon Sep 17 00:00:00 2001 From: alamb Date: Mon, 26 Oct 2020 14:28:48 -0400 Subject: [PATCH 4/4] Add copyright header, and add linked text --- rust/parquet/src/file/writer.rs | 2 +- rust/parquet/tests/custom_writer.rs | 17 +++++++++++++++++ 2 files changed, 18 insertions(+), 1 deletion(-) diff --git a/rust/parquet/src/file/writer.rs b/rust/parquet/src/file/writer.rs index c06fef4dc3f..c0b2bd968c4 100644 --- a/rust/parquet/src/file/writer.rs +++ b/rust/parquet/src/file/writer.rs @@ -40,7 +40,7 @@ use crate::file::{ use crate::schema::types::{self, SchemaDescPtr, SchemaDescriptor, TypePtr}; use crate::util::io::{FileSink, Position}; -// Exposed publically so client code can implement ParquetWriter +// Exposed publically so client code can implement [`ParquetWriter`] pub use crate::util::io::TryClone; // ---------------------------------------------------------------------- diff --git a/rust/parquet/tests/custom_writer.rs b/rust/parquet/tests/custom_writer.rs index 4c70c7cdb7d..6146fc2c94e 100644 --- a/rust/parquet/tests/custom_writer.rs +++ b/rust/parquet/tests/custom_writer.rs @@ -1,3 +1,20 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + use std::fs::File; use std::{ fs,