diff --git a/rust/arrow/src/record_batch.rs b/rust/arrow/src/record_batch.rs index 3d5d8f35198..8cfa22515b6 100644 --- a/rust/arrow/src/record_batch.rs +++ b/rust/arrow/src/record_batch.rs @@ -65,7 +65,7 @@ impl RecordBatch { } if columns[i].data_type() != schema.field(i).data_type() { return Err(ArrowError::InvalidArgumentError(format!( - "column types must match schema types, expected {:?} but found {:?} at column index {}", + "column types must match schema types, expected {:?} but found {:?} at column index {}", schema.field(i).data_type(), columns[i].data_type(), i))); @@ -130,9 +130,6 @@ impl Into for RecordBatch { } } -unsafe impl Send for RecordBatch {} -unsafe impl Sync for RecordBatch {} - /// Definition of record batch reader. pub trait RecordBatchReader { /// Returns schemas of this record batch reader. diff --git a/rust/parquet/src/column/reader.rs b/rust/parquet/src/column/reader.rs index 887443f40be..8dc81ee6653 100644 --- a/rust/parquet/src/column/reader.rs +++ b/rust/parquet/src/column/reader.rs @@ -20,7 +20,6 @@ use std::{ cmp::{max, min}, collections::HashMap, - mem, }; use super::page::{Page, PageReader}; @@ -90,21 +89,16 @@ pub fn get_column_reader( /// Gets a typed column reader for the specific type `T`, by "up-casting" `col_reader` of /// non-generic type to a generic column reader type `ColumnReaderImpl`. /// -/// NOTE: the caller MUST guarantee that the actual enum value for `col_reader` matches -/// the type `T`. Otherwise, disastrous consequence could happen. +/// Panics if actual enum value for `col_reader` does not match the type `T`. pub fn get_typed_column_reader( col_reader: ColumnReader, ) -> ColumnReaderImpl { - match col_reader { - ColumnReader::BoolColumnReader(r) => unsafe { mem::transmute(r) }, - ColumnReader::Int32ColumnReader(r) => unsafe { mem::transmute(r) }, - ColumnReader::Int64ColumnReader(r) => unsafe { mem::transmute(r) }, - ColumnReader::Int96ColumnReader(r) => unsafe { mem::transmute(r) }, - ColumnReader::FloatColumnReader(r) => unsafe { mem::transmute(r) }, - ColumnReader::DoubleColumnReader(r) => unsafe { mem::transmute(r) }, - ColumnReader::ByteArrayColumnReader(r) => unsafe { mem::transmute(r) }, - ColumnReader::FixedLenByteArrayColumnReader(r) => unsafe { mem::transmute(r) }, - } + T::get_column_reader(col_reader).unwrap_or_else(|| { + panic!( + "Failed to convert column reader into a typed column reader for `{}` type", + T::get_physical_type() + ) + }) } /// Typed value reader for a particular primitive column. diff --git a/rust/parquet/src/column/writer.rs b/rust/parquet/src/column/writer.rs index 1be6b80073a..6a634b9f467 100644 --- a/rust/parquet/src/column/writer.rs +++ b/rust/parquet/src/column/writer.rs @@ -17,7 +17,7 @@ //! Contains column writer API. -use std::{cmp, collections::VecDeque, mem, rc::Rc}; +use std::{cmp, collections::VecDeque, rc::Rc}; use crate::basic::{Compression, Encoding, PageType, Type}; use crate::column::page::{CompressedPage, Page, PageWriteSpec, PageWriter}; @@ -98,57 +98,40 @@ pub fn get_column_writer( /// Gets a typed column writer for the specific type `T`, by "up-casting" `col_writer` of /// non-generic type to a generic column writer type `ColumnWriterImpl`. /// -/// NOTE: the caller MUST guarantee that the actual enum value for `col_writer` matches -/// the type `T`. Otherwise, disastrous consequence could happen. +/// Panics if actual enum value for `col_writer` does not match the type `T`. pub fn get_typed_column_writer( col_writer: ColumnWriter, ) -> ColumnWriterImpl { - match col_writer { - ColumnWriter::BoolColumnWriter(r) => unsafe { mem::transmute(r) }, - ColumnWriter::Int32ColumnWriter(r) => unsafe { mem::transmute(r) }, - ColumnWriter::Int64ColumnWriter(r) => unsafe { mem::transmute(r) }, - ColumnWriter::Int96ColumnWriter(r) => unsafe { mem::transmute(r) }, - ColumnWriter::FloatColumnWriter(r) => unsafe { mem::transmute(r) }, - ColumnWriter::DoubleColumnWriter(r) => unsafe { mem::transmute(r) }, - ColumnWriter::ByteArrayColumnWriter(r) => unsafe { mem::transmute(r) }, - ColumnWriter::FixedLenByteArrayColumnWriter(r) => unsafe { mem::transmute(r) }, - } + T::get_column_writer(col_writer).unwrap_or_else(|| { + panic!( + "Failed to convert column writer into a typed column writer for `{}` type", + T::get_physical_type() + ) + }) } /// Similar to `get_typed_column_writer` but returns a reference. pub fn get_typed_column_writer_ref( col_writer: &ColumnWriter, ) -> &ColumnWriterImpl { - match col_writer { - ColumnWriter::BoolColumnWriter(ref r) => unsafe { mem::transmute(r) }, - ColumnWriter::Int32ColumnWriter(ref r) => unsafe { mem::transmute(r) }, - ColumnWriter::Int64ColumnWriter(ref r) => unsafe { mem::transmute(r) }, - ColumnWriter::Int96ColumnWriter(ref r) => unsafe { mem::transmute(r) }, - ColumnWriter::FloatColumnWriter(ref r) => unsafe { mem::transmute(r) }, - ColumnWriter::DoubleColumnWriter(ref r) => unsafe { mem::transmute(r) }, - ColumnWriter::ByteArrayColumnWriter(ref r) => unsafe { mem::transmute(r) }, - ColumnWriter::FixedLenByteArrayColumnWriter(ref r) => unsafe { - mem::transmute(r) - }, - } + T::get_column_writer_ref(col_writer).unwrap_or_else(|| { + panic!( + "Failed to convert column writer into a typed column writer for `{}` type", + T::get_physical_type() + ) + }) } /// Similar to `get_typed_column_writer` but returns a reference. pub fn get_typed_column_writer_mut( col_writer: &mut ColumnWriter, ) -> &mut ColumnWriterImpl { - match col_writer { - ColumnWriter::BoolColumnWriter(ref mut r) => unsafe { mem::transmute(r) }, - ColumnWriter::Int32ColumnWriter(ref mut r) => unsafe { mem::transmute(r) }, - ColumnWriter::Int64ColumnWriter(ref mut r) => unsafe { mem::transmute(r) }, - ColumnWriter::Int96ColumnWriter(ref mut r) => unsafe { mem::transmute(r) }, - ColumnWriter::FloatColumnWriter(ref mut r) => unsafe { mem::transmute(r) }, - ColumnWriter::DoubleColumnWriter(ref mut r) => unsafe { mem::transmute(r) }, - ColumnWriter::ByteArrayColumnWriter(ref mut r) => unsafe { mem::transmute(r) }, - ColumnWriter::FixedLenByteArrayColumnWriter(ref mut r) => unsafe { - mem::transmute(r) - }, - } + T::get_column_writer_mut(col_writer).unwrap_or_else(|| { + panic!( + "Failed to convert column writer into a typed column writer for `{}` type", + T::get_physical_type() + ) + }) } /// Typed column writer for a primitive column. diff --git a/rust/parquet/src/data_type.rs b/rust/parquet/src/data_type.rs index ef0299a79d2..d7e20fa5533 100644 --- a/rust/parquet/src/data_type.rs +++ b/rust/parquet/src/data_type.rs @@ -23,6 +23,8 @@ use std::mem; use byteorder::{BigEndian, ByteOrder}; use crate::basic::Type; +use crate::column::reader::{ColumnReader, ColumnReaderImpl}; +use crate::column::writer::{ColumnWriter, ColumnWriterImpl}; use crate::errors::{ParquetError, Result}; use crate::util::memory::{ByteBuffer, ByteBufferPtr}; use std::str::from_utf8; @@ -376,10 +378,30 @@ pub trait DataType: 'static { /// Returns size in bytes for Rust representation of the physical type. fn get_type_size() -> usize; + + fn get_column_reader(column_writer: ColumnReader) -> Option> + where + Self: Sized; + + fn get_column_writer(column_writer: ColumnWriter) -> Option> + where + Self: Sized; + + fn get_column_writer_ref( + column_writer: &ColumnWriter, + ) -> Option<&ColumnWriterImpl> + where + Self: Sized; + + fn get_column_writer_mut( + column_writer: &mut ColumnWriter, + ) -> Option<&mut ColumnWriterImpl> + where + Self: Sized; } macro_rules! make_type { - ($name:ident, $physical_ty:path, $native_ty:ty, $size:expr) => { + ($name:ident, $physical_ty:path, $reader_ident: ident, $writer_ident: ident, $native_ty:ty, $size:expr) => { pub struct $name {} impl DataType for $name { @@ -392,27 +414,109 @@ macro_rules! make_type { fn get_type_size() -> usize { $size } + + fn get_column_reader( + column_writer: ColumnReader, + ) -> Option> { + match column_writer { + ColumnReader::$reader_ident(w) => Some(w), + _ => None, + } + } + + fn get_column_writer( + column_writer: ColumnWriter, + ) -> Option> { + match column_writer { + ColumnWriter::$writer_ident(w) => Some(w), + _ => None, + } + } + + fn get_column_writer_ref( + column_writer: &ColumnWriter, + ) -> Option<&ColumnWriterImpl> { + match column_writer { + ColumnWriter::$writer_ident(w) => Some(w), + _ => None, + } + } + + fn get_column_writer_mut( + column_writer: &mut ColumnWriter, + ) -> Option<&mut ColumnWriterImpl> { + match column_writer { + ColumnWriter::$writer_ident(w) => Some(w), + _ => None, + } + } } }; } // Generate struct definitions for all physical types -make_type!(BoolType, Type::BOOLEAN, bool, 1); -make_type!(Int32Type, Type::INT32, i32, 4); -make_type!(Int64Type, Type::INT64, i64, 8); -make_type!(Int96Type, Type::INT96, Int96, mem::size_of::()); -make_type!(FloatType, Type::FLOAT, f32, 4); -make_type!(DoubleType, Type::DOUBLE, f64, 8); +make_type!( + BoolType, + Type::BOOLEAN, + BoolColumnReader, + BoolColumnWriter, + bool, + 1 +); +make_type!( + Int32Type, + Type::INT32, + Int32ColumnReader, + Int32ColumnWriter, + i32, + 4 +); +make_type!( + Int64Type, + Type::INT64, + Int64ColumnReader, + Int64ColumnWriter, + i64, + 8 +); +make_type!( + Int96Type, + Type::INT96, + Int96ColumnReader, + Int96ColumnWriter, + Int96, + mem::size_of::() +); +make_type!( + FloatType, + Type::FLOAT, + FloatColumnReader, + FloatColumnWriter, + f32, + 4 +); +make_type!( + DoubleType, + Type::DOUBLE, + DoubleColumnReader, + DoubleColumnWriter, + f64, + 8 +); make_type!( ByteArrayType, Type::BYTE_ARRAY, + ByteArrayColumnReader, + ByteArrayColumnWriter, ByteArray, mem::size_of::() ); make_type!( FixedLenByteArrayType, Type::FIXED_LEN_BYTE_ARRAY, + FixedLenByteArrayColumnReader, + FixedLenByteArrayColumnWriter, ByteArray, mem::size_of::() );