From c6ad47c0d2d81e3df41462eaca8dde84d5c3e835 Mon Sep 17 00:00:00 2001 From: Paddy Horan Date: Tue, 28 Jan 2020 21:07:23 -0500 Subject: [PATCH 1/4] Updates `from_raw_parts` to be unsafe --- rust/arrow/src/array/array.rs | 6 ++--- rust/arrow/src/buffer.rs | 47 +++++++++++++++++++++++++++-------- 2 files changed, 39 insertions(+), 14 deletions(-) diff --git a/rust/arrow/src/array/array.rs b/rust/arrow/src/array/array.rs index e6c56440848..a8c593037a3 100644 --- a/rust/arrow/src/array/array.rs +++ b/rust/arrow/src/array/array.rs @@ -2825,7 +2825,7 @@ mod tests { #[should_panic(expected = "memory is not aligned")] fn test_primitive_array_alignment() { let ptr = memory::allocate_aligned(8); - let buf = Buffer::from_raw_parts(ptr, 8); + let buf = unsafe { Buffer::from_raw_parts(ptr, 8) }; let buf2 = buf.slice(1); let array_data = ArrayData::builder(DataType::Int32).add_buffer(buf2).build(); Int32Array::from(array_data); @@ -2835,7 +2835,7 @@ mod tests { #[should_panic(expected = "memory is not aligned")] fn test_list_array_alignment() { let ptr = memory::allocate_aligned(8); - let buf = Buffer::from_raw_parts(ptr, 8); + let buf = unsafe { Buffer::from_raw_parts(ptr, 8) }; let buf2 = buf.slice(1); let values: [i32; 8] = [0; 8]; @@ -2855,7 +2855,7 @@ mod tests { #[should_panic(expected = "memory is not aligned")] fn test_binary_array_alignment() { let ptr = memory::allocate_aligned(8); - let buf = Buffer::from_raw_parts(ptr, 8); + let buf = unsafe { Buffer::from_raw_parts(ptr, 8) }; let buf2 = buf.slice(1); let values: [u8; 12] = [0; 12]; diff --git a/rust/arrow/src/buffer.rs b/rust/arrow/src/buffer.rs index 597f34e0f81..c0aeaa4de31 100644 --- a/rust/arrow/src/buffer.rs +++ b/rust/arrow/src/buffer.rs @@ -98,14 +98,34 @@ impl Debug for BufferData { impl Buffer { /// Creates a buffer from an existing memory region (must already be byte-aligned), and this - /// buffer will free this piece of memory when dropped. - pub fn from_raw_parts(ptr: *const u8, len: usize) -> Self { + /// `Buffer` will free this piece of memory when dropped. + /// + /// # Arguments + /// + /// * `ptr` - Pointer to raw parts + /// * `len` - Length of raw parts in **bytes** + /// + /// # Safety + /// + /// This function is unsafe as there is no guarantee that the given pointer is valid for `len` + /// bytes. + pub unsafe fn from_raw_parts(ptr: *const u8, len: usize) -> Self { Buffer::build_with_arguments(ptr, len, true) } /// Creates a buffer from an existing memory region (must already be byte-aligned), and this - /// buffers doesn't free this piece of memory when dropped. - pub fn from_unowned(ptr: *const u8, len: usize) -> Self { + /// `Buffer` **does not** free this piece of memory when dropped. + /// + /// # Arguments + /// + /// * `ptr` - Pointer to raw parts + /// * `len` - Length of raw parts in **bytes** + /// + /// # Safety + /// + /// This function is unsafe as there is no guarantee that the given pointer is valid for `len` + /// bytes. + pub unsafe fn from_unowned(ptr: *const u8, len: usize) -> Self { Buffer::build_with_arguments(ptr, len, false) } @@ -113,11 +133,16 @@ impl Buffer { /// /// # Arguments /// - /// * `ptr` - Pointer to raw parts. + /// * `ptr` - Pointer to raw parts /// * `len` - Length of raw parts in bytes - /// * `owned` - Whether the raw parts is owned by this buffer. If true, this buffer will free - /// this memory when dropped, otherwise it will skip freeing the raw parts. - fn build_with_arguments(ptr: *const u8, len: usize, owned: bool) -> Self { + /// * `owned` - Whether the raw parts is owned by this `Buffer`. If true, this `Buffer` will + /// free this memory when dropped, otherwise it will skip freeing the raw parts. + /// + /// # Safety + /// + /// This function is unsafe as there is no guarantee that the given pointer is valid for `len` + /// bytes. + unsafe fn build_with_arguments(ptr: *const u8, len: usize, owned: bool) -> Self { assert!( memory::is_aligned(ptr, memory::ALIGNMENT), "memory not aligned" @@ -178,7 +203,7 @@ impl Buffer { /// Returns an empty buffer. pub fn empty() -> Self { - Self::from_raw_parts(::std::ptr::null(), 0) + unsafe { Self::from_raw_parts(::std::ptr::null(), 0) } } } @@ -202,8 +227,8 @@ impl> From for Buffer { let buffer = memory::allocate_aligned(capacity); unsafe { memory::memcpy(buffer, slice.as_ptr(), len); + Buffer::from_raw_parts(buffer, len) } - Buffer::from_raw_parts(buffer, len) } } @@ -552,7 +577,7 @@ mod tests { #[test] fn test_from_raw_parts() { - let buf = Buffer::from_raw_parts(null_mut(), 0); + let buf = unsafe { Buffer::from_raw_parts(null_mut(), 0) }; assert_eq!(0, buf.len()); assert_eq!(0, buf.data().len()); assert!(buf.raw_data().is_null()); From cffcc0b8005e7ca02ecb7c80ccb02db45f0642e0 Mon Sep 17 00:00:00 2001 From: Paddy Horan Date: Mon, 10 Feb 2020 15:50:31 -0500 Subject: [PATCH 2/4] Makes `typed_data` unsafe. --- rust/arrow/src/buffer.rs | 29 ++++++++++++++++++----------- 1 file changed, 18 insertions(+), 11 deletions(-) diff --git a/rust/arrow/src/buffer.rs b/rust/arrow/src/buffer.rs index c0aeaa4de31..c0a597b7e86 100644 --- a/rust/arrow/src/buffer.rs +++ b/rust/arrow/src/buffer.rs @@ -97,7 +97,7 @@ impl Debug for BufferData { } impl Buffer { - /// Creates a buffer from an existing memory region (must already be byte-aligned), and this + /// Creates a buffer from an existing memory region (must already be byte-aligned), this /// `Buffer` will free this piece of memory when dropped. /// /// # Arguments @@ -113,7 +113,7 @@ impl Buffer { Buffer::build_with_arguments(ptr, len, true) } - /// Creates a buffer from an existing memory region (must already be byte-aligned), and this + /// Creates a buffer from an existing memory region (must already be byte-aligned), this /// `Buffer` **does not** free this piece of memory when dropped. /// /// # Arguments @@ -129,7 +129,7 @@ impl Buffer { Buffer::build_with_arguments(ptr, len, false) } - /// Creates a buffer from an existing memory region (must already be byte-aligned) + /// Creates a buffer from an existing memory region (must already be byte-aligned). /// /// # Arguments /// @@ -190,15 +190,22 @@ impl Buffer { } /// View buffer as typed slice. - pub fn typed_data(&self) -> &[T] { + /// + /// # Safety + /// + /// `ArrowNativeType` is public so that it can be used as a trait bound for other public + /// components, such as the `ToByteSlice`. However, this means that it can be implemented + /// by user defined types, which it is not intended for. + /// + /// Also `typed_data::` is unsafe as `0x00` and `0x01` are the only valid values for + /// `bool` in Rust. However, `bool` arrays in Arrow are bit-packed which breaks this condition. + pub unsafe fn typed_data(&self) -> &[T] { assert_eq!(self.len() % mem::size_of::(), 0); assert!(memory::is_ptr_aligned::(self.raw_data() as *const T)); - unsafe { - from_raw_parts( - mem::transmute::<*const u8, *const T>(self.raw_data()), - self.len() / mem::size_of::(), - ) - } + from_raw_parts( + mem::transmute::<*const u8, *const T>(self.raw_data()), + self.len() / mem::size_of::(), + ) } /// Returns an empty buffer. @@ -797,7 +804,7 @@ mod tests { macro_rules! check_as_typed_data { ($input: expr, $native_t: ty) => {{ let buffer = Buffer::from($input.to_byte_slice()); - let slice: &[$native_t] = buffer.typed_data::<$native_t>(); + let slice: &[$native_t] = unsafe { buffer.typed_data::<$native_t>() }; assert_eq!($input, slice); }}; } From d02fa8bbc5a997bb4a407faa3b77bfc26ee9785a Mon Sep 17 00:00:00 2001 From: Paddy Horan Date: Mon, 10 Feb 2020 15:53:55 -0500 Subject: [PATCH 3/4] Fixed typo. --- rust/arrow/src/buffer.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/rust/arrow/src/buffer.rs b/rust/arrow/src/buffer.rs index c0a597b7e86..f51b56afdcc 100644 --- a/rust/arrow/src/buffer.rs +++ b/rust/arrow/src/buffer.rs @@ -194,8 +194,8 @@ impl Buffer { /// # Safety /// /// `ArrowNativeType` is public so that it can be used as a trait bound for other public - /// components, such as the `ToByteSlice`. However, this means that it can be implemented - /// by user defined types, which it is not intended for. + /// components, such as the `ToByteSlice` trait. However, this means that it can be + /// implemented by user defined types, which it is not intended for. /// /// Also `typed_data::` is unsafe as `0x00` and `0x01` are the only valid values for /// `bool` in Rust. However, `bool` arrays in Arrow are bit-packed which breaks this condition. From 963c177d7f49658f301bd0a088efe99941eff229 Mon Sep 17 00:00:00 2001 From: Paddy Horan Date: Mon, 10 Feb 2020 16:08:43 -0500 Subject: [PATCH 4/4] Fixes calls to unsafe functions --- rust/parquet/src/arrow/array_reader.rs | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/rust/parquet/src/arrow/array_reader.rs b/rust/parquet/src/arrow/array_reader.rs index dbca001707b..9437f216ba1 100644 --- a/rust/parquet/src/arrow/array_reader.rs +++ b/rust/parquet/src/arrow/array_reader.rs @@ -232,11 +232,15 @@ impl ArrayReader for PrimitiveArrayReader { } fn get_def_levels(&self) -> Option<&[i16]> { - self.def_levels_buffer.as_ref().map(|buf| buf.typed_data()) + self.def_levels_buffer + .as_ref() + .map(|buf| unsafe { buf.typed_data() }) } fn get_rep_levels(&self) -> Option<&[i16]> { - self.rep_levels_buffer.as_ref().map(|buf| buf.typed_data()) + self.rep_levels_buffer + .as_ref() + .map(|buf| unsafe { buf.typed_data() }) } } @@ -579,11 +583,15 @@ impl ArrayReader for StructArrayReader { } fn get_def_levels(&self) -> Option<&[i16]> { - self.def_level_buffer.as_ref().map(|buf| buf.typed_data()) + self.def_level_buffer + .as_ref() + .map(|buf| unsafe { buf.typed_data() }) } fn get_rep_levels(&self) -> Option<&[i16]> { - self.rep_level_buffer.as_ref().map(|buf| buf.typed_data()) + self.rep_level_buffer + .as_ref() + .map(|buf| unsafe { buf.typed_data() }) } }