From f0ec2f852b73bc18856d85df68aec119eff71d19 Mon Sep 17 00:00:00 2001 From: Ritchie Vink Date: Mon, 5 Apr 2021 12:26:47 +0200 Subject: [PATCH 1/3] process comments @pitrou --- rust/arrow/src/ffi.rs | 42 ++++++++++++++++++++++++------------------ 1 file changed, 24 insertions(+), 18 deletions(-) diff --git a/rust/arrow/src/ffi.rs b/rust/arrow/src/ffi.rs index 3a6d031ebd8..4f752b829a0 100644 --- a/rust/arrow/src/ffi.rs +++ b/rust/arrow/src/ffi.rs @@ -81,7 +81,7 @@ use std::{ ffi::CStr, ffi::CString, iter, - mem::{size_of, ManuallyDrop}, + mem::size_of, os::raw::c_char, ptr::{self, NonNull}, sync::Arc, @@ -93,6 +93,11 @@ use crate::datatypes::{DataType, Field, TimeUnit}; use crate::error::{ArrowError, Result}; use crate::util::bit_util; +const FLAG_NONE: i64 = 0; +const FLAG_DICTIONARY_ORDERED: i64 = 1; +const FLAG_NULLABLE: i64 = 2; +const FLAG_MAP_KEYS_SORTED: i64 = 4; + /// ABI-compatible struct for `ArrowSchema` from C Data Interface /// See /// This was created by bindgen @@ -135,13 +140,14 @@ impl FFI_ArrowSchema { let n_children = children.len() as i64; let children_ptr = children.as_ptr() as *mut *mut FFI_ArrowSchema; - let flags = if nullable { 2 } else { 0 }; + let flags = if nullable { FLAG_NULLABLE } else { FLAG_NONE }; let private_data = Box::new(SchemaPrivateData { children }); // FFI_ArrowSchema { format: CString::new(format).unwrap().into_raw(), - // For child data a non null string is expected and is called item + // TODO: get the field name for general nested data + // For List child data a non null string is expected and is called item name: CString::new("item").unwrap().into_raw(), metadata: std::ptr::null_mut(), flags, @@ -159,7 +165,7 @@ impl FFI_ArrowSchema { format: std::ptr::null_mut(), name: std::ptr::null_mut(), metadata: std::ptr::null_mut(), - flags: 0, + flags: FLAG_NONE, n_children: 0, children: ptr::null_mut(), dictionary: std::ptr::null_mut(), @@ -221,15 +227,13 @@ fn to_datatype( // at that point the child data is not yet known, but it is also not required to determine // the buffer length of the list arrays. "+l" => { - let nullable = schema.flags == 2; + let nullable = (schema.flags & FLAG_NULLABLE) != 0; // Safety // Should be set as this is expected from the C FFI definition debug_assert!(!schema.name.is_null()); - let name = unsafe { CString::from_raw(schema.name as *mut c_char) } - .into_string() + let name = unsafe { CStr::from_ptr(schema.name as *const c_char) } + .to_str() .unwrap(); - // prevent a double free - let name = ManuallyDrop::new(name); DataType::List(Box::new(Field::new( &name, child_type.unwrap_or(DataType::Null), @@ -237,15 +241,13 @@ fn to_datatype( ))) } "+L" => { - let nullable = schema.flags == 2; + let nullable = (schema.flags & FLAG_NULLABLE) != 0; // Safety // Should be set as this is expected from the C FFI definition debug_assert!(!schema.name.is_null()); - let name = unsafe { CString::from_raw(schema.name as *mut c_char) } - .into_string() + let name = unsafe { CStr::from_ptr(schema.name as *const c_char) } + .to_str() .unwrap(); - // prevent a double free - let name = ManuallyDrop::new(name); DataType::LargeList(Box::new(Field::new( &name, child_type.unwrap_or(DataType::Null), @@ -395,6 +397,12 @@ unsafe extern "C" fn release_array(array: *mut FFI_ArrowArray) { // take ownership of `private_data`, therefore dropping it Box::from_raw(array.private_data as *mut PrivateData); + // release children + for i in 0..array.n_children { + let child = *array.children.add(i as usize); + release_array(child); + } + array.release = None; } @@ -654,7 +662,7 @@ impl ArrowArray { debug_assert_eq!(bits % 8, 0); (self.array.length as usize + 1) * (bits / 8) } - (DataType::Utf8, 2) | (DataType::Binary, 2) | (DataType::List(_), 2) => { + (DataType::Utf8, 2) | (DataType::Binary, 2) => { // the len of the data buffer (buffer 2) equals the last value of the offset buffer (buffer 1) let len = self.buffer_len(1)?; // first buffer is the null buffer => add(1) @@ -666,9 +674,7 @@ impl ArrowArray { // get last offset (unsafe { *offset_buffer.add(len / size_of::() - 1) }) as usize } - (DataType::LargeUtf8, 2) - | (DataType::LargeBinary, 2) - | (DataType::LargeList(_), 2) => { + (DataType::LargeUtf8, 2) | (DataType::LargeBinary, 2) => { // the len of the data buffer (buffer 2) equals the last value of the offset buffer (buffer 1) let len = self.buffer_len(1)?; // first buffer is the null buffer => add(1) From b23289cd1ef43ca91df1f74631f8817ffe165c55 Mon Sep 17 00:00:00 2001 From: Ritchie Vink Date: Mon, 5 Apr 2021 12:46:48 +0200 Subject: [PATCH 2/3] properly recurse all nested data --- rust/arrow/src/array/ffi.rs | 78 ++++++++++++------------------- rust/arrow/src/zz_memory_check.rs | 2 +- 2 files changed, 32 insertions(+), 48 deletions(-) diff --git a/rust/arrow/src/array/ffi.rs b/rust/arrow/src/array/ffi.rs index 450685bf522..92cbc26bf1a 100644 --- a/rust/arrow/src/array/ffi.rs +++ b/rust/arrow/src/array/ffi.rs @@ -26,7 +26,6 @@ use crate::{ use super::ArrayData; use crate::datatypes::DataType; -use crate::ffi::ArrowArray; impl TryFrom for ArrayData { type Error = ArrowError; @@ -60,62 +59,47 @@ impl TryFrom for ArrayData { } } +fn array_data_to_arrow_array( + value: ArrayData, + nullable: bool, +) -> Result { + let len = value.len(); + let offset = value.offset() as usize; + let null_count = value.null_count(); + let buffers = value.buffers().to_vec(); + let null_buffer = value.null_buffer().cloned(); + let child_data = value + .child_data() + .iter() + .map(|arr| array_data_to_arrow_array(arr.clone(), nullable)) + .collect::>>()?; + + unsafe { + ffi::ArrowArray::try_new( + value.data_type(), + len, + null_count, + null_buffer, + offset, + buffers, + child_data, + nullable, + ) + } +} + impl TryFrom for ffi::ArrowArray { type Error = ArrowError; fn try_from(value: ArrayData) -> Result { // If parent is nullable, then children also must be nullable - // so we pass this nullable to the creation of hte child data + // so we pass this nullable to the creation of the child data let nullable = match value.data_type() { DataType::List(field) => field.is_nullable(), DataType::LargeList(field) => field.is_nullable(), _ => false, }; - - let len = value.len(); - let offset = value.offset() as usize; - let null_count = value.null_count(); - let buffers = value.buffers().to_vec(); - let null_buffer = value.null_buffer().cloned(); - let child_data = value - .child_data() - .iter() - .map(|arr| { - let len = arr.len(); - let offset = arr.offset() as usize; - let null_count = arr.null_count(); - let buffers = arr.buffers().to_vec(); - let null_buffer = arr.null_buffer().cloned(); - - // Note: the nullable comes from the parent data. - unsafe { - ArrowArray::try_new( - arr.data_type(), - len, - null_count, - null_buffer, - offset, - buffers, - vec![], - nullable, - ) - .expect("infallible") - } - }) - .collect::>(); - - unsafe { - ffi::ArrowArray::try_new( - value.data_type(), - len, - null_count, - null_buffer, - offset, - buffers, - child_data, - nullable, - ) - } + array_data_to_arrow_array(value, nullable) } } diff --git a/rust/arrow/src/zz_memory_check.rs b/rust/arrow/src/zz_memory_check.rs index 70ec8ebdbdd..cf92887bd39 100644 --- a/rust/arrow/src/zz_memory_check.rs +++ b/rust/arrow/src/zz_memory_check.rs @@ -21,7 +21,7 @@ #[cfg(feature = "memory-check")] mod tests { - use crate::memory::ALLOCATIONS; + use crate::alloc::ALLOCATIONS; // verify that there is no data un-allocated #[test] From ea499dda26a13cab932c3a76320f8d20fd674022 Mon Sep 17 00:00:00 2001 From: Ritchie Vink Date: Thu, 8 Apr 2021 14:30:16 +0200 Subject: [PATCH 3/3] update release functions --- rust/arrow/src/ffi.rs | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/rust/arrow/src/ffi.rs b/rust/arrow/src/ffi.rs index 4f752b829a0..61838dee1fe 100644 --- a/rust/arrow/src/ffi.rs +++ b/rust/arrow/src/ffi.rs @@ -121,6 +121,14 @@ unsafe extern "C" fn release_schema(schema: *mut FFI_ArrowSchema) { // take ownership back to release it. CString::from_raw(schema.format as *mut std::os::raw::c_char); + CString::from_raw(schema.name as *mut std::os::raw::c_char); + + // release children + for i in 0..schema.n_children { + let child_ptr = *schema.children.add(i as usize); + let child = &*child_ptr; + child.release.map(|release| release(child_ptr)); + } schema.release = None; } @@ -399,8 +407,9 @@ unsafe extern "C" fn release_array(array: *mut FFI_ArrowArray) { // release children for i in 0..array.n_children { - let child = *array.children.add(i as usize); - release_array(child); + let child_ptr = *array.children.add(i as usize); + let child = &*child_ptr; + child.release.map(|release| release(child_ptr)); } array.release = None;