Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 4 additions & 7 deletions dev/archery/archery/integration/datagen.py
Original file line number Diff line number Diff line change
Expand Up @@ -1551,12 +1551,10 @@ def _temp_path():
.skip_category('JS') # TODO(ARROW-8716)
.skip_category('Rust'),

generate_nested_case()
.skip_category('Rust'),
generate_nested_case(),

generate_recursive_nested_case()
.skip_category('Go') # TODO(ARROW-8453)
.skip_category('Rust'),
.skip_category('Go'), # TODO(ARROW-8453)

generate_nested_large_offsets_case()
.skip_category('Go')
Expand All @@ -1571,12 +1569,11 @@ def _temp_path():
generate_custom_metadata_case()
.skip_category('Go')
.skip_category('JS')
.skip_category('Rust'),
.skip_category('Rust'), # TODO(ARROW-10259)

generate_duplicate_fieldnames_case()
.skip_category('Go')
.skip_category('JS')
.skip_category('Rust'),
.skip_category('JS'),

# TODO(ARROW-3039, ARROW-5267): Dictionaries in GO
generate_dictionary_case()
Expand Down
32 changes: 16 additions & 16 deletions docs/source/status.rst
Original file line number Diff line number Diff line change
Expand Up @@ -30,52 +30,52 @@ Data Types
| Data type | C++ | Java | Go | JavaScript | C# | Rust |
| (primitive) | | | | | | |
+===================+=======+=======+=======+============+=======+=======+
| Null | ✓ | ✓ | | | | |
| Null | ✓ | ✓ | | | | |
+-------------------+-------+-------+-------+------------+-------+-------+
| Boolean | ✓ | ✓ | ✓ | ✓ | ✓ | |
| Boolean | ✓ | ✓ | ✓ | ✓ | ✓ | |
+-------------------+-------+-------+-------+------------+-------+-------+
| Int8/16/32/64 | ✓ | ✓ | ✓ | ✓ | ✓ | |
| Int8/16/32/64 | ✓ | ✓ | ✓ | ✓ | ✓ | |
+-------------------+-------+-------+-------+------------+-------+-------+
| UInt8/16/32/64 | ✓ | ✓ | ✓ | ✓ | ✓ | |
| UInt8/16/32/64 | ✓ | ✓ | ✓ | ✓ | ✓ | |
+-------------------+-------+-------+-------+------------+-------+-------+
| Float16 | | | ✓ | | | |
+-------------------+-------+-------+-------+------------+-------+-------+
| Float32/64 | ✓ | ✓ | ✓ | ✓ | ✓ | |
| Float32/64 | ✓ | ✓ | ✓ | ✓ | ✓ | |
+-------------------+-------+-------+-------+------------+-------+-------+
| Decimal128 | ✓ | ✓ | | | | |
+-------------------+-------+-------+-------+------------+-------+-------+
| Date32/64 | ✓ | ✓ | ✓ | ✓ | ✓ | |
| Date32/64 | ✓ | ✓ | ✓ | ✓ | ✓ | |
+-------------------+-------+-------+-------+------------+-------+-------+
| Time32/64 | ✓ | ✓ | ✓ | ✓ | | |
| Time32/64 | ✓ | ✓ | ✓ | ✓ | | |
+-------------------+-------+-------+-------+------------+-------+-------+
| Timestamp | ✓ | ✓ | ✓ | ✓ | ✓ | |
| Timestamp | ✓ | ✓ | ✓ | ✓ | ✓ | |
+-------------------+-------+-------+-------+------------+-------+-------+
| Duration | ✓ | ✓ | ✓ | | | |
+-------------------+-------+-------+-------+------------+-------+-------+
| Interval | ✓ | ✓ | ✓ | | | |
+-------------------+-------+-------+-------+------------+-------+-------+
| Fixed Size Binary | ✓ | ✓ | ✓ | ✓ | | |
| Fixed Size Binary | ✓ | ✓ | ✓ | ✓ | | |
+-------------------+-------+-------+-------+------------+-------+-------+
| Binary | ✓ | ✓ | ✓ | ✓ | ✓ | |
| Binary | ✓ | ✓ | ✓ | ✓ | ✓ | |
+-------------------+-------+-------+-------+------------+-------+-------+
| Large Binary | ✓ | ✓ | ✓ | ✓ | | |
| Large Binary | ✓ | ✓ | ✓ | ✓ | | |
+-------------------+-------+-------+-------+------------+-------+-------+
| Utf8 | ✓ | ✓ | ✓ | ✓ | ✓ | |
| Utf8 | ✓ | ✓ | ✓ | ✓ | ✓ | |
+-------------------+-------+-------+-------+------------+-------+-------+
| Large Utf8 | ✓ | ✓ | ✓ | ✓ | | |
| Large Utf8 | ✓ | ✓ | ✓ | ✓ | | |
+-------------------+-------+-------+-------+------------+-------+-------+

+-------------------+-------+-------+-------+------------+-------+-------+
| Data type | C++ | Java | Go | JavaScript | C# | Rust |
| (nested) | | | | | | |
+===================+=======+=======+=======+============+=======+=======+
| Fixed Size List | ✓ | ✓ | ✓ | ✓ | | |
| Fixed Size List | ✓ | ✓ | ✓ | ✓ | | |
+-------------------+-------+-------+-------+------------+-------+-------+
| List | ✓ | ✓ | ✓ | ✓ | ✓ | |
| List | ✓ | ✓ | ✓ | ✓ | ✓ | |
+-------------------+-------+-------+-------+------------+-------+-------+
| Large List | ✓ | ✓ | | | | |
+-------------------+-------+-------+-------+------------+-------+-------+
| Struct | ✓ | ✓ | ✓ | ✓ | ✓ | |
| Struct | ✓ | ✓ | ✓ | ✓ | ✓ | |
+-------------------+-------+-------+-------+------------+-------+-------+
| Map | ✓ | ✓ | | ✓ | | |
+-------------------+-------+-------+-------+------------+-------+-------+
Expand Down
2 changes: 1 addition & 1 deletion rust/arrow/src/array/array_struct.rs
Original file line number Diff line number Diff line change
Expand Up @@ -282,7 +282,7 @@ mod tests {
.build();
let int_data = ArrayData::builder(DataType::Int64)
.len(4)
.add_buffer(Buffer::from([42, 28, 19, 31].to_byte_slice()))
.add_buffer(Buffer::from([42i64, 28, 19, 31].to_byte_slice()))
.build();
let mut field_types = vec![];
field_types.push(Field::new("a", DataType::Boolean, false));
Expand Down
60 changes: 7 additions & 53 deletions rust/arrow/src/array/data.rs
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,8 @@ use crate::datatypes::DataType;
use crate::util::bit_util;
use crate::{bitmap::Bitmap, datatypes::ArrowNativeType};

use super::equal::equal;

#[inline]
fn count_nulls(null_bit_buffer: Option<&Buffer>, offset: usize, len: usize) -> usize {
if let Some(ref buf) = null_bit_buffer {
Expand Down Expand Up @@ -245,59 +247,10 @@ impl ArrayData {

impl PartialEq for ArrayData {
fn eq(&self, other: &Self) -> bool {
assert_eq!(
self.data_type(),
other.data_type(),
"Data types not the same"
);
assert_eq!(self.len(), other.len(), "Lengths not the same");
// TODO: when adding tests for this, test that we can compare with arrays that have offsets
assert_eq!(self.offset(), other.offset(), "Offsets not the same");
assert_eq!(self.null_count(), other.null_count());
// compare buffers excluding padding
let self_buffers = self.buffers();
let other_buffers = other.buffers();
assert_eq!(self_buffers.len(), other_buffers.len());
self_buffers.iter().zip(other_buffers).for_each(|(s, o)| {
compare_buffer_regions(
s,
self.offset(), // TODO mul by data length
o,
other.offset(), // TODO mul by data len
);
});
// assert_eq!(self.buffers(), other.buffers());

assert_eq!(self.child_data(), other.child_data());
// null arrays can skip the null bitmap, thus only compare if there are no nulls
if self.null_count() != 0 || other.null_count() != 0 {
compare_buffer_regions(
self.null_buffer().unwrap(),
self.offset(),
other.null_buffer().unwrap(),
other.offset(),
)
}
true
equal(self, other)
}
}

/// A helper to compare buffer regions of 2 buffers.
/// Compares the length of the shorter buffer.
fn compare_buffer_regions(
left: &Buffer,
left_offset: usize,
right: &Buffer,
right_offset: usize,
) {
// for convenience, we assume that the buffer lengths are only unequal if one has padding,
// so we take the shorter length so we can discard the padding from the longer length
let shorter_len = left.len().min(right.len());
let s_sliced = left.bit_slice(left_offset, shorter_len);
let o_sliced = right.bit_slice(right_offset, shorter_len);
assert_eq!(s_sliced, o_sliced);
}

/// Builder for `ArrayData` type
#[derive(Debug)]
pub struct ArrayDataBuilder {
Expand Down Expand Up @@ -388,6 +341,7 @@ mod tests {
use std::sync::Arc;

use crate::buffer::Buffer;
use crate::datatypes::ToByteSlice;
use crate::util::bit_util;

#[test]
Expand All @@ -403,16 +357,16 @@ mod tests {

#[test]
fn test_builder() {
let v = vec![0, 1, 2, 3];
let child_arr_data = Arc::new(ArrayData::new(
DataType::Int32,
10,
5,
Some(0),
None,
0,
vec![],
vec![Buffer::from([1i32, 2, 3, 4, 5].to_byte_slice())],
vec![],
));
let v = vec![0, 1, 2, 3];
let b1 = Buffer::from(&v[..]);
let arr_data = ArrayData::builder(DataType::Int32)
.len(20)
Expand Down
6 changes: 5 additions & 1 deletion rust/arrow/src/array/equal/variable_size.rs
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,11 @@ pub(super) fn variable_sized_equal<T: OffsetSizeTrait>(
let lhs_values = &lhs.buffers()[1].data()[lhs.offset()..];
let rhs_values = &rhs.buffers()[1].data()[rhs.offset()..];

if lhs.null_count() == 0 && rhs.null_count() == 0 {
if lhs.null_count() == 0
&& rhs.null_count() == 0
&& !lhs_values.is_empty()
&& !rhs_values.is_empty()
{
offset_value_equal(
lhs_values,
rhs_values,
Expand Down
2 changes: 1 addition & 1 deletion rust/arrow/src/buffer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,7 @@ struct BufferData {

impl PartialEq for BufferData {
fn eq(&self, other: &BufferData) -> bool {
if self.capacity != other.capacity {
if self.len != other.len {
return false;
}

Expand Down
16 changes: 16 additions & 0 deletions rust/arrow/src/compute/kernels/cast.rs
Original file line number Diff line number Diff line change
Expand Up @@ -202,6 +202,7 @@ pub fn can_cast_types(from_type: &DataType, to_type: &DataType) -> bool {
(Timestamp(_, _), Date32(_)) => true,
(Timestamp(_, _), Date64(_)) => true,
// date64 to timestamp might not make sense,
(Int64, Duration(_)) => true,
(Null, Int32) => true,
(_, _) => false,
}
Expand Down Expand Up @@ -751,6 +752,21 @@ pub fn cast(array: &ArrayRef, to_type: &DataType) -> Result<ArrayRef> {
}
}
// date64 to timestamp might not make sense,
(Int64, Duration(to_unit)) => {
use TimeUnit::*;
match to_unit {
Second => cast_array_data::<DurationSecondType>(array, to_type.clone()),
Millisecond => {
cast_array_data::<DurationMillisecondType>(array, to_type.clone())
}
Microsecond => {
cast_array_data::<DurationMicrosecondType>(array, to_type.clone())
}
Nanosecond => {
cast_array_data::<DurationNanosecondType>(array, to_type.clone())
}
}
}

// null to primitive/flat types
(Null, Int32) => Ok(Arc::new(Int32Array::from(vec![None; array.len()]))),
Expand Down
35 changes: 27 additions & 8 deletions rust/arrow/src/datatypes.rs
Original file line number Diff line number Diff line change
Expand Up @@ -873,7 +873,7 @@ impl<T: ArrowNativeType> ToByteSlice for T {

impl DataType {
/// Parse a data type from a JSON representation
fn from(json: &Value) -> Result<DataType> {
pub(crate) fn from(json: &Value) -> Result<DataType> {
let default_field = Field::new("", DataType::Boolean, true);
match *json {
Value::Object(ref map) => match map.get("name") {
Expand Down Expand Up @@ -1489,7 +1489,7 @@ impl fmt::Display for Field {
pub struct Schema {
pub(crate) fields: Vec<Field>,
/// A map of key-value pairs containing additional meta data.
#[serde(default)]
#[serde(skip_serializing_if = "HashMap::is_empty")]
pub(crate) metadata: HashMap<String, String>,
}

Expand Down Expand Up @@ -1696,9 +1696,24 @@ impl Schema {
}

/// Parse a `metadata` definition from a JSON representation
/// The JSON can either be an Object or an Array of Objects
fn from_metadata(json: &Value) -> Result<HashMap<String, String>> {
if let Value::Object(md) = json {
md.iter()
match json {
Value::Array(_) => {
let mut hashmap = HashMap::new();
let values: Vec<MetadataKeyValue> = serde_json::from_value(json.clone())
.map_err(|_| {
ArrowError::JsonError(
"Unable to parse object into key-value pair".to_string(),
)
})?;
for meta in values {
hashmap.insert(meta.key.clone(), meta.value);
}
Ok(hashmap)
}
Value::Object(md) => md
.iter()
.map(|(k, v)| {
if let Value::String(v) = v {
Ok((k.to_string(), v.to_string()))
Expand All @@ -1708,11 +1723,10 @@ impl Schema {
))
}
})
.collect::<Result<_>>()
} else {
Err(ArrowError::ParseError(
.collect::<Result<_>>(),
_ => Err(ArrowError::ParseError(
"`metadata` field must be an object".to_string(),
))
)),
}
}
}
Expand All @@ -1733,6 +1747,11 @@ impl fmt::Display for Schema {
/// A reference-counted reference to a [`Schema`](crate::datatypes::Schema).
pub type SchemaRef = Arc<Schema>;

#[derive(Deserialize)]
struct MetadataKeyValue {
key: String,
value: String,
}
#[cfg(test)]
mod tests {
use super::*;
Expand Down
6 changes: 6 additions & 0 deletions rust/arrow/src/error.rs
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,12 @@ impl From<::std::string::FromUtf8Error> for ArrowError {
}
}

impl From<serde_json::Error> for ArrowError {
fn from(error: serde_json::Error) -> Self {
ArrowError::JsonError(error.to_string())
}
}

impl Display for ArrowError {
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
match self {
Expand Down
8 changes: 5 additions & 3 deletions rust/arrow/src/ipc/convert.rs
Original file line number Diff line number Diff line change
Expand Up @@ -311,7 +311,7 @@ pub(crate) fn build_field<'a: 'b, 'b>(
field: &Field,
) -> WIPOffset<ipc::Field<'b>> {
let fb_field_name = fbb.create_string(field.name().as_str());
let field_type = get_fb_field_type(field.data_type(), fbb);
let field_type = get_fb_field_type(field.data_type(), field.is_nullable(), fbb);

let fb_dictionary = if let Dictionary(index_type, _) = field.data_type() {
Some(get_fb_dictionary(
Expand Down Expand Up @@ -342,6 +342,7 @@ pub(crate) fn build_field<'a: 'b, 'b>(
/// Get the IPC type of a data type
pub(crate) fn get_fb_field_type<'a: 'b, 'b>(
data_type: &DataType,
is_nullable: bool,
fbb: &mut FlatBufferBuilder<'a>,
) -> FBFieldType<'b> {
// some IPC implementations expect an empty list for child data, instead of a null value.
Expand Down Expand Up @@ -558,7 +559,8 @@ pub(crate) fn get_fb_field_type<'a: 'b, 'b>(
// struct's fields are children
let mut children = vec![];
for field in fields {
let inner_types = get_fb_field_type(field.data_type(), fbb);
let inner_types =
get_fb_field_type(field.data_type(), field.is_nullable(), fbb);
let field_name = fbb.create_string(field.name());
children.push(ipc::Field::create(
fbb,
Expand All @@ -583,7 +585,7 @@ pub(crate) fn get_fb_field_type<'a: 'b, 'b>(
// In this library, the dictionary "type" is a logical construct. Here we
// pass through to the value type, as we've already captured the index
// type in the DictionaryEncoding metadata in the parent field
get_fb_field_type(value_type, fbb)
get_fb_field_type(value_type, is_nullable, fbb)
}
t => unimplemented!("Type {:?} not supported", t),
}
Expand Down
4 changes: 2 additions & 2 deletions rust/arrow/src/ipc/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -35,5 +35,5 @@ pub use self::gen::Schema::*;
pub use self::gen::SparseTensor::*;
pub use self::gen::Tensor::*;

static ARROW_MAGIC: [u8; 6] = [b'A', b'R', b'R', b'O', b'W', b'1'];
static CONTINUATION_MARKER: [u8; 4] = [0xff; 4];
const ARROW_MAGIC: [u8; 6] = [b'A', b'R', b'R', b'O', b'W', b'1'];
const CONTINUATION_MARKER: [u8; 4] = [0xff; 4];
Loading