Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
138 changes: 136 additions & 2 deletions parquet-variant-compute/src/cast_to_variant.rs
Original file line number Diff line number Diff line change
Expand Up @@ -178,6 +178,36 @@ macro_rules! decimal_to_variant_decimal {
};
}

/// Feeds every slot of an array into a variant builder, for accessors that
/// take no generic type parameter.
///
/// Arguments:
/// * `$method`  - zero-argument accessor called on `$input` to obtain the array
/// * `$cast_fn` - callable applied to each non-null element before it is
///   wrapped with `Variant::from`
/// * `$input`   - expression the accessor is invoked on
/// * `$builder` - receives `append_null()` for null slots and
///   `append_variant(..)` for everything else
macro_rules! cast_conversion_nongeneric {
    ($method:ident, $cast_fn:expr, $input:expr, $builder:expr) => {{
        let typed = $input.$method();
        for row in 0..typed.len() {
            if typed.is_null(row) {
                // Null slots are forwarded as nulls; the cast function is never called for them.
                $builder.append_null();
            } else {
                let converted = $cast_fn(typed.value(row));
                $builder.append_variant(Variant::from(converted));
            }
        }
    }};
}

/// Feeds every slot of a string array into a variant builder, where the
/// accessor is generic over the offset type.
///
/// Arguments:
/// * `$offset_type` - type passed to the accessor as `$method::<$offset_type>()`
/// * `$method`      - accessor called on `$input` to obtain the array
/// * `$cast_fn`     - callable applied to each non-null element before it is
///   wrapped with `Variant::from`
/// * `$input`       - expression the accessor is invoked on
/// * `$builder`     - receives `append_null()` for null slots and
///   `append_variant(..)` for everything else
macro_rules! cast_conversion_string {
    ($offset_type:ty, $method:ident, $cast_fn:expr, $input:expr, $builder:expr) => {{
        let strings = $input.$method::<$offset_type>();
        for row in 0..strings.len() {
            if strings.is_null(row) {
                // Null slots are forwarded as nulls; the cast function is never called for them.
                $builder.append_null();
            } else {
                let converted = $cast_fn(strings.value(row));
                $builder.append_variant(Variant::from(converted));
            }
        }
    }};
}

/// Casts a typed arrow [`Array`] to a [`VariantArray`]. This is useful when you
/// need to convert a specific data type
///
Expand Down Expand Up @@ -211,7 +241,7 @@ pub fn cast_to_variant(input: &dyn Array) -> Result<VariantArray, ArrowError> {
let mut builder = VariantArrayBuilder::new(input.len());

let input_type = input.data_type();
// todo: handle other types like Boolean, Strings, Date, Timestamp, etc.
// todo: handle other types like Boolean, Date, Timestamp, etc.
match input_type {
DataType::Boolean => {
non_generic_conversion!(as_boolean, |v| v, input, builder);
Expand Down Expand Up @@ -328,6 +358,15 @@ pub fn cast_to_variant(input: &dyn Array) -> Result<VariantArray, ArrowError> {
.to_string(),
));
}
DataType::Utf8 => {
cast_conversion_string!(i32, as_string, |v| v, input, builder);
}
DataType::LargeUtf8 => {
cast_conversion_string!(i64, as_string, |v| v, input, builder);
}
DataType::Utf8View => {
cast_conversion_nongeneric!(as_string_view, |v| v, input, builder);
}
dt => {
return Err(ArrowError::CastError(format!(
"Unsupported data type for casting to Variant: {dt:?}",
Expand All @@ -348,7 +387,8 @@ mod tests {
ArrayRef, BooleanArray, Decimal128Array, Decimal256Array, Decimal32Array, Decimal64Array,
FixedSizeBinaryBuilder, Float16Array, Float32Array, Float64Array, GenericByteBuilder,
GenericByteViewBuilder, Int16Array, Int32Array, Int64Array, Int8Array,
IntervalYearMonthArray, NullArray, UInt16Array, UInt32Array, UInt64Array, UInt8Array,
IntervalYearMonthArray, LargeStringArray, NullArray, StringArray, StringViewArray,
UInt16Array, UInt32Array, UInt64Array, UInt8Array,
};
use arrow_schema::{
DECIMAL128_MAX_PRECISION, DECIMAL32_MAX_PRECISION, DECIMAL64_MAX_PRECISION,
Expand Down Expand Up @@ -1152,6 +1192,100 @@ mod tests {
)
}

/// Casting a `Utf8` array: covers short strings, the empty string, nulls, and
/// a string longer than 63 bytes.
#[test]
fn test_cast_to_variant_utf8() {
    // Test with short strings (should become ShortString variants).
    // The fixture is moved straight into the array; no clone is needed.
    let string_array = StringArray::from(vec![
        Some("hello"),
        Some(""),
        None,
        Some("world"),
        Some("test"),
    ]);

    run_test(
        Arc::new(string_array),
        vec![
            Some(Variant::from("hello")),
            Some(Variant::from("")),
            None,
            Some(Variant::from("world")),
            Some(Variant::from("test")),
        ],
    );

    // Test with a long string (should become String variant)
    let long_string = "a".repeat(100); // > 63 bytes, so will be Variant::String
    // Build the array from borrowed slices so `long_string` stays available
    // for the expected values without cloning it.
    let string_array = StringArray::from(vec![Some(long_string.as_str()), None, Some("short")]);

    run_test(
        Arc::new(string_array),
        vec![
            Some(Variant::from(long_string.as_str())),
            None,
            Some(Variant::from("short")),
        ],
    );
}

/// Casting a `LargeUtf8` array: covers short strings, the empty string,
/// nulls, and a string longer than 63 bytes.
#[test]
fn test_cast_to_variant_large_utf8() {
    // Test with short strings (should become ShortString variants).
    // The fixture is moved straight into the array; no clone is needed.
    let string_array =
        LargeStringArray::from(vec![Some("hello"), Some(""), None, Some("world")]);

    run_test(
        Arc::new(string_array),
        vec![
            Some(Variant::from("hello")),
            Some(Variant::from("")),
            None,
            Some(Variant::from("world")),
        ],
    );

    // Test with a long string (should become String variant)
    let long_string = "b".repeat(100); // > 63 bytes, so will be Variant::String
    // Build the array from borrowed slices so `long_string` stays available
    // for the expected values without cloning it.
    let string_array =
        LargeStringArray::from(vec![Some(long_string.as_str()), None, Some("short")]);

    run_test(
        Arc::new(string_array),
        vec![
            Some(Variant::from(long_string.as_str())),
            None,
            Some(Variant::from("short")),
        ],
    );
}

/// Casting a `Utf8View` array: covers short strings, the empty string, nulls,
/// and a string longer than 63 bytes.
#[test]
fn test_cast_to_variant_utf8_view() {
    // Test with short strings (should become ShortString variants).
    // The fixture is moved straight into the array; no clone is needed.
    let string_view_array =
        StringViewArray::from(vec![Some("hello"), Some(""), None, Some("world")]);

    run_test(
        Arc::new(string_view_array),
        vec![
            Some(Variant::from("hello")),
            Some(Variant::from("")),
            None,
            Some(Variant::from("world")),
        ],
    );

    // Test with a long string (should become String variant)
    let long_string = "c".repeat(100); // > 63 bytes, so will be Variant::String
    // Build the array from borrowed slices so `long_string` stays available
    // for the expected values without cloning it.
    let string_view_array =
        StringViewArray::from(vec![Some(long_string.as_str()), None, Some("short")]);

    run_test(
        Arc::new(string_view_array),
        vec![
            Some(Variant::from(long_string.as_str())),
            None,
            Some(Variant::from("short")),
        ],
    );
}

/// Converts the given `Array` to a `VariantArray` and tests the conversion
/// against the expected values. It also tests the handling of nulls by
/// setting one element to null and verifying the output.
Expand Down
4 changes: 2 additions & 2 deletions parquet-variant-compute/src/variant_array_builder.rs
Original file line number Diff line number Diff line change
Expand Up @@ -217,7 +217,7 @@ pub struct VariantArrayVariantBuilder<'a> {
variant_builder: VariantBuilder,
}

impl<'a> VariantBuilderExt for VariantArrayVariantBuilder<'a> {
impl VariantBuilderExt for VariantArrayVariantBuilder<'_> {
fn append_value<'m, 'v>(&mut self, value: impl Into<Variant<'m, 'v>>) {
self.variant_builder.append_value(value);
}
Expand Down Expand Up @@ -300,7 +300,7 @@ impl<'a> VariantArrayVariantBuilder<'a> {
}
}

impl<'a> Drop for VariantArrayVariantBuilder<'a> {
impl Drop for VariantArrayVariantBuilder<'_> {
/// If the builder was not finished, roll back any changes made to the
/// underlying buffers (by truncating them)
fn drop(&mut self) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,7 @@ impl<'a, T: ArrowPrimitiveVariant> PrimitiveOutputBuilder<'a, T> {
}
}

impl<'a, T: ArrowPrimitiveVariant> OutputBuilder for PrimitiveOutputBuilder<'a, T> {
impl<T: ArrowPrimitiveVariant> OutputBuilder for PrimitiveOutputBuilder<'_, T> {
fn partially_shredded(
&self,
variant_array: &VariantArray,
Expand Down
2 changes: 1 addition & 1 deletion parquet-variant-compute/src/variant_get/output/variant.rs
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ impl<'a> VariantOutputBuilder<'a> {
}
}

impl<'a> OutputBuilder for VariantOutputBuilder<'a> {
impl OutputBuilder for VariantOutputBuilder<'_> {
fn partially_shredded(
&self,
variant_array: &VariantArray,
Expand Down
Loading