Skip to content
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
88 changes: 79 additions & 9 deletions datafusion/spark/src/function/map/map_from_arrays.rs
Original file line number Diff line number Diff line change
Expand Up @@ -23,11 +23,14 @@ use crate::function::map::utils::{
};
use arrow::array::{Array, ArrayRef, NullArray};
use arrow::compute::kernels::cast;
use arrow::datatypes::DataType;
use arrow::datatypes::{DataType, Field, FieldRef};
use datafusion_common::utils::take_function_args;
use datafusion_common::Result;
use datafusion_expr::{ColumnarValue, ScalarUDFImpl, Signature, Volatility};
use datafusion_common::{internal_err, Result};
use datafusion_expr::{
ColumnarValue, ReturnFieldArgs, ScalarUDFImpl, Signature, Volatility,
};
use datafusion_functions::utils::make_scalar_function;
use std::sync::Arc;

/// Spark-compatible `map_from_arrays` expression
/// <https://spark.apache.org/docs/latest/api/sql/index.html#map_from_arrays>
Expand Down Expand Up @@ -63,12 +66,23 @@ impl ScalarUDFImpl for MapFromArrays {
&self.signature
}

fn return_type(&self, arg_types: &[DataType]) -> Result<DataType> {
let [key_type, value_type] = take_function_args("map_from_arrays", arg_types)?;
Ok(map_type_from_key_value_types(
get_element_type(key_type)?,
get_element_type(value_type)?,
))
/// Intentionally unimplemented: the output type alone is not enough here,
/// because the result's nullability depends on the input fields' nullability.
/// DataFusion therefore calls `return_field_from_args` instead, and reaching
/// this method indicates a planner bug.
fn return_type(&self, _arg_types: &[DataType]) -> Result<DataType> {
internal_err!("return_field_from_args should be used instead")
}

/// Computes the output field for `map_from_arrays`.
///
/// The map's key and value types are taken from the element types of the
/// two list arguments. Spark treats `map_from_arrays` as null-intolerant,
/// so the output field is nullable whenever either input field is nullable.
fn return_field_from_args(&self, args: ReturnFieldArgs) -> Result<FieldRef> {
    let (keys, values) = match args.arg_fields {
        [keys, values] => (keys, values),
        _ => return internal_err!("map_from_arrays expects exactly 2 arguments"),
    };

    // A null in either input propagates to a null map entry/row.
    let output_nullable = keys.is_nullable() || values.is_nullable();

    let key_type = get_element_type(keys.data_type())?;
    let value_type = get_element_type(values.data_type())?;
    let output_type = map_type_from_key_value_types(key_type, value_type);

    Ok(Arc::new(Field::new(self.name(), output_type, output_nullable)))
}

fn invoke_with_args(
Expand Down Expand Up @@ -103,3 +117,59 @@ fn map_from_arrays_inner(args: &[ArrayRef]) -> Result<ArrayRef> {
values.nulls(),
)
}

#[cfg(test)]
mod tests {
    use super::*;
    use arrow::datatypes::Field;
    use datafusion_expr::ReturnFieldArgs;

    /// Builds a `List` field with the given element type/nullability and
    /// outer nullability, matching the shape `map_from_arrays` expects.
    fn list_field(
        name: &str,
        item_type: DataType,
        item_nullable: bool,
        nullable: bool,
    ) -> FieldRef {
        let item = Field::new("item", item_type, item_nullable);
        Arc::new(Field::new(name, DataType::List(Arc::new(item)), nullable))
    }

    #[test]
    fn test_map_from_arrays_nullability_and_type() {
        let udf = MapFromArrays::new();

        let keys = list_field("keys", DataType::Int32, false, false);
        let values = list_field("values", DataType::Utf8, true, false);

        // Both inputs non-nullable: expect the derived map type and a
        // non-nullable output field.
        let field = udf
            .return_field_from_args(ReturnFieldArgs {
                arg_fields: &[Arc::clone(&keys), Arc::clone(&values)],
                scalar_arguments: &[None, None],
            })
            .expect("return_field_from_args should succeed");

        assert_eq!(
            field.data_type(),
            &map_type_from_key_value_types(&DataType::Int32, &DataType::Utf8)
        );
        assert!(
            !field.is_nullable(),
            "map_from_arrays should be non-nullable when both inputs are non-nullable"
        );

        // Nullable keys input: the output must become nullable.
        let nullable_keys = list_field("keys", DataType::Int32, false, true);
        let field = udf
            .return_field_from_args(ReturnFieldArgs {
                arg_fields: &[nullable_keys, values],
                scalar_arguments: &[None, None],
            })
            .expect("return_field_from_args should succeed");

        assert!(
            field.is_nullable(),
            "map_from_arrays should be nullable when any input is nullable"
        );
    }
}