From 90f72c0f5acdbb585fdac669818d342832e1159f Mon Sep 17 00:00:00 2001 From: Weijun-H Date: Tue, 13 Aug 2024 22:27:57 +0800 Subject: [PATCH 01/27] feat: Add map_extract module and function --- datafusion/common/src/utils/mod.rs | 26 ++- datafusion/expr-common/src/signature.rs | 12 ++ .../expr/src/type_coercion/functions.rs | 15 +- datafusion/functions-nested/src/lib.rs | 3 + .../functions-nested/src/map_extract.rs | 168 ++++++++++++++++++ 5 files changed, 222 insertions(+), 2 deletions(-) create mode 100644 datafusion/functions-nested/src/map_extract.rs diff --git a/datafusion/common/src/utils/mod.rs b/datafusion/common/src/utils/mod.rs index bf506c0551eb6..34a3470cac911 100644 --- a/datafusion/common/src/utils/mod.rs +++ b/datafusion/common/src/utils/mod.rs @@ -34,7 +34,7 @@ use arrow_array::{ Array, FixedSizeListArray, LargeListArray, ListArray, OffsetSizeTrait, RecordBatchOptions, }; -use arrow_schema::DataType; +use arrow_schema::{DataType, Fields}; use sqlparser::ast::Ident; use sqlparser::dialect::GenericDialect; use sqlparser::parser::Parser; @@ -753,6 +753,30 @@ pub fn combine_limit( (combined_skip, combined_fetch) } +pub fn get_map_entry_field(data_type: &DataType) -> Result<&Fields> { + match data_type { + DataType::Map(filed, _) => { + let field_data_type = filed.data_type(); + match field_data_type { + DataType::Struct(fields) if fields.len() == 2 => Ok(fields), + DataType::Struct(fields) => { + _internal_err!( + "Expected a Struct type with 2 fields in Map, got {} fields", + fields.len() + ) + } + _ => { + return _internal_err!( + "Expected a Struct type, got {:?}", + field_data_type + ) + } + } + } + _ => return _internal_err!("Expected a Map type, got {:?}", data_type), + } +} + #[cfg(test)] mod tests { use crate::ScalarValue::Null; diff --git a/datafusion/expr-common/src/signature.rs b/datafusion/expr-common/src/signature.rs index 4dcfa423e3718..16f2d164a80ed 100644 --- a/datafusion/expr-common/src/signature.rs +++ b/datafusion/expr-common/src/signature.rs @@ -146,6 +146,8 @@ pub enum ArrayFunctionSignature { /// Specialized Signature for MapArray /// The function takes a single argument that must be a MapArray MapArray, + /// Specialized Signature for MapExtract and similar functions + MapArrayAndKey, } impl std::fmt::Display for ArrayFunctionSignature { @@ -169,6 +171,9 @@ impl std::fmt::Display for ArrayFunctionSignature { ArrayFunctionSignature::MapArray => { write!(f, "map_array") } + ArrayFunctionSignature::MapArrayAndKey => { + write!(f, "map_array, key") + } } } } @@ -357,6 +362,13 @@ impl Signature { volatility, } } + /// Specialized Signature for MapArray and similar functions + pub fn map_array_and_key(volatility: Volatility) -> Self { + Signature { + type_signature: TypeSignature::ArraySignature(ArrayFunctionSignature::MapArrayAndKey), + volatility, + } + } } #[cfg(test)] diff --git a/datafusion/expr/src/type_coercion/functions.rs b/datafusion/expr/src/type_coercion/functions.rs index 190374b01dd24..f94c7e977224a 100644 --- a/datafusion/expr/src/type_coercion/functions.rs +++ b/datafusion/expr/src/type_coercion/functions.rs @@ -22,7 +22,9 @@ use arrow::{ compute::can_cast_types, datatypes::{DataType, TimeUnit}, }; -use datafusion_common::utils::{coerced_fixed_size_list_to_list, list_ndims}; +use datafusion_common::utils::{ + coerced_fixed_size_list_to_list, get_map_entry_field, list_ndims, +}; use datafusion_common::{ exec_err, internal_datafusion_err, internal_err, plan_err, Result, }; @@ -388,6 +390,17 @@ fn get_valid_types( _ => vec![vec![]], } } + ArrayFunctionSignature::MapArrayAndKey => { + if current_types.len() != 2 { + return Ok(vec![vec![]]); + } + + let field = get_map_entry_field(¤t_types[0])?; + vec![vec![ + current_types[0].clone(), + field.get(0).unwrap().data_type().clone(), + ]] + } }, TypeSignature::Any(number) => { if current_types.len() != *number { diff --git a/datafusion/functions-nested/src/lib.rs b/datafusion/functions-nested/src/lib.rs index ef2c5e709bc16..cc0a7b55cf866 100644 --- a/datafusion/functions-nested/src/lib.rs +++ b/datafusion/functions-nested/src/lib.rs @@ -42,6 +42,7 @@ pub mod flatten; pub mod length; pub mod make_array; pub mod map; +pub mod map_extract; pub mod planner; pub mod position; pub mod range; @@ -81,6 +82,7 @@ pub mod expr_fn { pub use super::flatten::flatten; pub use super::length::array_length; pub use super::make_array::make_array; + pub use super::map_extract::map_extract; pub use super::position::array_position; pub use super::position::array_positions; pub use super::range::gen_series; @@ -143,6 +145,7 @@ pub fn all_default_nested_functions() -> Vec> { replace::array_replace_all_udf(), replace::array_replace_udf(), map::map_udf(), + map_extract::map_extract_udf(), ] } diff --git a/datafusion/functions-nested/src/map_extract.rs b/datafusion/functions-nested/src/map_extract.rs new file mode 100644 index 0000000000000..8d4b61f09c530 --- /dev/null +++ b/datafusion/functions-nested/src/map_extract.rs @@ -0,0 +1,168 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +//! [`ScalarUDFImpl`] definitions for array_element, array_slice, array_pop_front and array_pop_back functions. + +use arrow::array::ArrayRef; + +use arrow::datatypes::{DataType, Float64Type, Int64Type, UInt32Type}; +use arrow_array::{ + new_empty_array, Array, ArrowPrimitiveType, MapArray, PrimitiveArray, StringArray, +}; +use datafusion_common::cast::{as_primitive_array, as_string_array}; +use datafusion_common::utils::get_map_entry_field; +use datafusion_common::{cast::as_map_array, exec_err, Result}; +use datafusion_expr::{ColumnarValue, ScalarUDFImpl, Signature, Volatility}; +use std::any::Any; + +use crate::utils::make_scalar_function; + +// Create static instances of ScalarUDFs for each function +make_udf_expr_and_func!( + MapExtract, + map_extract, + map key, + "Return a list containing the value for a given key or an empty list if the key is not contained in the map.", + map_extract_udf +); + +#[derive(Debug)] +pub(super) struct MapExtract { + signature: Signature, + aliases: Vec, +} + +impl MapExtract { + pub fn new() -> Self { + Self { + signature: Signature::map_array_and_key(Volatility::Immutable), + aliases: vec![String::from("element_at")], + } + } +} + +impl ScalarUDFImpl for MapExtract { + fn as_any(&self) -> &dyn Any { + self + } + fn name(&self) -> &str { + "map_extract" + } + + fn signature(&self) -> &Signature { + &self.signature + } + + fn return_type(&self, arg_types: &[DataType]) -> Result { + let map_type = &arg_types[0]; + let map_fields = get_map_entry_field(map_type)?; + Ok(map_fields[1].data_type().clone()) + } + + fn invoke(&self, args: &[ColumnarValue]) -> Result { + make_scalar_function(map_extract_inner)(args) + } + + fn aliases(&self) -> &[String] { + &self.aliases + } +} + +fn map_extract_inner(args: &[ArrayRef]) -> Result { + if args.len() != 2 { + return exec_err!("map_extract expects two arguments"); + } + + let map_array = as_map_array(&args[0])?; + + if map_array.key_type() != args[1].data_type() { + return exec_err!( + "The key type {} does not match the map key type {}", + args[1].data_type(), + map_array.key_type() + ); + } + + let key_type = map_array.key_type(); + if key_type.is_integer() { + generic_map_extract_inner::( + &map_array, + as_primitive_array::(map_array.keys())?, + as_primitive_array::(&args[1])?, + ) + } else if key_type.is_floating() { + generic_map_extract_inner::( + &map_array, + as_primitive_array::(map_array.keys())?, + as_primitive_array::(&args[1])?, + ) + } else if key_type.is_unsigned_integer() { + generic_map_extract_inner::( + &map_array, + as_primitive_array::(map_array.keys())?, + as_primitive_array::(&args[1])?, + ) + } else if key_type == &DataType::Utf8 { + string_map_extract_inner( + &map_array, + as_string_array(map_array.keys())?, + as_string_array(&args[1])?, + ) + } else { + exec_err!("Unsupported key type: {:?}", args[1].data_type()) + } +} + +fn generic_map_extract_inner( + map_array: &MapArray, + keys_array: &PrimitiveArray, + query_keys_array: &PrimitiveArray, +) -> Result { + let mut values = new_empty_array(map_array.value_type()); + for index in 0..keys_array.len() { + let key = keys_array.value(index); + if key == query_keys_array.value(0) { + let map_values = map_array.values(); + if !map_values.is_null(index) { + values = map_values.slice(index, 1); + } + break; + } + } + + return Ok(values); +} + +fn string_map_extract_inner( + map_array: &MapArray, + keys_array: &StringArray, + query_keys_array: &StringArray, +) -> Result { + let mut values = new_empty_array(map_array.value_type()); + for index in 0..keys_array.len() { + let key = keys_array.value(index); + if key == query_keys_array.value(0) { + let map_values = map_array.values(); + if !map_values.is_null(index) { + values = map_values.slice(index, 1); + } + break; + } + } + + return Ok(values); +} From 7cf0003d09881d4cb5133aac132fb43e6f312a40 Mon Sep 17 00:00:00 2001 From: Weijun-H Date: Tue, 13 Aug 2024 22:28:52 +0800 Subject: [PATCH 02/27] chore: Fix fmt --- datafusion/expr-common/src/signature.rs | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/datafusion/expr-common/src/signature.rs b/datafusion/expr-common/src/signature.rs index 16f2d164a80ed..a394189b9a84d 100644 --- a/datafusion/expr-common/src/signature.rs +++ b/datafusion/expr-common/src/signature.rs @@ -365,7 +365,9 @@ impl Signature { /// Specialized Signature for MapArray and similar functions pub fn map_array_and_key(volatility: Volatility) -> Self { Signature { - type_signature: TypeSignature::ArraySignature(ArrayFunctionSignature::MapArrayAndKey), + type_signature: TypeSignature::ArraySignature( + ArrayFunctionSignature::MapArrayAndKey, + ), volatility, } } From b4898530a476d3dc62012b9aa6e6324de2f83aee Mon Sep 17 00:00:00 2001 From: Weijun-H Date: Tue, 13 Aug 2024 23:07:17 +0800 Subject: [PATCH 03/27] chore: Add tests --- .../functions-nested/src/map_extract.rs | 6 +++--- datafusion/sqllogictest/test_files/map.slt | 19 +++++++++++++++++++ 2 files changed, 22 insertions(+), 3 deletions(-) diff --git a/datafusion/functions-nested/src/map_extract.rs b/datafusion/functions-nested/src/map_extract.rs index 8d4b61f09c530..7213456843de8 100644 --- a/datafusion/functions-nested/src/map_extract.rs +++ b/datafusion/functions-nested/src/map_extract.rs @@ -21,7 +21,7 @@ use arrow::array::ArrayRef; use arrow::datatypes::{DataType, Float64Type, Int64Type, UInt32Type}; use arrow_array::{ - new_empty_array, Array, ArrowPrimitiveType, MapArray, PrimitiveArray, StringArray, + new_null_array, Array, ArrowPrimitiveType, MapArray, PrimitiveArray, StringArray, }; use datafusion_common::cast::{as_primitive_array, as_string_array}; use datafusion_common::utils::get_map_entry_field; @@ -132,7 +132,7 @@ fn generic_map_extract_inner( keys_array: &PrimitiveArray, query_keys_array: &PrimitiveArray, ) -> Result { - let mut values = new_empty_array(map_array.value_type()); + let mut values = new_null_array(map_array.value_type(), 1); for index in 0..keys_array.len() { let key = keys_array.value(index); if key == query_keys_array.value(0) { @@ -152,7 +152,7 @@ fn string_map_extract_inner( keys_array: &StringArray, query_keys_array: &StringArray, ) -> Result { - let mut values = new_empty_array(map_array.value_type()); + let mut values = new_null_array(map_array.value_type(), 1); for index in 0..keys_array.len() { let key = keys_array.value(index); if key == query_keys_array.value(0) { diff --git a/datafusion/sqllogictest/test_files/map.slt b/datafusion/sqllogictest/test_files/map.slt index 0dc37c68bca4d..74e2a259cd310 100644 --- a/datafusion/sqllogictest/test_files/map.slt +++ b/datafusion/sqllogictest/test_files/map.slt @@ -493,3 +493,22 @@ select cardinality(map([1, 2, 3], ['a', 'b', 'c'])), cardinality(MAP {'a': 1, 'b cardinality(MAP {'a': MAP {1:'a', 2:'b', 3:'c'}, 'b': MAP {2:'c', 4:'d'} }); ---- 3 2 0 2 + +# map_extract +query IIII +select map_extract(MAP {'a': 1, 'b': 2, 'c': 3}, 'a'), map_extract(MAP {'a': 1, 'b': 2, 'c': 3}, 'b'), + map_extract(MAP {'a': 1, 'b': 2, 'c': 3}, 'c'), map_extract(MAP {'a': 1, 'b': 2, 'c': 3}, 'd'); +---- +1 2 3 NULL + +query IIII +select map_extract(MAP {1: 1, 2: 2, 3:3}, 1), map_extract(MAP {1: 1, 2: 2, 3:3}, 2), + map_extract(MAP {1: 1, 2: 2, 3:3}, 3), map_extract(MAP {1: 1, 2: 2, 3:3}, 4); +---- +1 2 3 NULL + +# TODO: support Map key and value type coercion +# query II +# select map_extract(MAP {1: 1, 2: 2, 3:3}, '1'), map_extract(MAP {1: 1, 2: 2, 3:3}, 1.0); +# --- +# 1 1 \ No newline at end of file From eb78a581e829d4400cab2599f61b9dc8401e6225 Mon Sep 17 00:00:00 2001 From: Weijun-H Date: Tue, 13 Aug 2024 23:15:55 +0800 Subject: [PATCH 04/27] chore: Simplify --- datafusion/functions-nested/src/map_extract.rs | 9 ++------- datafusion/sqllogictest/test_files/map.slt | 12 ++++++------ 2 files changed, 8 insertions(+), 13 deletions(-) diff --git a/datafusion/functions-nested/src/map_extract.rs b/datafusion/functions-nested/src/map_extract.rs index 7213456843de8..b96bcd3267eb2 100644 --- a/datafusion/functions-nested/src/map_extract.rs +++ b/datafusion/functions-nested/src/map_extract.rs @@ -137,9 +137,7 @@ fn generic_map_extract_inner( let key = keys_array.value(index); if key == query_keys_array.value(0) { let map_values = map_array.values(); - if !map_values.is_null(index) { - values = map_values.slice(index, 1); - } + values = map_values.slice(index, 1); break; } } @@ -157,12 +155,9 @@ fn string_map_extract_inner( let key = keys_array.value(index); if key == query_keys_array.value(0) { let map_values = map_array.values(); - if !map_values.is_null(index) { - values = map_values.slice(index, 1); - } + values = map_values.slice(index, 1); break; } } - return Ok(values); } diff --git a/datafusion/sqllogictest/test_files/map.slt b/datafusion/sqllogictest/test_files/map.slt index 74e2a259cd310..e52b7382bf033 100644 --- a/datafusion/sqllogictest/test_files/map.slt +++ b/datafusion/sqllogictest/test_files/map.slt @@ -496,16 +496,16 @@ select cardinality(map([1, 2, 3], ['a', 'b', 'c'])), cardinality(MAP {'a': 1, 'b # map_extract query IIII -select map_extract(MAP {'a': 1, 'b': 2, 'c': 3}, 'a'), map_extract(MAP {'a': 1, 'b': 2, 'c': 3}, 'b'), - map_extract(MAP {'a': 1, 'b': 2, 'c': 3}, 'c'), map_extract(MAP {'a': 1, 'b': 2, 'c': 3}, 'd'); +select map_extract(MAP {'a': 1, 'b': NULL, 'c': 3}, 'a'), map_extract(MAP {'a': 1, 'b': NULL, 'c': 3}, 'b'), + map_extract(MAP {'a': 1, 'b': NULL, 'c': 3}, 'c'), map_extract(MAP {'a': 1, 'b': NULL, 'c': 3}, 'd'); ---- -1 2 3 NULL +1 NULL 3 NULL query IIII -select map_extract(MAP {1: 1, 2: 2, 3:3}, 1), map_extract(MAP {1: 1, 2: 2, 3:3}, 2), - map_extract(MAP {1: 1, 2: 2, 3:3}, 3), map_extract(MAP {1: 1, 2: 2, 3:3}, 4); +select map_extract(MAP {1: 1, 2: NULL, 3:3}, 1), map_extract(MAP {1: 1, 2: NULL, 3:3}, 2), + map_extract(MAP {1: 1, 2: NULL, 3:3}, 3), map_extract(MAP {1: 1, 2: NULL, 3:3}, 4); ---- -1 2 3 NULL +1 NULL 3 NULL # TODO: support Map key and value type coercion # query II From 92f633f72952a87c151a8d928d11f94dbd4249d2 Mon Sep 17 00:00:00 2001 From: Weijun-H Date: Tue, 13 Aug 2024 23:24:34 +0800 Subject: [PATCH 05/27] chore: Simplify --- .../functions-nested/src/map_extract.rs | 33 ++++++++----------- datafusion/sqllogictest/test_files/map.slt | 6 ++++ 2 files changed, 20 insertions(+), 19 deletions(-) diff --git a/datafusion/functions-nested/src/map_extract.rs b/datafusion/functions-nested/src/map_extract.rs index b96bcd3267eb2..1bf569a2e6094 100644 --- a/datafusion/functions-nested/src/map_extract.rs +++ b/datafusion/functions-nested/src/map_extract.rs @@ -132,17 +132,14 @@ fn generic_map_extract_inner( keys_array: &PrimitiveArray, query_keys_array: &PrimitiveArray, ) -> Result { - let mut values = new_null_array(map_array.value_type(), 1); - for index in 0..keys_array.len() { - let key = keys_array.value(index); - if key == query_keys_array.value(0) { - let map_values = map_array.values(); - values = map_values.slice(index, 1); - break; - } - } + let query_key = query_keys_array.value(0); + // key cannot be NULL, so we can unwrap + let index = keys_array.iter().position(|key| key.unwrap() == query_key); - return Ok(values); + match index { + Some(idx) => Ok(map_array.values().slice(idx, 1)), + None => Ok(new_null_array(map_array.value_type(), 1)), + } } fn string_map_extract_inner( @@ -150,14 +147,12 @@ fn string_map_extract_inner( keys_array: &StringArray, query_keys_array: &StringArray, ) -> Result { - let mut values = new_null_array(map_array.value_type(), 1); - for index in 0..keys_array.len() { - let key = keys_array.value(index); - if key == query_keys_array.value(0) { - let map_values = map_array.values(); - values = map_values.slice(index, 1); - break; - } + let query_key = query_keys_array.value(0); + // key cannot be NULL, so we can unwrap + let index = keys_array.iter().position(|key| key.unwrap() == query_key); + + match index { + Some(idx) => Ok(map_array.values().slice(idx, 1)), + None => Ok(new_null_array(map_array.value_type(), 1)), } - return Ok(values); } diff --git a/datafusion/sqllogictest/test_files/map.slt b/datafusion/sqllogictest/test_files/map.slt index e52b7382bf033..9e56a72a35fbc 100644 --- a/datafusion/sqllogictest/test_files/map.slt +++ b/datafusion/sqllogictest/test_files/map.slt @@ -507,6 +507,12 @@ select map_extract(MAP {1: 1, 2: NULL, 3:3}, 1), map_extract(MAP {1: 1, 2: NULL, ---- 1 NULL 3 NULL +query ???? +select map_extract(MAP {1: [1, 2], 2: NULL, 3:[3]}, 1), map_extract(MAP {1: [1, 2], 2: NULL, 3:[3]}, 2), + map_extract(MAP {1: [1, 2], 2: NULL, 3:[3]}, 3), map_extract(MAP {1: [1, 2], 2: NULL, 3:[3]}, 4); +---- +[1, 2] NULL [3] NULL + # TODO: support Map key and value type coercion # query II # select map_extract(MAP {1: 1, 2: 2, 3:3}, '1'), map_extract(MAP {1: 1, 2: 2, 3:3}, 1.0); From dc7df96bcf707924833cbb124daf28512eaa8770 Mon Sep 17 00:00:00 2001 From: Weijun-H Date: Wed, 14 Aug 2024 11:24:15 +0800 Subject: [PATCH 06/27] chore: Fix clippy --- datafusion/common/src/utils/mod.rs | 7 ++----- datafusion/functions-nested/src/map_extract.rs | 3 ++- 2 files changed, 4 insertions(+), 6 deletions(-) diff --git a/datafusion/common/src/utils/mod.rs b/datafusion/common/src/utils/mod.rs index 34a3470cac911..7aa9e2f1a1063 100644 --- a/datafusion/common/src/utils/mod.rs +++ b/datafusion/common/src/utils/mod.rs @@ -766,14 +766,11 @@ pub fn get_map_entry_field(data_type: &DataType) -> Result<&Fields> { ) } _ => { - return _internal_err!( - "Expected a Struct type, got {:?}", - field_data_type - ) + _internal_err!("Expected a Struct type, got {:?}", field_data_type) } } } - _ => return _internal_err!("Expected a Map type, got {:?}", data_type), + _ => _internal_err!("Expected a Map type, got {:?}", data_type), } } diff --git a/datafusion/functions-nested/src/map_extract.rs b/datafusion/functions-nested/src/map_extract.rs index 1bf569a2e6094..e64a7ec387162 100644 --- a/datafusion/functions-nested/src/map_extract.rs +++ b/datafusion/functions-nested/src/map_extract.rs @@ -49,7 +49,8 @@ pub(super) struct MapExtract { impl MapExtract { pub fn new() -> Self { Self { - signature: Signature::map_array_and_key(Volatility::Immutable), + // signature: Signature::map_array_and_key(Volatility::Immutable), + signature: Signature::user_defined(Volatility::Immutable), aliases: vec![String::from("element_at")], } } From c55efd56f2165e968db88b8dcc6fcb7ef2497e6d Mon Sep 17 00:00:00 2001 From: Weijun-H Date: Wed, 14 Aug 2024 11:33:55 +0800 Subject: [PATCH 07/27] doc: Add user doc --- .../source/user-guide/sql/scalar_functions.md | 31 +++++++++++++++++++ 1 file changed, 31 insertions(+) diff --git a/docs/source/user-guide/sql/scalar_functions.md b/docs/source/user-guide/sql/scalar_functions.md index c7490df04983e..03097636c23a0 100644 --- a/docs/source/user-guide/sql/scalar_functions.md +++ b/docs/source/user-guide/sql/scalar_functions.md @@ -3640,6 +3640,7 @@ Unwraps struct fields into columns. - [map](#map) - [make_map](#make_map) +- [map_extract](#map_extract) ### `map` @@ -3700,6 +3701,36 @@ SELECT MAKE_MAP('POST', 41, 'HEAD', 33, 'PATCH', null); {POST: 41, HEAD: 33, PATCH: } ``` +### `map_extract` + +Return a list containing the value for a given key or an empty list if the key is not contained in the map. + +``` +SELECT map_extract(map, key); +---- +1 +``` + +#### Arguments + +- `map`: Map expression. + Can be a constant, column, or function, and any combination of map operators. +- `key`: Key to extract from the map. + Can be a constant, column, or function, any combination of arithmetic or + string operators, or a named expression of previous listed. + +#### Example + +``` +SELECT map_extract(MAP {'a': 1, 'b': NULL, 'c': 3}, 'a'); +---- +1 +``` + +#### Aliases + +- element_at + ## Hashing Functions - [digest](#digest) From 3c28a06d8105f553f0a5725468b8ce8fbae13bf1 Mon Sep 17 00:00:00 2001 From: Weijun-H Date: Wed, 14 Aug 2024 11:43:21 +0800 Subject: [PATCH 08/27] feat: use Signature::user_defined --- datafusion/expr-common/src/signature.rs | 14 -------------- datafusion/expr/src/type_coercion/functions.rs | 13 +------------ datafusion/functions-nested/src/map_extract.rs | 13 ++++++++++++- 3 files changed, 13 insertions(+), 27 deletions(-) diff --git a/datafusion/expr-common/src/signature.rs b/datafusion/expr-common/src/signature.rs index a394189b9a84d..4dcfa423e3718 100644 --- a/datafusion/expr-common/src/signature.rs +++ b/datafusion/expr-common/src/signature.rs @@ -146,8 +146,6 @@ pub enum ArrayFunctionSignature { /// Specialized Signature for MapArray /// The function takes a single argument that must be a MapArray MapArray, - /// Specialized Signature for MapExtract and similar functions - MapArrayAndKey, } impl std::fmt::Display for ArrayFunctionSignature { @@ -171,9 +169,6 @@ impl std::fmt::Display for ArrayFunctionSignature { ArrayFunctionSignature::MapArray => { write!(f, "map_array") } - ArrayFunctionSignature::MapArrayAndKey => { - write!(f, "map_array, key") - } } } } @@ -362,15 +357,6 @@ impl Signature { volatility, } } - /// Specialized Signature for MapArray and similar functions - pub fn map_array_and_key(volatility: Volatility) -> Self { - Signature { - type_signature: TypeSignature::ArraySignature( - ArrayFunctionSignature::MapArrayAndKey, - ), - volatility, - } - } } #[cfg(test)] diff --git a/datafusion/expr/src/type_coercion/functions.rs b/datafusion/expr/src/type_coercion/functions.rs index f94c7e977224a..64dc4ee0a5bb9 100644 --- a/datafusion/expr/src/type_coercion/functions.rs +++ b/datafusion/expr/src/type_coercion/functions.rs @@ -23,7 +23,7 @@ use arrow::{ datatypes::{DataType, TimeUnit}, }; use datafusion_common::utils::{ - coerced_fixed_size_list_to_list, get_map_entry_field, list_ndims, + coerced_fixed_size_list_to_list, list_ndims, }; use datafusion_common::{ exec_err, internal_datafusion_err, internal_err, plan_err, Result, @@ -390,17 +390,6 @@ fn get_valid_types( _ => vec![vec![]], } } - ArrayFunctionSignature::MapArrayAndKey => { - if current_types.len() != 2 { - return Ok(vec![vec![]]); - } - - let field = get_map_entry_field(¤t_types[0])?; - vec![vec![ - current_types[0].clone(), - field.get(0).unwrap().data_type().clone(), - ]] - } }, TypeSignature::Any(number) => { if current_types.len() != *number { diff --git a/datafusion/functions-nested/src/map_extract.rs b/datafusion/functions-nested/src/map_extract.rs index e64a7ec387162..b8a3980cc6a90 100644 --- a/datafusion/functions-nested/src/map_extract.rs +++ b/datafusion/functions-nested/src/map_extract.rs @@ -49,7 +49,6 @@ pub(super) struct MapExtract { impl MapExtract { pub fn new() -> Self { Self { - // signature: Signature::map_array_and_key(Volatility::Immutable), signature: Signature::user_defined(Volatility::Immutable), aliases: vec![String::from("element_at")], } @@ -81,6 +80,18 @@ impl ScalarUDFImpl for MapExtract { fn aliases(&self) -> &[String] { &self.aliases } + + fn coerce_types(&self, arg_types: &[DataType]) -> Result> { + if arg_types.len() != 2 { + return exec_err!("map_extract expects two arguments"); + } + + let field = get_map_entry_field(&arg_types[0])?; + Ok(vec![ + arg_types[0].clone(), + field.get(0).unwrap().data_type().clone(), + ]) + } } fn map_extract_inner(args: &[ArrayRef]) -> Result { From 0fff39ac0f16588a2bfc930578ba92e8a44e77f9 Mon Sep 17 00:00:00 2001 From: Weijun-H Date: Wed, 14 Aug 2024 11:52:22 +0800 Subject: [PATCH 09/27] chore: Update tests --- datafusion/sqllogictest/test_files/map.slt | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/datafusion/sqllogictest/test_files/map.slt b/datafusion/sqllogictest/test_files/map.slt index 9e56a72a35fbc..62cab79025f3e 100644 --- a/datafusion/sqllogictest/test_files/map.slt +++ b/datafusion/sqllogictest/test_files/map.slt @@ -495,26 +495,30 @@ select cardinality(map([1, 2, 3], ['a', 'b', 'c'])), cardinality(MAP {'a': 1, 'b 3 2 0 2 # map_extract +# key is string query IIII select map_extract(MAP {'a': 1, 'b': NULL, 'c': 3}, 'a'), map_extract(MAP {'a': 1, 'b': NULL, 'c': 3}, 'b'), map_extract(MAP {'a': 1, 'b': NULL, 'c': 3}, 'c'), map_extract(MAP {'a': 1, 'b': NULL, 'c': 3}, 'd'); ---- 1 NULL 3 NULL +# key is integer query IIII select map_extract(MAP {1: 1, 2: NULL, 3:3}, 1), map_extract(MAP {1: 1, 2: NULL, 3:3}, 2), map_extract(MAP {1: 1, 2: NULL, 3:3}, 3), map_extract(MAP {1: 1, 2: NULL, 3:3}, 4); ---- 1 NULL 3 NULL +# value is list query ???? select map_extract(MAP {1: [1, 2], 2: NULL, 3:[3]}, 1), map_extract(MAP {1: [1, 2], 2: NULL, 3:[3]}, 2), map_extract(MAP {1: [1, 2], 2: NULL, 3:[3]}, 3), map_extract(MAP {1: [1, 2], 2: NULL, 3:[3]}, 4); ---- [1, 2] NULL [3] NULL -# TODO: support Map key and value type coercion -# query II -# select map_extract(MAP {1: 1, 2: 2, 3:3}, '1'), map_extract(MAP {1: 1, 2: 2, 3:3}, 1.0); -# --- -# 1 1 \ No newline at end of file +# key in map and query key are different types +query IIII +select map_extract(MAP {1: 1, 2: 2, 3:3}, '1'), map_extract(MAP {1: 1, 2: 2, 3:3}, 1.0), + map_extract(MAP {1.0: 1, 2: 2, 3:3}, '1'), map_extract(MAP {'1': 1, '2': 2, '3':3}, 1.0); +---- +1 1 1 NULL \ No newline at end of file From 2ca8457372bc69f7acfdcd83d921fe6cbb43c468 Mon Sep 17 00:00:00 2001 From: Weijun-H Date: Wed, 14 Aug 2024 11:54:40 +0800 Subject: [PATCH 10/27] chore: Fix fmt --- datafusion/expr/src/type_coercion/functions.rs | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/datafusion/expr/src/type_coercion/functions.rs b/datafusion/expr/src/type_coercion/functions.rs index 64dc4ee0a5bb9..190374b01dd24 100644 --- a/datafusion/expr/src/type_coercion/functions.rs +++ b/datafusion/expr/src/type_coercion/functions.rs @@ -22,9 +22,7 @@ use arrow::{ compute::can_cast_types, datatypes::{DataType, TimeUnit}, }; -use datafusion_common::utils::{ - coerced_fixed_size_list_to_list, list_ndims, -}; +use datafusion_common::utils::{coerced_fixed_size_list_to_list, list_ndims}; use datafusion_common::{ exec_err, internal_datafusion_err, internal_err, plan_err, Result, }; From 9ac25f0ca6b8a0d7d55f9f74df200a8ede584e83 Mon Sep 17 00:00:00 2001 From: Weijun-H Date: Wed, 14 Aug 2024 12:06:51 +0800 Subject: [PATCH 11/27] chore: Fix clippy --- datafusion/functions-nested/src/map_extract.rs | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/datafusion/functions-nested/src/map_extract.rs b/datafusion/functions-nested/src/map_extract.rs index b8a3980cc6a90..67eb669700d95 100644 --- a/datafusion/functions-nested/src/map_extract.rs +++ b/datafusion/functions-nested/src/map_extract.rs @@ -89,7 +89,7 @@ impl ScalarUDFImpl for MapExtract { let field = get_map_entry_field(&arg_types[0])?; Ok(vec![ arg_types[0].clone(), - field.get(0).unwrap().data_type().clone(), + field.first().unwrap().data_type().clone(), ]) } } @@ -112,25 +112,25 @@ fn map_extract_inner(args: &[ArrayRef]) -> Result { let key_type = map_array.key_type(); if key_type.is_integer() { generic_map_extract_inner::( - &map_array, + map_array, as_primitive_array::(map_array.keys())?, as_primitive_array::(&args[1])?, ) } else if key_type.is_floating() { generic_map_extract_inner::( - &map_array, + map_array, as_primitive_array::(map_array.keys())?, as_primitive_array::(&args[1])?, ) } else if key_type.is_unsigned_integer() { generic_map_extract_inner::( - &map_array, + map_array, as_primitive_array::(map_array.keys())?, as_primitive_array::(&args[1])?, ) } else if key_type == &DataType::Utf8 { string_map_extract_inner( - &map_array, + map_array, as_string_array(map_array.keys())?, as_string_array(&args[1])?, ) From 5547f5a6060363aec9982b0d1612906a4240ebac Mon Sep 17 00:00:00 2001 From: Weijun-H Date: Wed, 14 Aug 2024 12:16:08 +0800 Subject: [PATCH 12/27] chore --- datafusion/functions-nested/src/map_extract.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/datafusion/functions-nested/src/map_extract.rs b/datafusion/functions-nested/src/map_extract.rs index 67eb669700d95..41fbe121842cf 100644 --- a/datafusion/functions-nested/src/map_extract.rs +++ b/datafusion/functions-nested/src/map_extract.rs @@ -15,7 +15,7 @@ // specific language governing permissions and limitations // under the License. -//! [`ScalarUDFImpl`] definitions for array_element, array_slice, array_pop_front and array_pop_back functions. +//! [`ScalarUDFImpl`] definitions for map_extract functions. use arrow::array::ArrayRef; From bebb3b01783fa3d92e8cc666edcf0153ffff9f95 Mon Sep 17 00:00:00 2001 From: Weijun-H Date: Wed, 14 Aug 2024 22:16:50 +0800 Subject: [PATCH 13/27] chore: typo --- datafusion/common/src/utils/mod.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/datafusion/common/src/utils/mod.rs b/datafusion/common/src/utils/mod.rs index 7aa9e2f1a1063..cb3b83dc52e08 100644 --- a/datafusion/common/src/utils/mod.rs +++ b/datafusion/common/src/utils/mod.rs @@ -755,8 +755,8 @@ pub fn combine_limit( pub fn get_map_entry_field(data_type: &DataType) -> Result<&Fields> { match data_type { - DataType::Map(filed, _) => { - let field_data_type = filed.data_type(); + DataType::Map(field, _) => { + let field_data_type = field.data_type(); match field_data_type { DataType::Struct(fields) if fields.len() == 2 => Ok(fields), DataType::Struct(fields) => { From 5779753b92f65a43dc74832f758a5a92b56a34a7 Mon Sep 17 00:00:00 2001 From: Weijun-H Date: Wed, 14 Aug 2024 22:18:49 +0800 Subject: [PATCH 14/27] chore: Check args len in return_type --- datafusion/functions-nested/src/map_extract.rs | 3 +++ 1 file changed, 3 insertions(+) diff --git a/datafusion/functions-nested/src/map_extract.rs b/datafusion/functions-nested/src/map_extract.rs index 41fbe121842cf..b14d980993ac3 100644 --- a/datafusion/functions-nested/src/map_extract.rs +++ b/datafusion/functions-nested/src/map_extract.rs @@ -68,6 +68,9 @@ impl ScalarUDFImpl for MapExtract { } fn return_type(&self, arg_types: &[DataType]) -> Result { + if arg_types.len() != 2 { + return exec_err!("map_extract expects two arguments"); + } let map_type = &arg_types[0]; let map_fields = get_map_entry_field(map_type)?; Ok(map_fields[1].data_type().clone()) From f906e76b4d5eea933220b9eb0115dddc74c460c1 Mon Sep 17 00:00:00 2001 From: Weijun-H Date: Wed, 14 Aug 2024 22:24:23 +0800 Subject: [PATCH 15/27] doc: Update doc --- datafusion/functions-nested/src/map_extract.rs | 2 +- docs/source/user-guide/sql/scalar_functions.md | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/datafusion/functions-nested/src/map_extract.rs b/datafusion/functions-nested/src/map_extract.rs index b14d980993ac3..31078ddea7a97 100644 --- a/datafusion/functions-nested/src/map_extract.rs +++ b/datafusion/functions-nested/src/map_extract.rs @@ -36,7 +36,7 @@ make_udf_expr_and_func!( MapExtract, map_extract, map key, - "Return a list containing the value for a given key or an empty list if the key is not contained in the map.", + "Return a corresponding value from a map for a given key, or NULL if the key is not found.", map_extract_udf ); diff --git a/docs/source/user-guide/sql/scalar_functions.md b/docs/source/user-guide/sql/scalar_functions.md index 03097636c23a0..9f8acc54dc05f 100644 --- a/docs/source/user-guide/sql/scalar_functions.md +++ b/docs/source/user-guide/sql/scalar_functions.md @@ -3703,7 +3703,7 @@ SELECT MAKE_MAP('POST', 41, 'HEAD', 33, 'PATCH', null); ### `map_extract` -Return a list containing the value for a given key or an empty list if the key is not contained in the map. +Return a corresponding value from a map for a given key, or NULL if the key is not found. ``` SELECT map_extract(map, key); From 64ef06ae74e49da5d65c323202670119cca1380d Mon Sep 17 00:00:00 2001 From: Weijun-H Date: Wed, 14 Aug 2024 22:29:00 +0800 Subject: [PATCH 16/27] chore: Simplify logic --- datafusion/common/src/utils/mod.rs | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/datafusion/common/src/utils/mod.rs b/datafusion/common/src/utils/mod.rs index cb3b83dc52e08..d7059e882e555 100644 --- a/datafusion/common/src/utils/mod.rs +++ b/datafusion/common/src/utils/mod.rs @@ -758,13 +758,7 @@ pub fn get_map_entry_field(data_type: &DataType) -> Result<&Fields> { DataType::Map(field, _) => { let field_data_type = field.data_type(); match field_data_type { - DataType::Struct(fields) if fields.len() == 2 => Ok(fields), - DataType::Struct(fields) => { - _internal_err!( - "Expected a Struct type with 2 fields in Map, got {} fields", - fields.len() - ) - } + DataType::Struct(fields) => Ok(fields), _ => { _internal_err!("Expected a Struct type, got {:?}", field_data_type) } From 3bd2b51d309d93ae88d8c5fc3723a7732374e50b Mon Sep 17 00:00:00 2001 From: Weijun-H Date: Wed, 14 Aug 2024 22:35:13 +0800 Subject: [PATCH 17/27] chore: check args earlier --- datafusion/functions-nested/src/map_extract.rs | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/datafusion/functions-nested/src/map_extract.rs b/datafusion/functions-nested/src/map_extract.rs index 31078ddea7a97..2ee6a4845c9d0 100644 --- a/datafusion/functions-nested/src/map_extract.rs +++ b/datafusion/functions-nested/src/map_extract.rs @@ -102,17 +102,21 @@ fn map_extract_inner(args: &[ArrayRef]) -> Result { return exec_err!("map_extract expects two arguments"); } - let map_array = as_map_array(&args[0])?; + let map_array = match args[0].data_type() { + DataType::Map(_, _) => as_map_array(&args[0])?, + _ => return exec_err!("The first argument in map_extract must be a map"), + }; - if map_array.key_type() != args[1].data_type() { + let key_type = map_array.key_type(); + + if key_type != args[1].data_type() { return exec_err!( "The key type {} does not match the map key type {}", args[1].data_type(), - map_array.key_type() + key_type ); } - let key_type = map_array.key_type(); if key_type.is_integer() { generic_map_extract_inner::( map_array, From b6e31e857906c1edb6f9352b1bc4943111e56f21 Mon Sep 17 00:00:00 2001 From: Weijun-H Date: Wed, 14 Aug 2024 22:56:43 +0800 Subject: [PATCH 18/27] feat: Support UTF8VIEW --- .../functions-nested/src/map_extract.rs | 78 +++++++++++++------ datafusion/sqllogictest/test_files/map.slt | 7 +- 2 files changed, 57 insertions(+), 28 deletions(-) diff --git a/datafusion/functions-nested/src/map_extract.rs b/datafusion/functions-nested/src/map_extract.rs index 2ee6a4845c9d0..cabe4719730c4 100644 --- a/datafusion/functions-nested/src/map_extract.rs +++ b/datafusion/functions-nested/src/map_extract.rs @@ -22,8 +22,11 @@ use arrow::array::ArrayRef; use arrow::datatypes::{DataType, Float64Type, Int64Type, UInt32Type}; use arrow_array::{ new_null_array, Array, ArrowPrimitiveType, MapArray, PrimitiveArray, StringArray, + StringViewArray, +}; +use datafusion_common::cast::{ + as_primitive_array, as_string_array, as_string_view_array, }; -use datafusion_common::cast::{as_primitive_array, as_string_array}; use datafusion_common::utils::get_map_entry_field; use datafusion_common::{cast::as_map_array, exec_err, Result}; use datafusion_expr::{ColumnarValue, ScalarUDFImpl, Signature, Volatility}; @@ -117,32 +120,42 @@ fn map_extract_inner(args: &[ArrayRef]) -> Result { ); } - if key_type.is_integer() { - generic_map_extract_inner::( - map_array, - as_primitive_array::(map_array.keys())?, - as_primitive_array::(&args[1])?, - ) - } else if key_type.is_floating() { - generic_map_extract_inner::( - map_array, - as_primitive_array::(map_array.keys())?, - as_primitive_array::(&args[1])?, - ) - } else if key_type.is_unsigned_integer() { - generic_map_extract_inner::( - map_array, - as_primitive_array::(map_array.keys())?, - as_primitive_array::(&args[1])?, - ) - } else if key_type == &DataType::Utf8 { - string_map_extract_inner( + match key_type { + DataType::Int8 | DataType::Int16 | DataType::Int32 | DataType::Int64 => { + generic_map_extract_inner::( + map_array, + as_primitive_array::(map_array.keys())?, + as_primitive_array::(&args[1])?, + ) + } + DataType::UInt8 | DataType::UInt16 | DataType::UInt32 | DataType::UInt64 => { + generic_map_extract_inner::( + map_array, + as_primitive_array::(map_array.keys())?, + as_primitive_array::(&args[1])?, + ) + } + DataType::Float32 | DataType::Float64 => { + generic_map_extract_inner::( + map_array, + as_primitive_array::(map_array.keys())?, + as_primitive_array::(&args[1])?, + ) + } + DataType::Utf8 => string_map_extract_inner( map_array, - as_string_array(map_array.keys())?, + as_string_array(&map_array.keys())?, as_string_array(&args[1])?, - ) - } else { - exec_err!("Unsupported key type: {:?}", args[1].data_type()) + ), + DataType::Utf8View => string_view_map_extract_inner( + map_array, + as_string_view_array(&map_array.keys())?, + as_string_view_array(&args[1])?, + ), + + _ => { + return exec_err!("Unsupported key type for map_extract"); + } } } @@ -175,3 +188,18 @@ fn string_map_extract_inner( None => Ok(new_null_array(map_array.value_type(), 1)), } } + +fn string_view_map_extract_inner( + map_array: &MapArray, + keys_array: &StringViewArray, + query_keys_array: &StringViewArray, +) -> Result { + let query_key = query_keys_array.value(0); + // key cannot be NULL, so we can unwrap + let index = keys_array.iter().position(|key| key.unwrap() == query_key); + + match index { + Some(idx) => Ok(map_array.values().slice(idx, 1)), + None => Ok(new_null_array(map_array.value_type(), 1)), + } +} diff --git a/datafusion/sqllogictest/test_files/map.slt b/datafusion/sqllogictest/test_files/map.slt index 62cab79025f3e..bfa805a2a9763 100644 --- a/datafusion/sqllogictest/test_files/map.slt +++ b/datafusion/sqllogictest/test_files/map.slt @@ -517,8 +517,9 @@ select map_extract(MAP {1: [1, 2], 2: NULL, 3:[3]}, 1), map_extract(MAP {1: [1, [1, 2] NULL [3] NULL # key in map and query key are different types -query IIII +query IIIII select map_extract(MAP {1: 1, 2: 2, 3:3}, '1'), map_extract(MAP {1: 1, 2: 2, 3:3}, 1.0), - map_extract(MAP {1.0: 1, 2: 2, 3:3}, '1'), map_extract(MAP {'1': 1, '2': 2, '3':3}, 1.0); + map_extract(MAP {1.0: 1, 2: 2, 3:3}, '1'), map_extract(MAP {'1': 1, '2': 2, '3':3}, 1.0), + map_extract(MAP {arrow_cast('1', 'Utf8View'): 1, arrow_cast('2', 'Utf8View'): 2, arrow_cast('3', 'Utf8View'):3}, '1'); ---- -1 1 1 NULL \ No newline at end of file +1 1 1 NULL 1 \ No newline at end of file From 71b01d7c8414a37ee9a7d0e562155e1ab14f0030 Mon Sep 17 00:00:00 2001 From: Weijun-H Date: Wed, 14 Aug 2024 22:59:09 +0800 Subject: [PATCH 19/27] chore: Update doc --- datafusion/functions-nested/src/map_extract.rs | 2 +- docs/source/user-guide/sql/scalar_functions.md | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/datafusion/functions-nested/src/map_extract.rs b/datafusion/functions-nested/src/map_extract.rs index cabe4719730c4..c4bed19df0be8 100644 --- a/datafusion/functions-nested/src/map_extract.rs +++ b/datafusion/functions-nested/src/map_extract.rs @@ -39,7 +39,7 @@ make_udf_expr_and_func!( MapExtract, map_extract, map key, - "Return a corresponding value from a map for a given key, or NULL if the key is not found.", + "Return corresponding values from a map for a given key, or NULL if the key is not found.", map_extract_udf ); diff --git a/docs/source/user-guide/sql/scalar_functions.md b/docs/source/user-guide/sql/scalar_functions.md index 9f8acc54dc05f..c527d002aa669 100644 --- a/docs/source/user-guide/sql/scalar_functions.md +++ b/docs/source/user-guide/sql/scalar_functions.md @@ -3703,7 +3703,7 @@ SELECT MAKE_MAP('POST', 41, 'HEAD', 33, 'PATCH', null); ### `map_extract` -Return a corresponding value from a map for a given key, or NULL if the key is not found. +Return corresponding values from a map for a given key, or NULL if the key is not found. ``` SELECT map_extract(map, key); From 55bb3c77ad3e5ef88147210d6e2e9b31ca1225e5 Mon Sep 17 00:00:00 2001 From: Weijun-H Date: Wed, 14 Aug 2024 23:10:03 +0800 Subject: [PATCH 20/27] chore: Fic clippy --- datafusion/functions-nested/src/map_extract.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/datafusion/functions-nested/src/map_extract.rs b/datafusion/functions-nested/src/map_extract.rs index c4bed19df0be8..242e520eb425a 100644 --- a/datafusion/functions-nested/src/map_extract.rs +++ b/datafusion/functions-nested/src/map_extract.rs @@ -154,7 +154,7 @@ fn map_extract_inner(args: &[ArrayRef]) -> Result { ), _ => { - return exec_err!("Unsupported key type for map_extract"); + exec_err!("Unsupported key type for map_extract") } } } From 5e978b41e8efcf04cc60339e5afcd32ddf5e2ba7 Mon Sep 17 00:00:00 2001 From: Weijun-H Date: Thu, 15 Aug 2024 17:45:48 +0800 Subject: [PATCH 21/27] refacotr: Use MutableArrayData --- .../functions-nested/src/map_extract.rs | 182 ++++++++++-------- datafusion/sqllogictest/test_files/map.slt | 53 ++++- 2 files changed, 150 insertions(+), 85 deletions(-) diff --git a/datafusion/functions-nested/src/map_extract.rs b/datafusion/functions-nested/src/map_extract.rs index 242e520eb425a..2c5faec8fef15 100644 --- a/datafusion/functions-nested/src/map_extract.rs +++ b/datafusion/functions-nested/src/map_extract.rs @@ -17,17 +17,13 @@ //! [`ScalarUDFImpl`] definitions for map_extract functions. -use arrow::array::ArrayRef; - -use arrow::datatypes::{DataType, Float64Type, Int64Type, UInt32Type}; -use arrow_array::{ - new_null_array, Array, ArrowPrimitiveType, MapArray, PrimitiveArray, StringArray, - StringViewArray, -}; -use datafusion_common::cast::{ - as_primitive_array, as_string_array, as_string_view_array, -}; +use arrow::array::{ArrayRef, Capacities, MutableArrayData}; +use arrow_array::make_array; + +use arrow::datatypes::{DataType, Float64Type, Int64Type, UInt64Type}; +use arrow_array::{Array, MapArray, PrimitiveArray, StringArray, StringViewArray}; use datafusion_common::utils::get_map_entry_field; +use datafusion_common::DataFusionError; use datafusion_common::{cast::as_map_array, exec_err, Result}; use datafusion_expr::{ColumnarValue, ScalarUDFImpl, Signature, Volatility}; use std::any::Any; @@ -100,6 +96,92 @@ impl ScalarUDFImpl for MapExtract { } } +macro_rules! impl_map_extract_inner { + ($func_name:ident, $key_type:ty, $array_type:ty) => { + fn $func_name( + map_array: &MapArray, + query_keys_array: &dyn Array, + ) -> Result { + let keys_array = map_array + .keys() + .as_any() + .downcast_ref::<$array_type>() + .ok_or_else(|| { + DataFusionError::Internal(format!( + "Failed to downcast keys array to {}", + stringify!($array_type) + )) + })?; + let query_keys_array = query_keys_array + .as_any() + .downcast_ref::<$array_type>() + .ok_or_else(|| { + DataFusionError::Internal(format!( + "Failed to downcast query keys array to {}", + stringify!($array_type) + )) + })?; + + let values = map_array.values(); + let original_data = values.to_data(); + let capacity = Capacities::Array(original_data.len()); + + let mut offsets = vec![0_i32]; + + let mut mutable = + MutableArrayData::with_capacities(vec![&original_data], true, capacity); + + for (row_index, offset_window) in + map_array.value_offsets().windows(2).enumerate() + { + let start = offset_window[0] as usize; + let end = offset_window[1] as usize; + let len = end - start; + + let query_key = query_keys_array.value(row_index); + + let value_index = keys_array + .slice(start, len) + .iter() + .position(|key| key.unwrap() == query_key); + match value_index { + Some(index) => { + let new_index = start + index; + mutable.extend(0, new_index, new_index + 1); + offsets.push(offsets.last().unwrap().to_owned() + 1); + } + None => { + offsets.push(offsets.last().unwrap().to_owned()); + mutable.extend_nulls(1); + } + } + } + + let data = mutable.freeze(); + Ok(make_array(data)) + } + }; +} + +// Implement for different types +impl_map_extract_inner!( + generic_map_extract_inner_int, + i64, + PrimitiveArray +); +impl_map_extract_inner!( + generic_map_extract_inner_uint, + u64, + PrimitiveArray +); +impl_map_extract_inner!( + generic_map_extract_inner_float, + f64, + PrimitiveArray +); +impl_map_extract_inner!(string_map_extract_inner, &str, StringArray); +impl_map_extract_inner!(string_view_map_extract_inner, &str, StringViewArray); + fn map_extract_inner(args: &[ArrayRef]) -> Result { if args.len() != 2 { return exec_err!("map_extract expects two arguments"); @@ -122,84 +204,16 @@ fn map_extract_inner(args: &[ArrayRef]) -> Result { match key_type { DataType::Int8 | DataType::Int16 | DataType::Int32 | DataType::Int64 => { - generic_map_extract_inner::( - map_array, - as_primitive_array::(map_array.keys())?, - as_primitive_array::(&args[1])?, - ) + generic_map_extract_inner_int(map_array, args[1].as_ref()) } DataType::UInt8 | DataType::UInt16 | DataType::UInt32 | DataType::UInt64 => { - generic_map_extract_inner::( - map_array, - as_primitive_array::(map_array.keys())?, - as_primitive_array::(&args[1])?, - ) + generic_map_extract_inner_uint(map_array, args[1].as_ref()) } DataType::Float32 | DataType::Float64 => { - generic_map_extract_inner::( - map_array, - as_primitive_array::(map_array.keys())?, - as_primitive_array::(&args[1])?, - ) - } - DataType::Utf8 => string_map_extract_inner( - map_array, - as_string_array(&map_array.keys())?, - as_string_array(&args[1])?, - ), - DataType::Utf8View => string_view_map_extract_inner( - map_array, - as_string_view_array(&map_array.keys())?, - as_string_view_array(&args[1])?, - ), - - _ => { - exec_err!("Unsupported key type for map_extract") + generic_map_extract_inner_float(map_array, args[1].as_ref()) } - } -} - -fn generic_map_extract_inner( - map_array: &MapArray, - keys_array: &PrimitiveArray, - query_keys_array: &PrimitiveArray, -) -> Result { - let query_key = query_keys_array.value(0); - // key cannot be NULL, so we can unwrap - let index = keys_array.iter().position(|key| key.unwrap() == query_key); - - match index { - Some(idx) => Ok(map_array.values().slice(idx, 1)), - None => Ok(new_null_array(map_array.value_type(), 1)), - } -} - -fn string_map_extract_inner( - map_array: &MapArray, - keys_array: &StringArray, - query_keys_array: &StringArray, -) -> Result { - let query_key = query_keys_array.value(0); - // key cannot be NULL, so we can unwrap - let index = keys_array.iter().position(|key| key.unwrap() == query_key); - - match index { - Some(idx) => Ok(map_array.values().slice(idx, 1)), - None => Ok(new_null_array(map_array.value_type(), 1)), - } -} - -fn string_view_map_extract_inner( - map_array: &MapArray, - keys_array: &StringViewArray, - query_keys_array: &StringViewArray, -) -> Result { - let query_key = query_keys_array.value(0); - // key cannot be NULL, so we can unwrap - let index = keys_array.iter().position(|key| key.unwrap() == query_key); - - match index { - Some(idx) => Ok(map_array.values().slice(idx, 1)), - None => Ok(new_null_array(map_array.value_type(), 1)), + DataType::Utf8 => string_map_extract_inner(map_array, args[1].as_ref()), + DataType::Utf8View => string_view_map_extract_inner(map_array, args[1].as_ref()), + _ => exec_err!("Unsupported key type: {}", key_type), } } diff --git a/datafusion/sqllogictest/test_files/map.slt b/datafusion/sqllogictest/test_files/map.slt index bfa805a2a9763..69a01acbc64c9 100644 --- a/datafusion/sqllogictest/test_files/map.slt +++ b/datafusion/sqllogictest/test_files/map.slt @@ -15,6 +15,22 @@ # specific language governing permissions and limitations # under the License. +statement ok +CREATE TABLE map_array_table_1 +AS VALUES + (MAP {1: [1, NULL, 3], 2: [4, NULL, 6], 3: [7, 8, 9]}, 1, 1.0, '1'), + (MAP {4: [1, NULL, 3], 5: [4, NULL, 6], 6: [7, 8, 9]}, 5, 5.0, '5'), + (MAP {7: [1, NULL, 3], 8: [9, NULL, 6], 9: [7, 8, 9]}, 4, 4.0, '4') +; + +statement ok +CREATE TABLE map_array_table_2 +AS VALUES + (MAP {'1': [1, NULL, 3], '2': [4, NULL, 6], '3': [7, 8, 9]}, 1, 1.0, '1'), + (MAP {'4': [1, NULL, 3], '5': [4, NULL, 6], '6': [7, 8, 9]}, 5, 5.0, '5'), + (MAP {'7': [1, NULL, 3], '8': [9, NULL, 6], '9': [7, 8, 9]}, 4, 4.0, '4') +; + statement ok CREATE EXTERNAL TABLE data STORED AS PARQUET @@ -522,4 +538,39 @@ select map_extract(MAP {1: 1, 2: 2, 3:3}, '1'), map_extract(MAP {1: 1, 2: 2, 3:3 map_extract(MAP {1.0: 1, 2: 2, 3:3}, '1'), map_extract(MAP {'1': 1, '2': 2, '3':3}, 1.0), map_extract(MAP {arrow_cast('1', 'Utf8View'): 1, arrow_cast('2', 'Utf8View'): 2, arrow_cast('3', 'Utf8View'):3}, '1'); ---- -1 1 1 NULL 1 \ No newline at end of file +1 1 1 NULL 1 + +# map_extract with columns +query ??? +select map_extract(column1, 1), map_extract(column1, 4), map_extract(column1, 5) from map_array_table_1; +---- +[1, , 3] NULL NULL +NULL [1, , 3] [4, , 6] +NULL NULL NULL + +query ??? +select map_extract(column1, column2), map_extract(column1, column3), map_extract(column1, column4) from map_array_table_1; +---- +[1, , 3] [1, , 3] [1, , 3] +[4, , 6] [4, , 6] [4, , 6] +NULL NULL NULL + +query ??? +select map_extract(column1, column2), map_extract(column1, column3), map_extract(column1, column4) from map_array_table_2; +---- +[1, , 3] NULL [1, , 3] +[4, , 6] NULL [4, , 6] +NULL NULL NULL + +query ??? +select map_extract(column1, 1), map_extract(column1, 4), map_extract(column1, 5) from map_array_table_2; +---- +[1, , 3] NULL NULL +NULL [1, , 3] [4, , 6] +NULL NULL NULL + +statement ok +drop table map_array_table_1; + +statement ok +drop table map_array_table_2; \ No newline at end of file From bebc67d1809abe03023c3d467984435e140d1511 Mon Sep 17 00:00:00 2001 From: Weijun-H Date: Thu, 15 Aug 2024 17:55:14 +0800 Subject: [PATCH 22/27] chore --- datafusion/functions-nested/src/map_extract.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/datafusion/functions-nested/src/map_extract.rs b/datafusion/functions-nested/src/map_extract.rs index 2c5faec8fef15..0e7d9b9667ecb 100644 --- a/datafusion/functions-nested/src/map_extract.rs +++ b/datafusion/functions-nested/src/map_extract.rs @@ -163,7 +163,7 @@ macro_rules! impl_map_extract_inner { }; } -// Implement for different types +// Implementations for different key types impl_map_extract_inner!( generic_map_extract_inner_int, i64, From 2bb67bd131f9fba8e29ab23a1591992cc82b8528 Mon Sep 17 00:00:00 2001 From: Weijun-H Date: Thu, 15 Aug 2024 21:04:45 +0800 Subject: [PATCH 23/27] refactor: Avoid type conversion --- .../functions-nested/src/map_extract.rs | 136 +++++------------- 1 file changed, 37 insertions(+), 99 deletions(-) diff --git a/datafusion/functions-nested/src/map_extract.rs b/datafusion/functions-nested/src/map_extract.rs index 0e7d9b9667ecb..e4dfc29fa3bae 100644 --- a/datafusion/functions-nested/src/map_extract.rs +++ b/datafusion/functions-nested/src/map_extract.rs @@ -20,10 +20,10 @@ use arrow::array::{ArrayRef, Capacities, MutableArrayData}; use arrow_array::make_array; -use arrow::datatypes::{DataType, Float64Type, Int64Type, UInt64Type}; -use arrow_array::{Array, MapArray, PrimitiveArray, StringArray, StringViewArray}; +use arrow::datatypes::DataType; +use arrow_array::{Array, MapArray}; use datafusion_common::utils::get_map_entry_field; -use datafusion_common::DataFusionError; + use datafusion_common::{cast::as_map_array, exec_err, Result}; use datafusion_expr::{ColumnarValue, ScalarUDFImpl, Signature, Volatility}; use std::any::Any; @@ -96,91 +96,42 @@ impl ScalarUDFImpl for MapExtract { } } -macro_rules! impl_map_extract_inner { - ($func_name:ident, $key_type:ty, $array_type:ty) => { - fn $func_name( - map_array: &MapArray, - query_keys_array: &dyn Array, - ) -> Result { - let keys_array = map_array - .keys() - .as_any() - .downcast_ref::<$array_type>() - .ok_or_else(|| { - DataFusionError::Internal(format!( - "Failed to downcast keys array to {}", - stringify!($array_type) - )) - })?; - let query_keys_array = query_keys_array - .as_any() - .downcast_ref::<$array_type>() - .ok_or_else(|| { - DataFusionError::Internal(format!( - "Failed to downcast query keys array to {}", - stringify!($array_type) - )) - })?; - - let values = map_array.values(); - let original_data = values.to_data(); - let capacity = Capacities::Array(original_data.len()); - - let mut offsets = vec![0_i32]; - - let mut mutable = - MutableArrayData::with_capacities(vec![&original_data], true, capacity); - - for (row_index, offset_window) in - map_array.value_offsets().windows(2).enumerate() - { - let start = offset_window[0] as usize; - let end = offset_window[1] as usize; - let len = end - start; - - let query_key = query_keys_array.value(row_index); - - let value_index = keys_array - .slice(start, len) - .iter() - .position(|key| key.unwrap() == query_key); - match value_index { - Some(index) => { - let new_index = start + index; - mutable.extend(0, new_index, new_index + 1); - offsets.push(offsets.last().unwrap().to_owned() + 1); - } - None => { - offsets.push(offsets.last().unwrap().to_owned()); - mutable.extend_nulls(1); - } - } - } +fn general_map_extract_inner( + map_array: &MapArray, + query_keys_array: &dyn Array, +) -> Result { + let keys = map_array.keys(); + + let values = map_array.values(); + let original_data = values.to_data(); + let capacity = Capacities::Array(original_data.len()); + + let mut mutable = + MutableArrayData::with_capacities(vec![&original_data], true, capacity); + + for (row_index, offset_window) in map_array.value_offsets().windows(2).enumerate() { + let start = offset_window[0] as usize; + let end = offset_window[1] as usize; + let len = end - start; - let data = mutable.freeze(); - Ok(make_array(data)) + let query_key = query_keys_array.slice(row_index, 1); + + let value_index = + (0..len).find(|&i| keys.slice(start + i, 1) == query_key.clone()); + + match value_index { + Some(index) => { + mutable.extend(0, start + index, start + index + 1); + } + None => { + mutable.extend_nulls(1); + } } - }; -} + } -// Implementations for different key types -impl_map_extract_inner!( - generic_map_extract_inner_int, - i64, - PrimitiveArray -); -impl_map_extract_inner!( - generic_map_extract_inner_uint, - u64, - PrimitiveArray -); -impl_map_extract_inner!( - generic_map_extract_inner_float, - f64, - PrimitiveArray -); -impl_map_extract_inner!(string_map_extract_inner, &str, StringArray); -impl_map_extract_inner!(string_view_map_extract_inner, &str, StringViewArray); + let data = mutable.freeze(); + Ok(make_array(data)) +} fn map_extract_inner(args: &[ArrayRef]) -> Result { if args.len() != 2 { @@ -202,18 +153,5 @@ fn map_extract_inner(args: &[ArrayRef]) -> Result { ); } - match key_type { - DataType::Int8 | DataType::Int16 | DataType::Int32 | DataType::Int64 => { - generic_map_extract_inner_int(map_array, args[1].as_ref()) - } - DataType::UInt8 | DataType::UInt16 | DataType::UInt32 | DataType::UInt64 => { - generic_map_extract_inner_uint(map_array, args[1].as_ref()) - } - DataType::Float32 | DataType::Float64 => { - generic_map_extract_inner_float(map_array, args[1].as_ref()) - } - DataType::Utf8 => string_map_extract_inner(map_array, args[1].as_ref()), - DataType::Utf8View => string_view_map_extract_inner(map_array, args[1].as_ref()), - _ => exec_err!("Unsupported key type: {}", key_type), - } + general_map_extract_inner(map_array, &args[1]) } From 514ee771fe3036d7b4d63ab69c2f79cb8d8da276 Mon Sep 17 00:00:00 2001 From: Weijun-H Date: Thu, 15 Aug 2024 21:16:50 +0800 Subject: [PATCH 24/27] chore: Fix clippy --- datafusion/functions-nested/src/map_extract.rs | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/datafusion/functions-nested/src/map_extract.rs b/datafusion/functions-nested/src/map_extract.rs index e4dfc29fa3bae..d07789938537c 100644 --- a/datafusion/functions-nested/src/map_extract.rs +++ b/datafusion/functions-nested/src/map_extract.rs @@ -27,6 +27,7 @@ use datafusion_common::utils::get_map_entry_field; use datafusion_common::{cast::as_map_array, exec_err, Result}; use datafusion_expr::{ColumnarValue, ScalarUDFImpl, Signature, Volatility}; use std::any::Any; +use std::sync::Arc; use crate::utils::make_scalar_function; @@ -116,8 +117,8 @@ fn general_map_extract_inner( let query_key = query_keys_array.slice(row_index, 1); - let value_index = - (0..len).find(|&i| keys.slice(start + i, 1) == query_key.clone()); + let value_index = (0..len) + .find(|&i| keys.slice(start + i, 1) == Arc::::clone(&query_key)); match value_index { Some(index) => { From c5b23ec5f9d66ae65863b00af80e0f8cb6aa2014 Mon Sep 17 00:00:00 2001 From: Weijun-H Date: Fri, 16 Aug 2024 10:39:49 +0800 Subject: [PATCH 25/27] chore: Follow DuckDB --- .../functions-nested/src/map_extract.rs | 23 ++++++++-- datafusion/sqllogictest/test_files/map.slt | 42 +++++++++---------- .../source/user-guide/sql/scalar_functions.md | 8 ++-- 3 files changed, 43 insertions(+), 30 deletions(-) diff --git a/datafusion/functions-nested/src/map_extract.rs b/datafusion/functions-nested/src/map_extract.rs index d07789938537c..f17ce2ae04bad 100644 --- a/datafusion/functions-nested/src/map_extract.rs +++ b/datafusion/functions-nested/src/map_extract.rs @@ -18,16 +18,19 @@ //! [`ScalarUDFImpl`] definitions for map_extract functions. use arrow::array::{ArrayRef, Capacities, MutableArrayData}; -use arrow_array::make_array; +use arrow_array::{make_array, ListArray}; use arrow::datatypes::DataType; use arrow_array::{Array, MapArray}; +use arrow_buffer::OffsetBuffer; +use arrow_schema::Field; use datafusion_common::utils::get_map_entry_field; use datafusion_common::{cast::as_map_array, exec_err, Result}; use datafusion_expr::{ColumnarValue, ScalarUDFImpl, Signature, Volatility}; use std::any::Any; use std::sync::Arc; +use std::vec; use crate::utils::make_scalar_function; @@ -36,7 +39,7 @@ make_udf_expr_and_func!( MapExtract, map_extract, map key, - "Return corresponding values from a map for a given key, or NULL if the key is not found.", + "Return a list containing the value for a given key or an empty list if the key is not contained in the map.", map_extract_udf ); @@ -73,7 +76,11 @@ impl ScalarUDFImpl for MapExtract { } let map_type = &arg_types[0]; let map_fields = get_map_entry_field(map_type)?; - Ok(map_fields[1].data_type().clone()) + Ok(DataType::List(Arc::new(Field::new( + "item", + map_fields.last().unwrap().data_type().clone(), + true, + )))) } fn invoke(&self, args: &[ColumnarValue]) -> Result { @@ -102,6 +109,7 @@ fn general_map_extract_inner( query_keys_array: &dyn Array, ) -> Result { let keys = map_array.keys(); + let mut offsets = vec![0_i32]; let values = map_array.values(); let original_data = values.to_data(); @@ -128,10 +136,17 @@ fn general_map_extract_inner( mutable.extend_nulls(1); } } + offsets.push(offsets[row_index] + 1); } let data = mutable.freeze(); - Ok(make_array(data)) + + Ok(Arc::new(ListArray::new( + Arc::new(Field::new("item", map_array.value_type().clone(), true)), + OffsetBuffer::::new(offsets.into()), + Arc::new(make_array(data)), + None, + ))) } fn map_extract_inner(args: &[ArrayRef]) -> Result { diff --git a/datafusion/sqllogictest/test_files/map.slt b/datafusion/sqllogictest/test_files/map.slt index 69a01acbc64c9..b7a0a74913b06 100644 --- a/datafusion/sqllogictest/test_files/map.slt +++ b/datafusion/sqllogictest/test_files/map.slt @@ -512,62 +512,62 @@ select cardinality(map([1, 2, 3], ['a', 'b', 'c'])), cardinality(MAP {'a': 1, 'b # map_extract # key is string -query IIII +query ???? select map_extract(MAP {'a': 1, 'b': NULL, 'c': 3}, 'a'), map_extract(MAP {'a': 1, 'b': NULL, 'c': 3}, 'b'), map_extract(MAP {'a': 1, 'b': NULL, 'c': 3}, 'c'), map_extract(MAP {'a': 1, 'b': NULL, 'c': 3}, 'd'); ---- -1 NULL 3 NULL +[1] [] [3] [] # key is integer -query IIII +query ???? select map_extract(MAP {1: 1, 2: NULL, 3:3}, 1), map_extract(MAP {1: 1, 2: NULL, 3:3}, 2), map_extract(MAP {1: 1, 2: NULL, 3:3}, 3), map_extract(MAP {1: 1, 2: NULL, 3:3}, 4); ---- -1 NULL 3 NULL +[1] [] [3] [] # value is list query ???? select map_extract(MAP {1: [1, 2], 2: NULL, 3:[3]}, 1), map_extract(MAP {1: [1, 2], 2: NULL, 3:[3]}, 2), map_extract(MAP {1: [1, 2], 2: NULL, 3:[3]}, 3), map_extract(MAP {1: [1, 2], 2: NULL, 3:[3]}, 4); ---- -[1, 2] NULL [3] NULL +[[1, 2]] [] [[3]] [] # key in map and query key are different types -query IIIII +query ????? select map_extract(MAP {1: 1, 2: 2, 3:3}, '1'), map_extract(MAP {1: 1, 2: 2, 3:3}, 1.0), map_extract(MAP {1.0: 1, 2: 2, 3:3}, '1'), map_extract(MAP {'1': 1, '2': 2, '3':3}, 1.0), map_extract(MAP {arrow_cast('1', 'Utf8View'): 1, arrow_cast('2', 'Utf8View'): 2, arrow_cast('3', 'Utf8View'):3}, '1'); ---- -1 1 1 NULL 1 +[1] [1] [1] [] [1] # map_extract with columns query ??? -select map_extract(column1, 1), map_extract(column1, 4), map_extract(column1, 5) from map_array_table_1; +select map_extract(column1, 1), map_extract(column1, 5), map_extract(column1, 7) from map_array_table_1; ---- -[1, , 3] NULL NULL -NULL [1, , 3] [4, , 6] -NULL NULL NULL +[[1, , 3]] [] [] +[] [[4, , 6]] [] +[] [] [[1, , 3]] query ??? select map_extract(column1, column2), map_extract(column1, column3), map_extract(column1, column4) from map_array_table_1; ---- -[1, , 3] [1, , 3] [1, , 3] -[4, , 6] [4, , 6] [4, , 6] -NULL NULL NULL +[[1, , 3]] [[1, , 3]] [[1, , 3]] +[[4, , 6]] [[4, , 6]] [[4, , 6]] +[] [] [] query ??? select map_extract(column1, column2), map_extract(column1, column3), map_extract(column1, column4) from map_array_table_2; ---- -[1, , 3] NULL [1, , 3] -[4, , 6] NULL [4, , 6] -NULL NULL NULL +[[1, , 3]] [] [[1, , 3]] +[[4, , 6]] [] [[4, , 6]] +[] [] [] query ??? -select map_extract(column1, 1), map_extract(column1, 4), map_extract(column1, 5) from map_array_table_2; +select map_extract(column1, 1), map_extract(column1, 5), map_extract(column1, 7) from map_array_table_2; ---- -[1, , 3] NULL NULL -NULL [1, , 3] [4, , 6] -NULL NULL NULL +[[1, , 3]] [] [] +[] [[4, , 6]] [] +[] [] [[1, , 3]] statement ok drop table map_array_table_1; diff --git a/docs/source/user-guide/sql/scalar_functions.md b/docs/source/user-guide/sql/scalar_functions.md index c527d002aa669..c7b3409ba7cd2 100644 --- a/docs/source/user-guide/sql/scalar_functions.md +++ b/docs/source/user-guide/sql/scalar_functions.md @@ -3703,12 +3703,10 @@ SELECT MAKE_MAP('POST', 41, 'HEAD', 33, 'PATCH', null); ### `map_extract` -Return corresponding values from a map for a given key, or NULL if the key is not found. +Return a list containing the value for a given key or an empty list if the key is not contained in the map. ``` -SELECT map_extract(map, key); ----- -1 +map_extract(map, key) ``` #### Arguments @@ -3724,7 +3722,7 @@ SELECT map_extract(map, key); ``` SELECT map_extract(MAP {'a': 1, 'b': NULL, 'c': 3}, 'a'); ---- -1 +[1] ``` #### Aliases From b4ad47676a8691c46a763953b3b1384a7a785e58 Mon Sep 17 00:00:00 2001 From: Alex Huang Date: Fri, 16 Aug 2024 22:04:19 +0800 Subject: [PATCH 26/27] Update datafusion/functions-nested/src/map_extract.rs Co-authored-by: Jay Zhan --- datafusion/functions-nested/src/map_extract.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/datafusion/functions-nested/src/map_extract.rs b/datafusion/functions-nested/src/map_extract.rs index f17ce2ae04bad..5b8992ad71133 100644 --- a/datafusion/functions-nested/src/map_extract.rs +++ b/datafusion/functions-nested/src/map_extract.rs @@ -126,7 +126,7 @@ fn general_map_extract_inner( let query_key = query_keys_array.slice(row_index, 1); let value_index = (0..len) - .find(|&i| keys.slice(start + i, 1) == Arc::::clone(&query_key)); + .find(|&i| keys.slice(start + i, 1).as_ref() == query_key.as_ref()); match value_index { Some(index) => { From f48b5663d030572b68e9d31f5f20aec89b80e800 Mon Sep 17 00:00:00 2001 From: Weijun-H Date: Fri, 16 Aug 2024 22:10:46 +0800 Subject: [PATCH 27/27] chore: Fix fmt --- datafusion/functions-nested/src/map_extract.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/datafusion/functions-nested/src/map_extract.rs b/datafusion/functions-nested/src/map_extract.rs index 5b8992ad71133..82f0d8d6c15e4 100644 --- a/datafusion/functions-nested/src/map_extract.rs +++ b/datafusion/functions-nested/src/map_extract.rs @@ -125,8 +125,8 @@ fn general_map_extract_inner( let query_key = query_keys_array.slice(row_index, 1); - let value_index = (0..len) - .find(|&i| keys.slice(start + i, 1).as_ref() == query_key.as_ref()); + let value_index = + (0..len).find(|&i| keys.slice(start + i, 1).as_ref() == query_key.as_ref()); match value_index { Some(index) => {