From d563551a4c494acdabb1030f3a347db03a9b4d29 Mon Sep 17 00:00:00 2001 From: Subhan Date: Fri, 8 Nov 2024 16:09:47 +0000 Subject: [PATCH 1/5] implement get_possible_types for Uniform, Coercible, Variadic, Numeric and String --- datafusion/expr-common/src/signature.rs | 127 ++++++++++++++++++++++-- 1 file changed, 119 insertions(+), 8 deletions(-) diff --git a/datafusion/expr-common/src/signature.rs b/datafusion/expr-common/src/signature.rs index be69d3b809c09..e3d374d066b02 100644 --- a/datafusion/expr-common/src/signature.rs +++ b/datafusion/expr-common/src/signature.rs @@ -18,8 +18,10 @@ //! Signature module contains foundational types that are used to represent signatures, types, //! and return types of functions in DataFusion. +use crate::type_coercion::aggregates::{NUMERICS, STRINGS}; use arrow::datatypes::DataType; -use datafusion_common::types::LogicalTypeRef; +use datafusion_common::types::{LogicalTypeRef, NativeType}; +use itertools::Itertools; /// Constant that is used as a placeholder for any valid timezone. 
/// This is used where a function can accept a timestamp type with any @@ -258,20 +260,65 @@ impl TypeSignature { .iter() .flat_map(|type_sig| type_sig.get_possible_types()) .collect(), + TypeSignature::Uniform(arg_count, types) => types + .iter() + .map(|data_type| vec![data_type.clone(); *arg_count]) + .collect(), + TypeSignature::Coercible(types) => types + .iter() + .map(|logical_type| get_data_types(logical_type.native())) + .multi_cartesian_product() + .collect(), + TypeSignature::Variadic(types) => types + .iter() + .cloned() + .map(|data_type| vec![data_type; types.len()]) + .collect(), + TypeSignature::Numeric(arg_count) => NUMERICS + .iter() + .map(|numeric_type| vec![numeric_type.clone(); *arg_count]) + .collect(), + TypeSignature::String(arg_count) => STRINGS + .iter() + .map(|string_type| vec![string_type.clone(); *arg_count]) + .collect(), // TODO: Implement for other types - TypeSignature::Uniform(_, _) - | TypeSignature::Coercible(_) - | TypeSignature::Any(_) - | TypeSignature::Variadic(_) + TypeSignature::Any(_) | TypeSignature::VariadicAny - | TypeSignature::UserDefined | TypeSignature::ArraySignature(_) - | TypeSignature::Numeric(_) - | TypeSignature::String(_) => vec![], + | TypeSignature::UserDefined => vec![], } } } +fn get_data_types(native_type: &NativeType) -> Vec<DataType> { + match native_type { + NativeType::Null => vec![DataType::Null], + NativeType::Boolean => vec![DataType::Boolean], + NativeType::Int8 => vec![DataType::Int8], + NativeType::Int16 => vec![DataType::Int16], + NativeType::Int32 => vec![DataType::Int32], + NativeType::Int64 => vec![DataType::Int64], + NativeType::UInt8 => vec![DataType::UInt8], + NativeType::UInt16 => vec![DataType::UInt16], + NativeType::UInt32 => vec![DataType::UInt32], + NativeType::UInt64 => vec![DataType::UInt64], + NativeType::Float16 => vec![DataType::Float16], + NativeType::Float32 => vec![DataType::Float32], + NativeType::Float64 => vec![DataType::Float64], + NativeType::Date => vec![DataType::Date32, 
DataType::Date64], + NativeType::Binary => vec![ + DataType::Binary, + DataType::LargeBinary, + DataType::BinaryView, + ], + NativeType::String => { + vec![DataType::Utf8, DataType::LargeUtf8, DataType::Utf8View] + } + _ => unreachable!(), + } +} + /// Defines the supported argument types ([`TypeSignature`]) and [`Volatility`] for a function. /// /// DataFusion will automatically coerce (cast) argument types to one of the supported @@ -417,6 +464,8 @@ impl Signature { #[cfg(test)] mod tests { + use datafusion_common::types::{logical_int64, logical_string}; + use super::*; #[test] @@ -515,5 +564,67 @@ mod tests { vec![DataType::Utf8] ] ); + + let type_signature = + TypeSignature::Uniform(2, vec![DataType::Float32, DataType::Int64]); + let possible_types = type_signature.get_possible_types(); + assert_eq!( + possible_types, + vec![ + vec![DataType::Float32, DataType::Float32], + vec![DataType::Int64, DataType::Int64] + ] + ); + + let type_signature = + TypeSignature::Coercible(vec![logical_string(), logical_int64()]); + let possible_types = type_signature.get_possible_types(); + assert_eq!( + possible_types, + vec![ + vec![DataType::Utf8, DataType::Int64], + vec![DataType::LargeUtf8, DataType::Int64], + vec![DataType::Utf8View, DataType::Int64] + ] + ); + + let type_signature = + TypeSignature::Variadic(vec![DataType::Int32, DataType::Int64]); + let possible_types = type_signature.get_possible_types(); + assert_eq!( + possible_types, + vec![ + vec![DataType::Int32, DataType::Int32], + vec![DataType::Int64, DataType::Int64] + ] + ); + + let type_signature = TypeSignature::Numeric(2); + let possible_types = type_signature.get_possible_types(); + assert_eq!( + possible_types, + vec![ + vec![DataType::Int8, DataType::Int8], + vec![DataType::Int16, DataType::Int16], + vec![DataType::Int32, DataType::Int32], + vec![DataType::Int64, DataType::Int64], + vec![DataType::UInt8, DataType::UInt8], + vec![DataType::UInt16, DataType::UInt16], + vec![DataType::UInt32, 
DataType::UInt32], + vec![DataType::UInt64, DataType::UInt64], + vec![DataType::Float32, DataType::Float32], + vec![DataType::Float64, DataType::Float64] + ] + ); + + let type_signature = TypeSignature::String(2); + let possible_types = type_signature.get_possible_types(); + assert_eq!( + possible_types, + vec![ + vec![DataType::Utf8, DataType::Utf8], + vec![DataType::LargeUtf8, DataType::LargeUtf8] + ] + ); } } From 040492cedbf08737a106e8cb3ae166132d319276 Mon Sep 17 00:00:00 2001 From: Subhan Date: Sat, 9 Nov 2024 15:02:35 +0000 Subject: [PATCH 2/5] fix possible types for variadic --- datafusion/expr-common/src/signature.rs | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/datafusion/expr-common/src/signature.rs b/datafusion/expr-common/src/signature.rs index e3d374d066b02..8fc45a69e33fb 100644 --- a/datafusion/expr-common/src/signature.rs +++ b/datafusion/expr-common/src/signature.rs @@ -272,7 +272,7 @@ impl TypeSignature { TypeSignature::Variadic(types) => types .iter() .cloned() - .map(|data_type| vec![data_type; types.len()]) + .map(|data_type| vec![data_type]) .collect(), TypeSignature::Numeric(arg_count) => NUMERICS .iter() @@ -593,10 +593,7 @@ mod tests { let possible_types = type_signature.get_possible_types(); assert_eq!( possible_types, - vec![ - vec![DataType::Int32, DataType::Int32], - vec![DataType::Int64, DataType::Int64] - ] + vec![vec![DataType::Int32], vec![DataType::Int64]] ); let type_signature = TypeSignature::Numeric(2); From f2fbc1fe3245c666b46a6f02c17e5ba865072475 Mon Sep 17 00:00:00 2001 From: Subhan Date: Sat, 9 Nov 2024 15:06:13 +0000 Subject: [PATCH 3/5] use cloned --- datafusion/expr-common/src/signature.rs | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/datafusion/expr-common/src/signature.rs b/datafusion/expr-common/src/signature.rs index 8fc45a69e33fb..e55093c701d0f 100644 --- a/datafusion/expr-common/src/signature.rs +++ b/datafusion/expr-common/src/signature.rs @@ -262,7 +262,8 @@ 
impl TypeSignature { .collect(), TypeSignature::Uniform(arg_count, types) => types .iter() - .map(|data_type| vec![data_type.clone(); *arg_count]) + .cloned() + .map(|data_type| vec![data_type; *arg_count]) .collect(), TypeSignature::Coercible(types) => types .iter() @@ -276,11 +277,13 @@ impl TypeSignature { .collect(), TypeSignature::Numeric(arg_count) => NUMERICS .iter() - .map(|numeric_type| vec![numeric_type.clone(); *arg_count]) + .cloned() + .map(|numeric_type| vec![numeric_type; *arg_count]) .collect(), TypeSignature::String(arg_count) => STRINGS .iter() - .map(|string_type| vec![string_type.clone(); *arg_count]) + .cloned() + .map(|string_type| vec![string_type; *arg_count]) .collect(), // TODO: Implement for other types TypeSignature::Any(_) From a1b376f197ff8b5d6bc06d1b5fddedc30cc3af84 Mon Sep 17 00:00:00 2001 From: Subhan Date: Sat, 9 Nov 2024 15:22:31 +0000 Subject: [PATCH 4/5] add Utf8View to STRINGS --- datafusion/expr-common/src/signature.rs | 3 ++- datafusion/expr-common/src/type_coercion/aggregates.rs | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/datafusion/expr-common/src/signature.rs b/datafusion/expr-common/src/signature.rs index e55093c701d0f..5d1406967e1c9 100644 --- a/datafusion/expr-common/src/signature.rs +++ b/datafusion/expr-common/src/signature.rs @@ -623,7 +623,8 @@ mod tests { possible_types, vec![ vec![DataType::Utf8, DataType::Utf8], - vec![DataType::LargeUtf8, DataType::LargeUtf8] + vec![DataType::LargeUtf8, DataType::LargeUtf8], + vec![DataType::Utf8View, DataType::Utf8View] ] ); } diff --git a/datafusion/expr-common/src/type_coercion/aggregates.rs b/datafusion/expr-common/src/type_coercion/aggregates.rs index fee75f9e45959..384d688cc27ed 100644 --- a/datafusion/expr-common/src/type_coercion/aggregates.rs +++ b/datafusion/expr-common/src/type_coercion/aggregates.rs @@ -23,7 +23,8 @@ use arrow::datatypes::{ use datafusion_common::{internal_err, plan_err, Result}; -pub static STRINGS: &[DataType] = 
&[DataType::Utf8, DataType::LargeUtf8]; +pub static STRINGS: &[DataType] = + &[DataType::Utf8, DataType::LargeUtf8, DataType::Utf8View]; pub static SIGNED_INTEGERS: &[DataType] = &[ DataType::Int8, From b78db507085e8fff1a66e4610eca687e308cbc51 Mon Sep 17 00:00:00 2001 From: delamarch3 <68732277+delamarch3@users.noreply.github.com> Date: Sat, 9 Nov 2024 15:23:16 +0000 Subject: [PATCH 5/5] add todo to support other native types Co-authored-by: Jax Liu --- datafusion/expr-common/src/signature.rs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/datafusion/expr-common/src/signature.rs b/datafusion/expr-common/src/signature.rs index 5d1406967e1c9..3846fae5de5dc 100644 --- a/datafusion/expr-common/src/signature.rs +++ b/datafusion/expr-common/src/signature.rs @@ -318,7 +318,8 @@ fn get_data_types(native_type: &NativeType) -> Vec<DataType> { NativeType::String => { vec![DataType::Utf8, DataType::LargeUtf8, DataType::Utf8View] } - _ => unreachable!(), + // TODO: support other native types + _ => vec![], } }