diff --git a/datafusion/functions/benches/iszero.rs b/datafusion/functions/benches/iszero.rs index 53e38745afa9..93bb2e7512fd 100644 --- a/datafusion/functions/benches/iszero.rs +++ b/datafusion/functions/benches/iszero.rs @@ -23,6 +23,7 @@ use arrow::{ util::bench_util::create_primitive_array, }; use criterion::{Criterion, criterion_group, criterion_main}; +use datafusion_common::ScalarValue; use datafusion_common::config::ConfigOptions; use datafusion_expr::{ColumnarValue, ScalarFunctionArgs}; use datafusion_functions::math::iszero; @@ -31,6 +32,8 @@ use std::sync::Arc; fn criterion_benchmark(c: &mut Criterion) { let iszero = iszero(); + let config_options = Arc::new(ConfigOptions::default()); + for size in [1024, 4096, 8192] { let f32_array = Arc::new(create_primitive_array::(size, 0.2)); let batch_len = f32_array.len(); @@ -43,7 +46,6 @@ fn criterion_benchmark(c: &mut Criterion) { }) .collect::>(); let return_field = Arc::new(Field::new("f", DataType::Boolean, true)); - let config_options = Arc::new(ConfigOptions::default()); c.bench_function(&format!("iszero f32 array: {size}"), |b| { b.iter(|| { @@ -60,6 +62,7 @@ fn criterion_benchmark(c: &mut Criterion) { ) }) }); + let f64_array = Arc::new(create_primitive_array::(size, 0.2)); let batch_len = f64_array.len(); let f64_args = vec![ColumnarValue::Array(f64_array)]; @@ -88,6 +91,46 @@ fn criterion_benchmark(c: &mut Criterion) { }) }); } + + // Scalar benchmarks - run once since size doesn't affect scalar performance + let scalar_f32_args = vec![ColumnarValue::Scalar(ScalarValue::Float32(Some(1.0)))]; + let scalar_f32_arg_fields = vec![Field::new("a", DataType::Float32, false).into()]; + let return_field_scalar = Arc::new(Field::new("f", DataType::Boolean, false)); + + c.bench_function("iszero f32 scalar", |b| { + b.iter(|| { + black_box( + iszero + .invoke_with_args(ScalarFunctionArgs { + args: scalar_f32_args.clone(), + arg_fields: scalar_f32_arg_fields.clone(), + number_rows: 1, + return_field: Arc::clone(&return_field_scalar), + config_options: Arc::clone(&config_options), + }) + .unwrap(), + ) + }) + }); + + let scalar_f64_args = vec![ColumnarValue::Scalar(ScalarValue::Float64(Some(1.0)))]; + let scalar_f64_arg_fields = vec![Field::new("a", DataType::Float64, false).into()]; + + c.bench_function("iszero f64 scalar", |b| { + b.iter(|| { + black_box( + iszero + .invoke_with_args(ScalarFunctionArgs { + args: scalar_f64_args.clone(), + arg_fields: scalar_f64_arg_fields.clone(), + number_rows: 1, + return_field: Arc::clone(&return_field_scalar), + config_options: Arc::clone(&config_options), + }) + .unwrap(), + ) + }) + }); } criterion_group!(benches, criterion_benchmark); diff --git a/datafusion/functions/src/math/iszero.rs b/datafusion/functions/src/math/iszero.rs index ba4afc5622eb..dd1b082cf0f3 100644 --- a/datafusion/functions/src/math/iszero.rs +++ b/datafusion/functions/src/math/iszero.rs @@ -18,12 +18,13 @@ use std::any::Any; use std::sync::Arc; -use arrow::array::{ArrayRef, ArrowNativeTypeOp, AsArray, BooleanArray}; +use arrow::array::{ArrowNativeTypeOp, AsArray, BooleanArray}; use arrow::datatypes::DataType::{Boolean, Float16, Float32, Float64}; use arrow::datatypes::{DataType, Float16Type, Float32Type, Float64Type}; use datafusion_common::types::NativeType; -use datafusion_common::{Result, ScalarValue, exec_err}; +use datafusion_common::utils::take_function_args; +use datafusion_common::{Result, ScalarValue, internal_err}; use datafusion_expr::{Coercion, TypeSignatureClass}; use datafusion_expr::{ ColumnarValue, Documentation, ScalarFunctionArgs, ScalarUDFImpl, Signature, @@ -31,8 +32,6 @@ use datafusion_expr::{ }; use datafusion_macros::user_doc; -use crate::utils::make_scalar_function; - #[user_doc( doc_section(label = "Math Functions"), description = "Returns true if a given number is +0.0 or -0.0 otherwise returns false.", @@ -90,79 +89,53 @@ impl ScalarUDFImpl for IsZeroFunc { } fn invoke_with_args(&self, args: ScalarFunctionArgs) -> Result { - // Handle NULL input - if args.args[0].data_type().is_null() { - return Ok(ColumnarValue::Scalar(ScalarValue::Boolean(None))); + let [arg] = take_function_args(self.name(), args.args)?; + + match arg { + ColumnarValue::Scalar(scalar) => { + if scalar.is_null() { + return Ok(ColumnarValue::Scalar(ScalarValue::Boolean(None))); + } + + match scalar { + ScalarValue::Float64(Some(v)) => { + Ok(ColumnarValue::Scalar(ScalarValue::Boolean(Some(v == 0.0)))) + } + ScalarValue::Float32(Some(v)) => { + Ok(ColumnarValue::Scalar(ScalarValue::Boolean(Some(v == 0.0)))) + } + ScalarValue::Float16(Some(v)) => Ok(ColumnarValue::Scalar( + ScalarValue::Boolean(Some(v.is_zero())), + )), + _ => { + internal_err!( + "Unexpected scalar type for iszero: {:?}", + scalar.data_type() + ) + } + } + } + ColumnarValue::Array(array) => match array.data_type() { + Float64 => Ok(ColumnarValue::Array(Arc::new(BooleanArray::from_unary( + array.as_primitive::(), + |x| x == 0.0, + )))), + Float32 => Ok(ColumnarValue::Array(Arc::new(BooleanArray::from_unary( + array.as_primitive::(), + |x| x == 0.0, + )))), + Float16 => Ok(ColumnarValue::Array(Arc::new(BooleanArray::from_unary( + array.as_primitive::(), + |x| x.is_zero(), + )))), + other => { + internal_err!("Unexpected data type {other:?} for function iszero") + } + }, } - make_scalar_function(iszero, vec![])(&args.args) } fn documentation(&self) -> Option<&Documentation> { self.doc() } } - -/// Iszero SQL function -fn iszero(args: &[ArrayRef]) -> Result { - match args[0].data_type() { - Float64 => Ok(Arc::new(BooleanArray::from_unary( - args[0].as_primitive::(), - |x| x == 0.0, - )) as ArrayRef), - - Float32 => Ok(Arc::new(BooleanArray::from_unary( - args[0].as_primitive::(), - |x| x == 0.0, - )) as ArrayRef), - - Float16 => Ok(Arc::new(BooleanArray::from_unary( - args[0].as_primitive::(), - |x| x.is_zero(), - )) as ArrayRef), - - other => exec_err!("Unsupported data type {other:?} for function iszero"), - } -} - -#[cfg(test)] -mod test { - use std::sync::Arc; - - use arrow::array::{ArrayRef, Float32Array, Float64Array}; - - use datafusion_common::cast::as_boolean_array; - - use crate::math::iszero::iszero; - - #[test] - fn test_iszero_f64() { - let args: Vec = - vec![Arc::new(Float64Array::from(vec![1.0, 0.0, 3.0, -0.0]))]; - - let result = iszero(&args).expect("failed to initialize function iszero"); - let booleans = - as_boolean_array(&result).expect("failed to initialize function iszero"); - - assert_eq!(booleans.len(), 4); - assert!(!booleans.value(0)); - assert!(booleans.value(1)); - assert!(!booleans.value(2)); - assert!(booleans.value(3)); - } - - #[test] - fn test_iszero_f32() { - let args: Vec = - vec![Arc::new(Float32Array::from(vec![1.0, 0.0, 3.0, -0.0]))]; - - let result = iszero(&args).expect("failed to initialize function iszero"); - let booleans = - as_boolean_array(&result).expect("failed to initialize function iszero"); - - assert_eq!(booleans.len(), 4); - assert!(!booleans.value(0)); - assert!(booleans.value(1)); - assert!(!booleans.value(2)); - assert!(booleans.value(3)); - } -}