From 3a53e7025a3adaffd81ca08d207a2f9932e992fe Mon Sep 17 00:00:00 2001 From: Kumar Ujjawal Date: Wed, 21 Jan 2026 09:53:47 +0530 Subject: [PATCH 1/2] perf: Optimize scalar fast path for iszero --- datafusion/functions/benches/iszero.rs | 45 ++++- datafusion/functions/src/math/iszero.rs | 257 ++++++++++++++++++------ 2 files changed, 245 insertions(+), 57 deletions(-) diff --git a/datafusion/functions/benches/iszero.rs b/datafusion/functions/benches/iszero.rs index 53e38745afa92..93bb2e7512fd0 100644 --- a/datafusion/functions/benches/iszero.rs +++ b/datafusion/functions/benches/iszero.rs @@ -23,6 +23,7 @@ use arrow::{ util::bench_util::create_primitive_array, }; use criterion::{Criterion, criterion_group, criterion_main}; +use datafusion_common::ScalarValue; use datafusion_common::config::ConfigOptions; use datafusion_expr::{ColumnarValue, ScalarFunctionArgs}; use datafusion_functions::math::iszero; @@ -31,6 +32,8 @@ use std::sync::Arc; fn criterion_benchmark(c: &mut Criterion) { let iszero = iszero(); + let config_options = Arc::new(ConfigOptions::default()); + for size in [1024, 4096, 8192] { let f32_array = Arc::new(create_primitive_array::(size, 0.2)); let batch_len = f32_array.len(); @@ -43,7 +46,6 @@ fn criterion_benchmark(c: &mut Criterion) { }) .collect::>(); let return_field = Arc::new(Field::new("f", DataType::Boolean, true)); - let config_options = Arc::new(ConfigOptions::default()); c.bench_function(&format!("iszero f32 array: {size}"), |b| { b.iter(|| { @@ -60,6 +62,7 @@ fn criterion_benchmark(c: &mut Criterion) { ) }) }); + let f64_array = Arc::new(create_primitive_array::(size, 0.2)); let batch_len = f64_array.len(); let f64_args = vec![ColumnarValue::Array(f64_array)]; @@ -88,6 +91,46 @@ fn criterion_benchmark(c: &mut Criterion) { }) }); } + + // Scalar benchmarks - run once since size doesn't affect scalar performance + let scalar_f32_args = vec![ColumnarValue::Scalar(ScalarValue::Float32(Some(1.0)))]; + let scalar_f32_arg_fields = vec![Field::new("a", DataType::Float32, false).into()]; + let return_field_scalar = Arc::new(Field::new("f", DataType::Boolean, false)); + + c.bench_function("iszero f32 scalar", |b| { + b.iter(|| { + black_box( + iszero + .invoke_with_args(ScalarFunctionArgs { + args: scalar_f32_args.clone(), + arg_fields: scalar_f32_arg_fields.clone(), + number_rows: 1, + return_field: Arc::clone(&return_field_scalar), + config_options: Arc::clone(&config_options), + }) + .unwrap(), + ) + }) + }); + + let scalar_f64_args = vec![ColumnarValue::Scalar(ScalarValue::Float64(Some(1.0)))]; + let scalar_f64_arg_fields = vec![Field::new("a", DataType::Float64, false).into()]; + + c.bench_function("iszero f64 scalar", |b| { + b.iter(|| { + black_box( + iszero + .invoke_with_args(ScalarFunctionArgs { + args: scalar_f64_args.clone(), + arg_fields: scalar_f64_arg_fields.clone(), + number_rows: 1, + return_field: Arc::clone(&return_field_scalar), + config_options: Arc::clone(&config_options), + }) + .unwrap(), + ) + }) + }); } criterion_group!(benches, criterion_benchmark); diff --git a/datafusion/functions/src/math/iszero.rs b/datafusion/functions/src/math/iszero.rs index ba4afc5622eb3..5d4f0bcf44348 100644 --- a/datafusion/functions/src/math/iszero.rs +++ b/datafusion/functions/src/math/iszero.rs @@ -18,12 +18,13 @@ use std::any::Any; use std::sync::Arc; -use arrow::array::{ArrayRef, ArrowNativeTypeOp, AsArray, BooleanArray}; +use arrow::array::{ArrowNativeTypeOp, AsArray, BooleanArray}; use arrow::datatypes::DataType::{Boolean, Float16, Float32, Float64}; use arrow::datatypes::{DataType, Float16Type, Float32Type, Float64Type}; use datafusion_common::types::NativeType; -use datafusion_common::{Result, ScalarValue, exec_err}; +use datafusion_common::utils::take_function_args; +use datafusion_common::{Result, ScalarValue, internal_err}; use datafusion_expr::{Coercion, TypeSignatureClass}; use datafusion_expr::{ ColumnarValue, Documentation, ScalarFunctionArgs, ScalarUDFImpl, Signature, @@ -31,8 +32,6 @@ use datafusion_expr::{ }; use datafusion_macros::user_doc; -use crate::utils::make_scalar_function; - #[user_doc( doc_section(label = "Math Functions"), description = "Returns true if a given number is +0.0 or -0.0 otherwise returns false.", @@ -90,11 +89,50 @@ impl ScalarUDFImpl for IsZeroFunc { } fn invoke_with_args(&self, args: ScalarFunctionArgs) -> Result { - // Handle NULL input - if args.args[0].data_type().is_null() { - return Ok(ColumnarValue::Scalar(ScalarValue::Boolean(None))); + let [arg] = take_function_args(self.name(), args.args)?; + + match arg { + ColumnarValue::Scalar(scalar) => { + if scalar.is_null() { + return Ok(ColumnarValue::Scalar(ScalarValue::Boolean(None))); + } + + match scalar { + ScalarValue::Float64(Some(v)) => { + Ok(ColumnarValue::Scalar(ScalarValue::Boolean(Some(v == 0.0)))) + } + ScalarValue::Float32(Some(v)) => { + Ok(ColumnarValue::Scalar(ScalarValue::Boolean(Some(v == 0.0)))) + } + ScalarValue::Float16(Some(v)) => Ok(ColumnarValue::Scalar( + ScalarValue::Boolean(Some(v.is_zero())), + )), + _ => { + internal_err!( + "Unexpected scalar type for iszero: {:?}", + scalar.data_type() + ) + } + } + } + ColumnarValue::Array(array) => match array.data_type() { + Float64 => Ok(ColumnarValue::Array(Arc::new(BooleanArray::from_unary( + array.as_primitive::(), + |x| x == 0.0, + )))), + Float32 => Ok(ColumnarValue::Array(Arc::new(BooleanArray::from_unary( + array.as_primitive::(), + |x| x == 0.0, + )))), + Float16 => Ok(ColumnarValue::Array(Arc::new(BooleanArray::from_unary( + array.as_primitive::(), + |x| x.is_zero(), + )))), + other => { + internal_err!("Unexpected data type {other:?} for function iszero") + } + }, } - make_scalar_function(iszero, vec![])(&args.args) } fn documentation(&self) -> Option<&Documentation> { @@ -102,67 +140,174 @@ impl ScalarUDFImpl for IsZeroFunc { } } -/// Iszero SQL function -fn iszero(args: &[ArrayRef]) -> Result { - match args[0].data_type() { - Float64 => Ok(Arc::new(BooleanArray::from_unary( - args[0].as_primitive::(), - |x| x == 0.0, - )) as ArrayRef), - - Float32 => Ok(Arc::new(BooleanArray::from_unary( - args[0].as_primitive::(), - |x| x == 0.0, - )) as ArrayRef), - - Float16 => Ok(Arc::new(BooleanArray::from_unary( - args[0].as_primitive::(), - |x| x.is_zero(), - )) as ArrayRef), - - other => exec_err!("Unsupported data type {other:?} for function iszero"), - } -} - #[cfg(test)] mod test { use std::sync::Arc; use arrow::array::{ArrayRef, Float32Array, Float64Array}; - + use arrow::datatypes::{DataType, Field}; + use datafusion_common::ScalarValue; use datafusion_common::cast::as_boolean_array; + use datafusion_common::config::ConfigOptions; + use datafusion_expr::{ColumnarValue, ScalarFunctionArgs, ScalarUDFImpl}; - use crate::math::iszero::iszero; + use crate::math::iszero::IsZeroFunc; #[test] fn test_iszero_f64() { - let args: Vec = - vec![Arc::new(Float64Array::from(vec![1.0, 0.0, 3.0, -0.0]))]; - - let result = iszero(&args).expect("failed to initialize function iszero"); - let booleans = - as_boolean_array(&result).expect("failed to initialize function iszero"); - - assert_eq!(booleans.len(), 4); - assert!(!booleans.value(0)); - assert!(booleans.value(1)); - assert!(!booleans.value(2)); - assert!(booleans.value(3)); + let array = Arc::new(Float64Array::from(vec![1.0, 0.0, 3.0, -0.0])); + let arg_fields = vec![Field::new("a", DataType::Float64, false).into()]; + let args = ScalarFunctionArgs { + args: vec![ColumnarValue::Array(Arc::clone(&array) as ArrayRef)], + arg_fields, + number_rows: array.len(), + return_field: Field::new("f", DataType::Boolean, true).into(), + config_options: Arc::new(ConfigOptions::default()), + }; + let result = IsZeroFunc::new() + .invoke_with_args(args) + .expect("failed to initialize function iszero"); + + match result { + ColumnarValue::Array(arr) => { + let booleans = + as_boolean_array(&arr).expect("failed to convert to BooleanArray"); + assert_eq!(booleans.len(), 4); + assert!(!booleans.value(0)); + assert!(booleans.value(1)); + assert!(!booleans.value(2)); + assert!(booleans.value(3)); + } + ColumnarValue::Scalar(_) => panic!("Expected an array value"), + } } #[test] fn test_iszero_f32() { - let args: Vec = - vec![Arc::new(Float32Array::from(vec![1.0, 0.0, 3.0, -0.0]))]; - - let result = iszero(&args).expect("failed to initialize function iszero"); - let booleans = - as_boolean_array(&result).expect("failed to initialize function iszero"); - - assert_eq!(booleans.len(), 4); - assert!(!booleans.value(0)); - assert!(booleans.value(1)); - assert!(!booleans.value(2)); - assert!(booleans.value(3)); + let array = Arc::new(Float32Array::from(vec![1.0, 0.0, 3.0, -0.0])); + let arg_fields = vec![Field::new("a", DataType::Float32, false).into()]; + let args = ScalarFunctionArgs { + args: vec![ColumnarValue::Array(Arc::clone(&array) as ArrayRef)], + arg_fields, + number_rows: array.len(), + return_field: Field::new("f", DataType::Boolean, true).into(), + config_options: Arc::new(ConfigOptions::default()), + }; + let result = IsZeroFunc::new() + .invoke_with_args(args) + .expect("failed to initialize function iszero"); + + match result { + ColumnarValue::Array(arr) => { + let booleans = + as_boolean_array(&arr).expect("failed to convert to BooleanArray"); + assert_eq!(booleans.len(), 4); + assert!(!booleans.value(0)); + assert!(booleans.value(1)); + assert!(!booleans.value(2)); + assert!(booleans.value(3)); + } + ColumnarValue::Scalar(_) => panic!("Expected an array value"), + } + } + + #[test] + fn test_iszero_scalar_f64_zero() { + let arg_fields = vec![Field::new("a", DataType::Float64, false).into()]; + let args = ScalarFunctionArgs { + args: vec![ColumnarValue::Scalar(ScalarValue::Float64(Some(0.0)))], + arg_fields, + number_rows: 1, + return_field: Field::new("f", DataType::Boolean, false).into(), + config_options: Arc::new(ConfigOptions::default()), + }; + let result = IsZeroFunc::new() + .invoke_with_args(args) + .expect("iszero scalar zero should succeed"); + + match result { + ColumnarValue::Scalar(ScalarValue::Boolean(Some(v))) => assert!(v), + _ => panic!("Expected Boolean scalar"), + } + } + + #[test] + fn test_iszero_scalar_f64_neg_zero() { + let arg_fields = vec![Field::new("a", DataType::Float64, false).into()]; + let args = ScalarFunctionArgs { + args: vec![ColumnarValue::Scalar(ScalarValue::Float64(Some(-0.0)))], + arg_fields, + number_rows: 1, + return_field: Field::new("f", DataType::Boolean, false).into(), + config_options: Arc::new(ConfigOptions::default()), + }; + let result = IsZeroFunc::new() + .invoke_with_args(args) + .expect("iszero scalar -0.0 should succeed"); + + match result { + ColumnarValue::Scalar(ScalarValue::Boolean(Some(v))) => assert!(v), + _ => panic!("Expected Boolean scalar"), + } + } + + #[test] + fn test_iszero_scalar_f64_non_zero() { + let arg_fields = vec![Field::new("a", DataType::Float64, false).into()]; + let args = ScalarFunctionArgs { + args: vec![ColumnarValue::Scalar(ScalarValue::Float64(Some(1.5)))], + arg_fields, + number_rows: 1, + return_field: Field::new("f", DataType::Boolean, false).into(), + config_options: Arc::new(ConfigOptions::default()), + }; + let result = IsZeroFunc::new() + .invoke_with_args(args) + .expect("iszero scalar non-zero should succeed"); + + match result { + ColumnarValue::Scalar(ScalarValue::Boolean(Some(v))) => assert!(!v), + _ => panic!("Expected Boolean scalar"), + } + } + + #[test] + fn test_iszero_scalar_null() { + let arg_fields = vec![Field::new("a", DataType::Float64, true).into()]; + let args = ScalarFunctionArgs { + args: vec![ColumnarValue::Scalar(ScalarValue::Float64(None))], + arg_fields, + number_rows: 1, + return_field: Field::new("f", DataType::Boolean, true).into(), + config_options: Arc::new(ConfigOptions::default()), + }; + let result = IsZeroFunc::new() + .invoke_with_args(args) + .expect("iszero null should succeed"); + + match result { + ColumnarValue::Scalar(scalar) => assert!(scalar.is_null()), + _ => panic!("Expected scalar result"), + } + } + + #[test] + fn test_iszero_scalar_f32() { + let arg_fields = vec![Field::new("a", DataType::Float32, false).into()]; + let args = ScalarFunctionArgs { + args: vec![ColumnarValue::Scalar(ScalarValue::Float32(Some(0.0)))], + arg_fields, + number_rows: 1, + return_field: Field::new("f", DataType::Boolean, false).into(), + config_options: Arc::new(ConfigOptions::default()), + }; + let result = IsZeroFunc::new() + .invoke_with_args(args) + .expect("iszero scalar f32 should succeed"); + + match result { + ColumnarValue::Scalar(ScalarValue::Boolean(Some(v))) => assert!(v), + _ => panic!("Expected Boolean scalar"), + } } } From 9845ddc9eba97e60aab2a120b8f4e85071ffbb08 Mon Sep 17 00:00:00 2001 From: Kumar Ujjawal Date: Wed, 21 Jan 2026 17:36:59 +0530 Subject: [PATCH 2/2] remove unit tests --- datafusion/functions/src/math/iszero.rs | 172 ------------------------ 1 file changed, 172 deletions(-) diff --git a/datafusion/functions/src/math/iszero.rs b/datafusion/functions/src/math/iszero.rs index 5d4f0bcf44348..dd1b082cf0f37 100644 --- a/datafusion/functions/src/math/iszero.rs +++ b/datafusion/functions/src/math/iszero.rs @@ -139,175 +139,3 @@ impl ScalarUDFImpl for IsZeroFunc { self.doc() } } - -#[cfg(test)] -mod test { - use std::sync::Arc; - - use arrow::array::{ArrayRef, Float32Array, Float64Array}; - use arrow::datatypes::{DataType, Field}; - use datafusion_common::ScalarValue; - use datafusion_common::cast::as_boolean_array; - use datafusion_common::config::ConfigOptions; - use datafusion_expr::{ColumnarValue, ScalarFunctionArgs, ScalarUDFImpl}; - - use crate::math::iszero::IsZeroFunc; - - #[test] - fn test_iszero_f64() { - let array = Arc::new(Float64Array::from(vec![1.0, 0.0, 3.0, -0.0])); - let arg_fields = vec![Field::new("a", DataType::Float64, false).into()]; - let args = ScalarFunctionArgs { - args: vec![ColumnarValue::Array(Arc::clone(&array) as ArrayRef)], - arg_fields, - number_rows: array.len(), - return_field: Field::new("f", DataType::Boolean, true).into(), - config_options: Arc::new(ConfigOptions::default()), - }; - let result = IsZeroFunc::new() - .invoke_with_args(args) - .expect("failed to initialize function iszero"); - - match result { - ColumnarValue::Array(arr) => { - let booleans = - as_boolean_array(&arr).expect("failed to convert to BooleanArray"); - assert_eq!(booleans.len(), 4); - assert!(!booleans.value(0)); - assert!(booleans.value(1)); - assert!(!booleans.value(2)); - assert!(booleans.value(3)); - } - ColumnarValue::Scalar(_) => panic!("Expected an array value"), - } - } - - #[test] - fn test_iszero_f32() { - let array = Arc::new(Float32Array::from(vec![1.0, 0.0, 3.0, -0.0])); - let arg_fields = vec![Field::new("a", DataType::Float32, false).into()]; - let args = ScalarFunctionArgs { - args: vec![ColumnarValue::Array(Arc::clone(&array) as ArrayRef)], - arg_fields, - number_rows: array.len(), - return_field: Field::new("f", DataType::Boolean, true).into(), - config_options: Arc::new(ConfigOptions::default()), - }; - let result = IsZeroFunc::new() - .invoke_with_args(args) - .expect("failed to initialize function iszero"); - - match result { - ColumnarValue::Array(arr) => { - let booleans = - as_boolean_array(&arr).expect("failed to convert to BooleanArray"); - assert_eq!(booleans.len(), 4); - assert!(!booleans.value(0)); - assert!(booleans.value(1)); - assert!(!booleans.value(2)); - assert!(booleans.value(3)); - } - ColumnarValue::Scalar(_) => panic!("Expected an array value"), - } - } - - #[test] - fn test_iszero_scalar_f64_zero() { - let arg_fields = vec![Field::new("a", DataType::Float64, false).into()]; - let args = ScalarFunctionArgs { - args: vec![ColumnarValue::Scalar(ScalarValue::Float64(Some(0.0)))], - arg_fields, - number_rows: 1, - return_field: Field::new("f", DataType::Boolean, false).into(), - config_options: Arc::new(ConfigOptions::default()), - }; - let result = IsZeroFunc::new() - .invoke_with_args(args) - .expect("iszero scalar zero should succeed"); - - match result { - ColumnarValue::Scalar(ScalarValue::Boolean(Some(v))) => assert!(v), - _ => panic!("Expected Boolean scalar"), - } - } - - #[test] - fn test_iszero_scalar_f64_neg_zero() { - let arg_fields = vec![Field::new("a", DataType::Float64, false).into()]; - let args = ScalarFunctionArgs { - args: vec![ColumnarValue::Scalar(ScalarValue::Float64(Some(-0.0)))], - arg_fields, - number_rows: 1, - return_field: Field::new("f", DataType::Boolean, false).into(), - config_options: Arc::new(ConfigOptions::default()), - }; - let result = IsZeroFunc::new() - .invoke_with_args(args) - .expect("iszero scalar -0.0 should succeed"); - - match result { - ColumnarValue::Scalar(ScalarValue::Boolean(Some(v))) => assert!(v), - _ => panic!("Expected Boolean scalar"), - } - } - - #[test] - fn test_iszero_scalar_f64_non_zero() { - let arg_fields = vec![Field::new("a", DataType::Float64, false).into()]; - let args = ScalarFunctionArgs { - args: vec![ColumnarValue::Scalar(ScalarValue::Float64(Some(1.5)))], - arg_fields, - number_rows: 1, - return_field: Field::new("f", DataType::Boolean, false).into(), - config_options: Arc::new(ConfigOptions::default()), - }; - let result = IsZeroFunc::new() - .invoke_with_args(args) - .expect("iszero scalar non-zero should succeed"); - - match result { - ColumnarValue::Scalar(ScalarValue::Boolean(Some(v))) => assert!(!v), - _ => panic!("Expected Boolean scalar"), - } - } - - #[test] - fn test_iszero_scalar_null() { - let arg_fields = vec![Field::new("a", DataType::Float64, true).into()]; - let args = ScalarFunctionArgs { - args: vec![ColumnarValue::Scalar(ScalarValue::Float64(None))], - arg_fields, - number_rows: 1, - return_field: Field::new("f", DataType::Boolean, true).into(), - config_options: Arc::new(ConfigOptions::default()), - }; - let result = IsZeroFunc::new() - .invoke_with_args(args) - .expect("iszero null should succeed"); - - match result { - ColumnarValue::Scalar(scalar) => assert!(scalar.is_null()), - _ => panic!("Expected scalar result"), - } - } - - #[test] - fn test_iszero_scalar_f32() { - let arg_fields = vec![Field::new("a", DataType::Float32, false).into()]; - let args = ScalarFunctionArgs { - args: vec![ColumnarValue::Scalar(ScalarValue::Float32(Some(0.0)))], - arg_fields, - number_rows: 1, - return_field: Field::new("f", DataType::Boolean, false).into(), - config_options: Arc::new(ConfigOptions::default()), - }; - let result = IsZeroFunc::new() - .invoke_with_args(args) - .expect("iszero scalar f32 should succeed"); - - match result { - ColumnarValue::Scalar(ScalarValue::Boolean(Some(v))) => assert!(v), - _ => panic!("Expected Boolean scalar"), - } - } -}