From 1888c99f81609e4829de606c54b90f3ed8ede43b Mon Sep 17 00:00:00 2001 From: Kumar Ujjawal Date: Tue, 13 Jan 2026 17:39:17 +0530 Subject: [PATCH 1/5] perf: Optimize trunc scalar performance --- datafusion/functions/benches/trunc.rs | 46 +++++++++++++++++++++++++ datafusion/functions/src/math/trunc.rs | 47 +++++++++++++++++++++++++- 2 files changed, 92 insertions(+), 1 deletion(-) diff --git a/datafusion/functions/benches/trunc.rs b/datafusion/functions/benches/trunc.rs index d0a6e2be75e0..d19bf41aa05f 100644 --- a/datafusion/functions/benches/trunc.rs +++ b/datafusion/functions/benches/trunc.rs @@ -74,6 +74,52 @@ fn criterion_benchmark(c: &mut Criterion) { }) }); } + + // Scalar benchmarks - to measure optimized performance + let scalar_f64_args = vec![ColumnarValue::Scalar( + datafusion_common::ScalarValue::Float64(Some(3.14159265)), + )]; + let scalar_arg_fields = vec![Field::new("a", DataType::Float64, false).into()]; + let scalar_return_field = Field::new("f", DataType::Float64, false).into(); + let config_options = Arc::new(ConfigOptions::default()); + + c.bench_function("trunc f64 scalar", |b| { + b.iter(|| { + black_box( + trunc + .invoke_with_args(ScalarFunctionArgs { + args: scalar_f64_args.clone(), + arg_fields: scalar_arg_fields.clone(), + number_rows: 1, + return_field: Arc::clone(&scalar_return_field), + config_options: Arc::clone(&config_options), + }) + .unwrap(), + ) + }) + }); + + let scalar_f32_args = vec![ColumnarValue::Scalar( + datafusion_common::ScalarValue::Float32(Some(3.14159)), + )]; + let scalar_f32_arg_fields = vec![Field::new("a", DataType::Float32, false).into()]; + let scalar_f32_return_field = Field::new("f", DataType::Float32, false).into(); + + c.bench_function("trunc f32 scalar", |b| { + b.iter(|| { + black_box( + trunc + .invoke_with_args(ScalarFunctionArgs { + args: scalar_f32_args.clone(), + arg_fields: scalar_f32_arg_fields.clone(), + number_rows: 1, + return_field: Arc::clone(&scalar_f32_return_field), + config_options: Arc::clone(&config_options), + }) + .unwrap(), + ) + }) + }); } criterion_group!(benches, criterion_benchmark); diff --git a/datafusion/functions/src/math/trunc.rs b/datafusion/functions/src/math/trunc.rs index 6727ba8fbdf0..2358d1919051 100644 --- a/datafusion/functions/src/math/trunc.rs +++ b/datafusion/functions/src/math/trunc.rs @@ -24,7 +24,7 @@ use arrow::array::{ArrayRef, AsArray, PrimitiveArray}; use arrow::datatypes::DataType::{Float32, Float64}; use arrow::datatypes::{DataType, Float32Type, Float64Type, Int64Type}; use datafusion_common::ScalarValue::Int64; -use datafusion_common::{Result, exec_err}; +use datafusion_common::{Result, ScalarValue, exec_err}; use datafusion_expr::TypeSignature::Exact; use datafusion_expr::sort_properties::{ExprProperties, SortProperties}; use datafusion_expr::{ @@ -110,6 +110,51 @@ impl ScalarUDFImpl for TruncFunc { } fn invoke_with_args(&self, args: ScalarFunctionArgs) -> Result { + let value = &args.args[0]; + + // Scalar fast path for float types with scalar or default precision + if let ColumnarValue::Scalar(scalar) = value { + // Get precision: default 0 or from second scalar arg + let precision = if args.args.len() >= 2 { + match &args.args[1] { + ColumnarValue::Scalar(Int64(Some(p))) => *p, + ColumnarValue::Scalar(Int64(None)) => { + return Ok(ColumnarValue::Scalar(ScalarValue::Float64(None))); + } + _ => { + // Precision is an array - fall through to array path + return make_scalar_function(trunc, vec![])(&args.args); + } + } + } else { + 0 // default precision + }; + + match scalar { + ScalarValue::Float64(v) => { + let result = v.map(|x| { + if precision == 0 { + if x == 0.0 { 0.0 } else { x.trunc() } + } else { + compute_truncate64(x, precision) + } + }); + return Ok(ColumnarValue::Scalar(ScalarValue::Float64(result))); + } + ScalarValue::Float32(v) => { + let result = v.map(|x| { + if precision == 0 { + if x == 0.0 { 0.0 } else { x.trunc() } + } else { + compute_truncate32(x, precision) + } + }); + return Ok(ColumnarValue::Scalar(ScalarValue::Float32(result))); + } + _ => {} + } + } + make_scalar_function(trunc, vec![])(&args.args) } From 8db0ed7fb36e23a86ef8f6b8192e5f7fceeae851 Mon Sep 17 00:00:00 2001 From: Kumar Ujjawal Date: Tue, 13 Jan 2026 21:09:30 +0530 Subject: [PATCH 2/5] fix clippy and fix return type --- datafusion/functions/benches/trunc.rs | 8 ++++---- datafusion/functions/src/math/trunc.rs | 15 ++++++++++++--- 2 files changed, 16 insertions(+), 7 deletions(-) diff --git a/datafusion/functions/benches/trunc.rs b/datafusion/functions/benches/trunc.rs index d19bf41aa05f..d88d56a94ed7 100644 --- a/datafusion/functions/benches/trunc.rs +++ b/datafusion/functions/benches/trunc.rs @@ -32,12 +32,13 @@ use std::sync::Arc; fn criterion_benchmark(c: &mut Criterion) { let trunc = trunc(); + let config_options = Arc::new(ConfigOptions::default()); + for size in [1024, 4096, 8192] { let f32_array = Arc::new(create_primitive_array::(size, 0.2)); let f32_args = vec![ColumnarValue::Array(f32_array)]; let arg_fields = vec![Field::new("a", DataType::Float32, false).into()]; let return_field = Field::new("f", DataType::Float32, true).into(); - let config_options = Arc::new(ConfigOptions::default()); c.bench_function(&format!("trunc f32 array: {size}"), |b| { b.iter(|| { @@ -77,11 +78,10 @@ fn criterion_benchmark(c: &mut Criterion) { // Scalar benchmarks - to measure optimized performance let scalar_f64_args = vec![ColumnarValue::Scalar( - datafusion_common::ScalarValue::Float64(Some(3.14159265)), + datafusion_common::ScalarValue::Float64(Some(std::f64::consts::PI)), )]; let scalar_arg_fields = vec![Field::new("a", DataType::Float64, false).into()]; let scalar_return_field = Field::new("f", DataType::Float64, false).into(); - let config_options = Arc::new(ConfigOptions::default()); c.bench_function("trunc f64 scalar", |b| { b.iter(|| { @@ -100,7 +100,7 @@ fn criterion_benchmark(c: &mut Criterion) { }); let scalar_f32_args = vec![ColumnarValue::Scalar( - datafusion_common::ScalarValue::Float32(Some(3.14159)), + datafusion_common::ScalarValue::Float32(Some(std::f32::consts::PI)), )]; let scalar_f32_arg_fields = vec![Field::new("a", DataType::Float32, false).into()]; let scalar_f32_return_field = Field::new("f", DataType::Float32, false).into(); diff --git a/datafusion/functions/src/math/trunc.rs b/datafusion/functions/src/math/trunc.rs index 2358d1919051..44fd3f353ed3 100644 --- a/datafusion/functions/src/math/trunc.rs +++ b/datafusion/functions/src/math/trunc.rs @@ -119,7 +119,13 @@ impl ScalarUDFImpl for TruncFunc { match &args.args[1] { ColumnarValue::Scalar(Int64(Some(p))) => *p, ColumnarValue::Scalar(Int64(None)) => { - return Ok(ColumnarValue::Scalar(ScalarValue::Float64(None))); + // Return null with the same type as the input + return match scalar { + ScalarValue::Float32(_) => { + Ok(ColumnarValue::Scalar(ScalarValue::Float32(None))) + } + _ => Ok(ColumnarValue::Scalar(ScalarValue::Float64(None))), + }; } _ => { // Precision is an array - fall through to array path @@ -134,7 +140,7 @@ impl ScalarUDFImpl for TruncFunc { ScalarValue::Float64(v) => { let result = v.map(|x| { if precision == 0 { - if x == 0.0 { 0.0 } else { x.trunc() } + x.trunc() } else { compute_truncate64(x, precision) } @@ -144,13 +150,16 @@ impl ScalarUDFImpl for TruncFunc { ScalarValue::Float32(v) => { let result = v.map(|x| { if precision == 0 { - if x == 0.0 { 0.0 } else { x.trunc() } + x.trunc() } else { compute_truncate32(x, precision) } }); return Ok(ColumnarValue::Scalar(ScalarValue::Float32(result))); } + ScalarValue::Null => { + return Ok(ColumnarValue::Scalar(ScalarValue::Float64(None))); + } _ => {} } } From a2fd8b8a8d2f1cbd97bcb24718eab4f21198af5d Mon Sep 17 00:00:00 2001 From: Kumar Ujjawal Date: Wed, 14 Jan 2026 20:27:07 +0530 Subject: [PATCH 3/5] refactor to flatten nested structure --- datafusion/functions/src/math/trunc.rs | 98 ++++++++++++-------------- 1 file changed, 47 insertions(+), 51 deletions(-) diff --git a/datafusion/functions/src/math/trunc.rs b/datafusion/functions/src/math/trunc.rs index 44fd3f353ed3..d70bf7b34b0d 100644 --- a/datafusion/functions/src/math/trunc.rs +++ b/datafusion/functions/src/math/trunc.rs @@ -110,61 +110,57 @@ impl ScalarUDFImpl for TruncFunc { } fn invoke_with_args(&self, args: ScalarFunctionArgs) -> Result { - let value = &args.args[0]; - - // Scalar fast path for float types with scalar or default precision - if let ColumnarValue::Scalar(scalar) = value { - // Get precision: default 0 or from second scalar arg - let precision = if args.args.len() >= 2 { - match &args.args[1] { - ColumnarValue::Scalar(Int64(Some(p))) => *p, - ColumnarValue::Scalar(Int64(None)) => { - // Return null with the same type as the input - return match scalar { - ScalarValue::Float32(_) => { - Ok(ColumnarValue::Scalar(ScalarValue::Float32(None))) - } - _ => Ok(ColumnarValue::Scalar(ScalarValue::Float64(None))), - }; + // Extract precision from second argument (default 0) + let precision = match args.args.get(1) { + Some(ColumnarValue::Scalar(Int64(Some(p)))) => Some(*p), + Some(ColumnarValue::Scalar(Int64(None))) => None, // null precision + Some(ColumnarValue::Array(_)) => { + // Precision is an array - use array path + return make_scalar_function(trunc, vec![])(&args.args); + } + None => Some(0), // default precision + _ => Some(0), + }; + + // Scalar fast path using tuple matching for (value, precision) + match (&args.args[0], precision) { + // Null precision returns null with same type as input + (ColumnarValue::Scalar(ScalarValue::Float32(_)), None) => { + Ok(ColumnarValue::Scalar(ScalarValue::Float32(None))) + } + (ColumnarValue::Scalar(ScalarValue::Float64(_)), None) + | (ColumnarValue::Scalar(ScalarValue::Null), None) => { + Ok(ColumnarValue::Scalar(ScalarValue::Float64(None))) + } + // Float64 scalar with precision + (ColumnarValue::Scalar(ScalarValue::Float64(v)), Some(p)) => { + let result = v.map(|x| { + if p == 0 { + x.trunc() + } else { + compute_truncate64(x, p) } - _ => { - // Precision is an array - fall through to array path - return make_scalar_function(trunc, vec![])(&args.args); + }); + Ok(ColumnarValue::Scalar(ScalarValue::Float64(result))) + } + // Float32 scalar with precision + (ColumnarValue::Scalar(ScalarValue::Float32(v)), Some(p)) => { + let result = v.map(|x| { + if p == 0 { + x.trunc() + } else { + compute_truncate32(x, p) } - } - } else { - 0 // default precision - }; - - match scalar { - ScalarValue::Float64(v) => { - let result = v.map(|x| { - if precision == 0 { - x.trunc() - } else { - compute_truncate64(x, precision) - } - }); - return Ok(ColumnarValue::Scalar(ScalarValue::Float64(result))); - } - ScalarValue::Float32(v) => { - let result = v.map(|x| { - if precision == 0 { - x.trunc() - } else { - compute_truncate32(x, precision) - } - }); - return Ok(ColumnarValue::Scalar(ScalarValue::Float32(result))); - } - ScalarValue::Null => { - return Ok(ColumnarValue::Scalar(ScalarValue::Float64(None))); - } - _ => {} + }); + Ok(ColumnarValue::Scalar(ScalarValue::Float32(result))) + } + // Null scalar + (ColumnarValue::Scalar(ScalarValue::Null), Some(_)) => { + Ok(ColumnarValue::Scalar(ScalarValue::Float64(None))) } + // Array path for everything else + _ => make_scalar_function(trunc, vec![])(&args.args), } - - make_scalar_function(trunc, vec![])(&args.args) } fn output_ordering(&self, input: &[ExprProperties]) -> Result { From 0180eb64a23a476a3ff179ff884bb0ad3d1f9da9 Mon Sep 17 00:00:00 2001 From: Kumar Ujjawal Date: Wed, 14 Jan 2026 22:29:01 +0530 Subject: [PATCH 4/5] suggestion from Jeffrey Co-authored-by: Jeffrey Vo --- datafusion/functions/src/math/trunc.rs | 52 ++++++++++---------------- 1 file changed, 20 insertions(+), 32 deletions(-) diff --git a/datafusion/functions/src/math/trunc.rs b/datafusion/functions/src/math/trunc.rs index d70bf7b34b0d..90d8b32d2091 100644 --- a/datafusion/functions/src/math/trunc.rs +++ b/datafusion/functions/src/math/trunc.rs @@ -124,40 +124,28 @@ impl ScalarUDFImpl for TruncFunc { // Scalar fast path using tuple matching for (value, precision) match (&args.args[0], precision) { - // Null precision returns null with same type as input - (ColumnarValue::Scalar(ScalarValue::Float32(_)), None) => { - Ok(ColumnarValue::Scalar(ScalarValue::Float32(None))) + // Null cases + (ColumnarValue::Scalar(sv), _) if sv.is_null() => { + ColumnarValue::Scalar(ScalarValue::Null).cast_to(args.return_type(), None) } - (ColumnarValue::Scalar(ScalarValue::Float64(_)), None) - | (ColumnarValue::Scalar(ScalarValue::Null), None) => { - Ok(ColumnarValue::Scalar(ScalarValue::Float64(None))) - } - // Float64 scalar with precision - (ColumnarValue::Scalar(ScalarValue::Float64(v)), Some(p)) => { - let result = v.map(|x| { - if p == 0 { - x.trunc() - } else { - compute_truncate64(x, p) - } - }); - Ok(ColumnarValue::Scalar(ScalarValue::Float64(result))) - } - // Float32 scalar with precision - (ColumnarValue::Scalar(ScalarValue::Float32(v)), Some(p)) => { - let result = v.map(|x| { - if p == 0 { - x.trunc() - } else { - compute_truncate32(x, p) - } - }); - Ok(ColumnarValue::Scalar(ScalarValue::Float32(result))) - } - // Null scalar - (ColumnarValue::Scalar(ScalarValue::Null), Some(_)) => { - Ok(ColumnarValue::Scalar(ScalarValue::Float64(None))) + (_, None) => { + ColumnarValue::Scalar(ScalarValue::Null).cast_to(args.return_type(), None) } + // Scalar cases + (ColumnarValue::Scalar(ScalarValue::Float64(Some(v))), Some(p)) => Ok( + ColumnarValue::Scalar(ScalarValue::Float64(Some(if p == 0 { + v.trunc() + } else { + compute_truncate64(*v, p) + }))), + ), + (ColumnarValue::Scalar(ScalarValue::Float32(Some(v))), Some(p)) => Ok( + ColumnarValue::Scalar(ScalarValue::Float32(Some(if p == 0 { + v.trunc() + } else { + compute_truncate32(*v, p) + }))), + ), // Array path for everything else _ => make_scalar_function(trunc, vec![])(&args.args), } From e6e2825857dd4514f9cd2f668139664b470593f2 Mon Sep 17 00:00:00 2001 From: Kumar Ujjawal Date: Thu, 15 Jan 2026 15:12:16 +0530 Subject: [PATCH 5/5] catch arm returns exec_err! with message --- datafusion/functions/src/math/trunc.rs | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/datafusion/functions/src/math/trunc.rs b/datafusion/functions/src/math/trunc.rs index 90d8b32d2091..bd21eeef179d 100644 --- a/datafusion/functions/src/math/trunc.rs +++ b/datafusion/functions/src/math/trunc.rs @@ -119,7 +119,12 @@ impl ScalarUDFImpl for TruncFunc { return make_scalar_function(trunc, vec![])(&args.args); } None => Some(0), // default precision - _ => Some(0), + Some(cv) => { + return exec_err!( + "trunc function requires precision to be Int64, got {:?}", + cv.data_type() + ); + } }; // Scalar fast path using tuple matching for (value, precision)