From 434fca52203805619571b2ef2b676267e24ba0e3 Mon Sep 17 00:00:00 2001 From: Kumar Ujjawal Date: Fri, 23 Jan 2026 11:55:19 +0530 Subject: [PATCH] perf: Optimize scalar path for ascii function --- datafusion/functions/benches/ascii.rs | 31 +++++++++++++++++++++++- datafusion/functions/src/string/ascii.rs | 30 ++++++++++++++++++++--- 2 files changed, 57 insertions(+), 4 deletions(-) diff --git a/datafusion/functions/benches/ascii.rs b/datafusion/functions/benches/ascii.rs index 66d81261bfe8..e15483c19a45 100644 --- a/datafusion/functions/benches/ascii.rs +++ b/datafusion/functions/benches/ascii.rs @@ -20,14 +20,43 @@ mod helper; use arrow::datatypes::{DataType, Field}; use criterion::{Criterion, criterion_group, criterion_main}; +use datafusion_common::ScalarValue; use datafusion_common::config::ConfigOptions; -use datafusion_expr::ScalarFunctionArgs; +use datafusion_expr::{ColumnarValue, ScalarFunctionArgs}; use helper::gen_string_array; use std::hint::black_box; use std::sync::Arc; fn criterion_benchmark(c: &mut Criterion) { let ascii = datafusion_functions::string::ascii(); + let config_options = Arc::new(ConfigOptions::default()); + + // Scalar benchmarks (outside loop) + c.bench_function("ascii/scalar_utf8", |b| { + let args = ScalarFunctionArgs { + args: vec![ColumnarValue::Scalar(ScalarValue::Utf8(Some( + "hello".to_string(), + )))], + arg_fields: vec![Field::new("a", DataType::Utf8, false).into()], + number_rows: 1, + return_field: Field::new("f", DataType::Int32, true).into(), + config_options: Arc::clone(&config_options), + }; + b.iter(|| black_box(ascii.invoke_with_args(args.clone()).unwrap())) + }); + + c.bench_function("ascii/scalar_utf8view", |b| { + let args = ScalarFunctionArgs { + args: vec![ColumnarValue::Scalar(ScalarValue::Utf8View(Some( + "hello".to_string(), + )))], + arg_fields: vec![Field::new("a", DataType::Utf8View, false).into()], + number_rows: 1, + return_field: Field::new("f", DataType::Int32, true).into(), + config_options: 
Arc::clone(&config_options), + }; + b.iter(|| black_box(ascii.invoke_with_args(args.clone()).unwrap())) + }); // All benches are single batch run with 8192 rows const N_ROWS: usize = 8192; diff --git a/datafusion/functions/src/string/ascii.rs b/datafusion/functions/src/string/ascii.rs index fe3c508edea0..bfd035ed3c0d 100644 --- a/datafusion/functions/src/string/ascii.rs +++ b/datafusion/functions/src/string/ascii.rs @@ -15,12 +15,12 @@ // specific language governing permissions and limitations // under the License. -use crate::utils::make_scalar_function; use arrow::array::{ArrayRef, AsArray, Int32Array, StringArrayType}; use arrow::datatypes::DataType; use arrow::error::ArrowError; use datafusion_common::types::logical_string; -use datafusion_common::{Result, internal_err}; +use datafusion_common::utils::take_function_args; +use datafusion_common::{Result, ScalarValue, internal_err}; use datafusion_expr::{ColumnarValue, Documentation, TypeSignatureClass}; use datafusion_expr::{ScalarFunctionArgs, ScalarUDFImpl, Signature, Volatility}; use datafusion_expr_common::signature::Coercion; @@ -91,7 +91,31 @@ impl ScalarUDFImpl for AsciiFunc { } fn invoke_with_args(&self, args: ScalarFunctionArgs) -> Result<ColumnarValue> { - make_scalar_function(ascii, vec![])(&args.args) + let [arg] = take_function_args(self.name(), args.args)?; + + match arg { + ColumnarValue::Scalar(scalar) => { + if scalar.is_null() { + return Ok(ColumnarValue::Scalar(ScalarValue::Int32(None))); + } + + match scalar { + ScalarValue::Utf8(Some(s)) + | ScalarValue::LargeUtf8(Some(s)) + | ScalarValue::Utf8View(Some(s)) => { + let result = s.chars().next().map_or(0, |c| c as i32); + Ok(ColumnarValue::Scalar(ScalarValue::Int32(Some(result)))) + } + _ => { + internal_err!( + "Unexpected data type {:?} for function ascii", + scalar.data_type() + ) + } + } + } + ColumnarValue::Array(array) => Ok(ColumnarValue::Array(ascii(&[array])?)), + } } fn documentation(&self) -> Option<&Documentation> {