From 2026df418d4977bedb8592f28d08568e166c67b3 Mon Sep 17 00:00:00 2001 From: Bruce Ritchie Date: Thu, 14 Mar 2024 09:42:01 -0400 Subject: [PATCH 1/4] Fix to_timestamp benchmark --- datafusion/functions/benches/to_timestamp.rs | 173 ++++++++++--------- 1 file changed, 92 insertions(+), 81 deletions(-) diff --git a/datafusion/functions/benches/to_timestamp.rs b/datafusion/functions/benches/to_timestamp.rs index c83824526442c..31d609dee9bce 100644 --- a/datafusion/functions/benches/to_timestamp.rs +++ b/datafusion/functions/benches/to_timestamp.rs @@ -17,97 +17,108 @@ extern crate criterion; +use std::sync::Arc; + +use arrow_array::builder::StringBuilder; +use arrow_array::ArrayRef; use criterion::{black_box, criterion_group, criterion_main, Criterion}; -use datafusion_expr::lit; -use datafusion_functions::expr_fn::to_timestamp; +use datafusion_expr::ColumnarValue; +use datafusion_functions::datetime::to_timestamp; fn criterion_benchmark(c: &mut Criterion) { c.bench_function("to_timestamp_no_formats", |b| { - let inputs = vec![ - lit("1997-01-31T09:26:56.123Z"), - lit("1997-01-31T09:26:56.123-05:00"), - lit("1997-01-31 09:26:56.123-05:00"), - lit("2023-01-01 04:05:06.789 -08"), - lit("1997-01-31T09:26:56.123"), - lit("1997-01-31 09:26:56.123"), - lit("1997-01-31 09:26:56"), - lit("1997-01-31 13:26:56"), - lit("1997-01-31 13:26:56+04:00"), - lit("1997-01-31"), - ]; + let mut inputs = StringBuilder::new(); + inputs.append_value("1997-01-31T09:26:56.123Z"); + inputs.append_value("1997-01-31T09:26:56.123-05:00"); + inputs.append_value("1997-01-31 09:26:56.123-05:00"); + inputs.append_value("2023-01-01 04:05:06.789 -08"); + inputs.append_value("1997-01-31T09:26:56.123"); + inputs.append_value("1997-01-31 09:26:56.123"); + inputs.append_value("1997-01-31 09:26:56"); + inputs.append_value("1997-01-31 13:26:56"); + inputs.append_value("1997-01-31 13:26:56+04:00"); + inputs.append_value("1997-01-31"); + + let string_array = ColumnarValue::Array(Arc::new(inputs.finish()) as ArrayRef); + b.iter(|| { - for i in inputs.iter() { - black_box(to_timestamp(vec![i.clone()])); - } - }); + black_box( + to_timestamp() + .invoke(&[string_array.clone()]) + .expect("to_timestamp should work on valid values"), + ) + }) }); c.bench_function("to_timestamp_with_formats", |b| { - let mut inputs = vec![]; - let mut format1 = vec![]; - let mut format2 = vec![]; - let mut format3 = vec![]; - - inputs.push(lit("1997-01-31T09:26:56.123Z")); - format1.push(lit("%+")); - format2.push(lit("%c")); - format3.push(lit("%Y-%m-%dT%H:%M:%S%.f%Z")); - - inputs.push(lit("1997-01-31T09:26:56.123-05:00")); - format1.push(lit("%+")); - format2.push(lit("%c")); - format3.push(lit("%Y-%m-%dT%H:%M:%S%.f%z")); - - inputs.push(lit("1997-01-31 09:26:56.123-05:00")); - format1.push(lit("%+")); - format2.push(lit("%c")); - format3.push(lit("%Y-%m-%d %H:%M:%S%.f%Z")); - - inputs.push(lit("2023-01-01 04:05:06.789 -08")); - format1.push(lit("%+")); - format2.push(lit("%c")); - format3.push(lit("%Y-%m-%d %H:%M:%S%.f %#z")); - - inputs.push(lit("1997-01-31T09:26:56.123")); - format1.push(lit("%+")); - format2.push(lit("%c")); - format3.push(lit("%Y-%m-%dT%H:%M:%S%.f")); - - inputs.push(lit("1997-01-31 09:26:56.123")); - format1.push(lit("%+")); - format2.push(lit("%c")); - format3.push(lit("%Y-%m-%d %H:%M:%S%.f")); - - inputs.push(lit("1997-01-31 09:26:56")); - format1.push(lit("%+")); - format2.push(lit("%c")); - format3.push(lit("%Y-%m-%d %H:%M:%S")); - - inputs.push(lit("1997-01-31 092656")); - format1.push(lit("%+")); - format2.push(lit("%c")); - format3.push(lit("%Y-%m-%d %H%M%S")); - - inputs.push(lit("1997-01-31 092656+04:00")); - format1.push(lit("%+")); - format2.push(lit("%c")); - format3.push(lit("%Y-%m-%d %H%M%S%:z")); - - inputs.push(lit("Sun Jul 8 00:34:60 2001")); - format1.push(lit("%+")); - format2.push(lit("%c")); - format3.push(lit("%Y-%m-%d 00:00:00")); - + let mut inputs = StringBuilder::new(); + let mut format1_builder = StringBuilder::with_capacity(2, 10); + let mut format2_builder = StringBuilder::with_capacity(2, 10); + let mut format3_builder = StringBuilder::with_capacity(2, 10); + + inputs.append_value("1997-01-31T09:26:56.123Z"); + format1_builder.append_value("%+"); + format2_builder.append_value("%c"); + format3_builder.append_value("%Y-%m-%dT%H:%M:%S%.f%Z"); + + inputs.append_value("1997-01-31T09:26:56.123-05:00"); + format1_builder.append_value("%+"); + format2_builder.append_value("%c"); + format3_builder.append_value("%Y-%m-%dT%H:%M:%S%.f%z"); + + inputs.append_value("1997-01-31 09:26:56.123-05:00"); + format1_builder.append_value("%+"); + format2_builder.append_value("%c"); + format3_builder.append_value("%Y-%m-%d %H:%M:%S%.f%Z"); + + inputs.append_value("2023-01-01 04:05:06.789 -08"); + format1_builder.append_value("%+"); + format2_builder.append_value("%c"); + format3_builder.append_value("%Y-%m-%d %H:%M:%S%.f %#z"); + + inputs.append_value("1997-01-31T09:26:56.123"); + format1_builder.append_value("%+"); + format2_builder.append_value("%c"); + format3_builder.append_value("%Y-%m-%dT%H:%M:%S%.f"); + + inputs.append_value("1997-01-31 09:26:56.123"); + format1_builder.append_value("%+"); + format2_builder.append_value("%c"); + format3_builder.append_value("%Y-%m-%d %H:%M:%S%.f"); + + inputs.append_value("1997-01-31 09:26:56"); + format1_builder.append_value("%+"); + format2_builder.append_value("%c"); + format3_builder.append_value("%Y-%m-%d %H:%M:%S"); + + inputs.append_value("1997-01-31 092656"); + format1_builder.append_value("%+"); + format2_builder.append_value("%c"); + format3_builder.append_value("%Y-%m-%d %H%M%S"); + + inputs.append_value("1997-01-31 092656+04:00"); + format1_builder.append_value("%+"); + format2_builder.append_value("%c"); + format3_builder.append_value("%Y-%m-%d %H%M%S%:z"); + + inputs.append_value("Sun Jul 8 00:34:60 2001"); + format1_builder.append_value("%+"); + format2_builder.append_value("%c"); + format3_builder.append_value("%Y-%m-%d 00:00:00"); + + let args = [ + ColumnarValue::Array(Arc::new(inputs.finish()) as ArrayRef), + ColumnarValue::Array(Arc::new(format1_builder.finish()) as ArrayRef), + ColumnarValue::Array(Arc::new(format2_builder.finish()) as ArrayRef), + ColumnarValue::Array(Arc::new(format3_builder.finish()) as ArrayRef), + ]; b.iter(|| { - inputs.iter().enumerate().for_each(|(idx, i)| { - black_box(to_timestamp(vec![ - i.clone(), - format1.get(idx).unwrap().clone(), - format2.get(idx).unwrap().clone(), - format3.get(idx).unwrap().clone(), - ])); - }) + black_box( + to_timestamp() + .invoke(&args.clone()) + .expect("to_timestamp should work on valid values"), + ) }) }); } From 6a450b4fa2caa523cb42580c912f742dd1a1ed2b Mon Sep 17 00:00:00 2001 From: Bruce Ritchie Date: Mon, 18 Mar 2024 10:43:02 -0400 Subject: [PATCH 2/4] Remove reference to simd and nightly build as simd is no longer an available feature in DataFusion and building with nightly may not be a good recommendation when getting started. --- docs/source/user-guide/example-usage.md | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/docs/source/user-guide/example-usage.md b/docs/source/user-guide/example-usage.md index 1c5c8f49a16ac..c5eefbdaf1564 100644 --- a/docs/source/user-guide/example-usage.md +++ b/docs/source/user-guide/example-usage.md @@ -240,17 +240,11 @@ async fn main() -> datafusion::error::Result<()> { } ``` -Finally, in order to build with the `simd` optimization `cargo nightly` is required. - -```shell -rustup toolchain install nightly -``` - Based on the instruction set architecture you are building on you will want to configure the `target-cpu` as well, ideally with `native` or at least `avx2`. ```shell -RUSTFLAGS='-C target-cpu=native' cargo +nightly run --release +RUSTFLAGS='-C target-cpu=native' cargo run --release ``` ## Enable backtraces From a94a4f6c3317e8e952d34d968996fbd603cd0c2e Mon Sep 17 00:00:00 2001 From: Bruce Ritchie Date: Fri, 22 Mar 2024 22:20:35 -0400 Subject: [PATCH 3/4] Fixed missing trim() function. --- datafusion/functions/src/string/mod.rs | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/datafusion/functions/src/string/mod.rs b/datafusion/functions/src/string/mod.rs index 63026092f39ab..517869a256820 100644 --- a/datafusion/functions/src/string/mod.rs +++ b/datafusion/functions/src/string/mod.rs @@ -72,6 +72,11 @@ pub mod expr_fn { super::to_hex().call(vec![arg1]) } + #[doc = "Removes all characters, spaces by default, from both sides of a string"] + pub fn trim(args: Vec) -> Expr { + super::btrim().call(args) + } + #[doc = "Converts a string to uppercase."] pub fn upper(arg1: Expr) -> Expr { super::upper().call(vec![arg1]) From 59b648ab36133f03e4bd0f0c0cad0fd5f3f42af8 Mon Sep 17 00:00:00 2001 From: Bruce Ritchie Date: Sun, 24 Mar 2024 12:43:02 -0400 Subject: [PATCH 4/4] Move repeat, replace, split_part to datafusion_functions --- datafusion/expr/src/built_in_function.rs | 44 +---- datafusion/expr/src/expr_fn.rs | 6 - datafusion/functions/src/string/mod.rs | 24 +++ datafusion/functions/src/string/repeat.rs | 144 +++++++++++++++ datafusion/functions/src/string/replace.rs | 97 ++++++++++ datafusion/functions/src/string/split_part.rs | 170 ++++++++++++++++++ datafusion/physical-expr/src/functions.rs | 122 ++----------- .../physical-expr/src/string_expressions.rs | 67 ------- datafusion/proto/proto/datafusion.proto | 6 +- datafusion/proto/src/generated/pbjson.rs | 9 - datafusion/proto/src/generated/prost.rs | 12 +- .../proto/src/logical_plan/from_proto.rs | 26 +-- datafusion/proto/src/logical_plan/to_proto.rs | 3 - 13 files changed, 469 insertions(+), 261 deletions(-) create mode 100644 datafusion/functions/src/string/repeat.rs create mode 100644 datafusion/functions/src/string/replace.rs create mode 100644 datafusion/functions/src/string/split_part.rs diff --git a/datafusion/expr/src/built_in_function.rs b/datafusion/expr/src/built_in_function.rs index 1904d58cfc926..b3f17ae3c2ca4 100644 --- a/datafusion/expr/src/built_in_function.rs +++ b/datafusion/expr/src/built_in_function.rs @@ -123,18 +123,12 @@ pub enum BuiltinScalarFunction { Lpad, /// random Random, - /// repeat - Repeat, - /// replace - Replace, /// reverse Reverse, /// right Right, /// rpad Rpad, - /// split_part - SplitPart, /// strpos Strpos, /// substr @@ -238,12 +232,9 @@ impl BuiltinScalarFunction { BuiltinScalarFunction::Left => Volatility::Immutable, BuiltinScalarFunction::Lpad => Volatility::Immutable, BuiltinScalarFunction::Radians => Volatility::Immutable, - BuiltinScalarFunction::Repeat => Volatility::Immutable, - BuiltinScalarFunction::Replace => Volatility::Immutable, BuiltinScalarFunction::Reverse => Volatility::Immutable, BuiltinScalarFunction::Right => Volatility::Immutable, BuiltinScalarFunction::Rpad => Volatility::Immutable, - BuiltinScalarFunction::SplitPart => Volatility::Immutable, BuiltinScalarFunction::Strpos => Volatility::Immutable, BuiltinScalarFunction::Substr => Volatility::Immutable, BuiltinScalarFunction::Translate => Volatility::Immutable, @@ -293,12 +284,6 @@ impl BuiltinScalarFunction { BuiltinScalarFunction::Lpad => utf8_to_str_type(&input_expr_types[0], "lpad"), BuiltinScalarFunction::Pi => Ok(Float64), BuiltinScalarFunction::Random => Ok(Float64), - BuiltinScalarFunction::Repeat => { - utf8_to_str_type(&input_expr_types[0], "repeat") - } - BuiltinScalarFunction::Replace => { - utf8_to_str_type(&input_expr_types[0], "replace") - } BuiltinScalarFunction::Reverse => { utf8_to_str_type(&input_expr_types[0], "reverse") } @@ -306,9 +291,6 @@ impl BuiltinScalarFunction { utf8_to_str_type(&input_expr_types[0], "right") } BuiltinScalarFunction::Rpad => utf8_to_str_type(&input_expr_types[0], "rpad"), - BuiltinScalarFunction::SplitPart => { - utf8_to_str_type(&input_expr_types[0], "split_part") - } BuiltinScalarFunction::EndsWith => Ok(Boolean), BuiltinScalarFunction::Strpos => { utf8_to_int_type(&input_expr_types[0], "strpos/instr/position") @@ -417,21 +399,12 @@ impl BuiltinScalarFunction { self.volatility(), ) } - BuiltinScalarFunction::Left - | BuiltinScalarFunction::Repeat - | BuiltinScalarFunction::Right => Signature::one_of( - vec![Exact(vec![Utf8, Int64]), Exact(vec![LargeUtf8, Int64])], - self.volatility(), - ), - BuiltinScalarFunction::SplitPart => Signature::one_of( - vec![ - Exact(vec![Utf8, Utf8, Int64]), - Exact(vec![LargeUtf8, Utf8, Int64]), - Exact(vec![Utf8, LargeUtf8, Int64]), - Exact(vec![LargeUtf8, LargeUtf8, Int64]), - ], - self.volatility(), - ), + BuiltinScalarFunction::Left | BuiltinScalarFunction::Right => { + Signature::one_of( + vec![Exact(vec![Utf8, Int64]), Exact(vec![LargeUtf8, Int64])], + self.volatility(), + ) + } BuiltinScalarFunction::EndsWith | BuiltinScalarFunction::Strpos => { Signature::one_of( @@ -467,7 +440,7 @@ impl BuiltinScalarFunction { self.volatility(), ), - BuiltinScalarFunction::Replace | BuiltinScalarFunction::Translate => { + BuiltinScalarFunction::Translate => { Signature::one_of(vec![Exact(vec![Utf8, Utf8, Utf8])], self.volatility()) } BuiltinScalarFunction::Pi => Signature::exact(vec![], self.volatility()), @@ -637,12 +610,9 @@ impl BuiltinScalarFunction { BuiltinScalarFunction::InitCap => &["initcap"], BuiltinScalarFunction::Left => &["left"], BuiltinScalarFunction::Lpad => &["lpad"], - BuiltinScalarFunction::Repeat => &["repeat"], - BuiltinScalarFunction::Replace => &["replace"], BuiltinScalarFunction::Reverse => &["reverse"], BuiltinScalarFunction::Right => &["right"], BuiltinScalarFunction::Rpad => &["rpad"], - BuiltinScalarFunction::SplitPart => &["split_part"], BuiltinScalarFunction::Strpos => &["strpos", "instr", "position"], BuiltinScalarFunction::Substr => &["substr"], BuiltinScalarFunction::Translate => &["translate"], diff --git a/datafusion/expr/src/expr_fn.rs b/datafusion/expr/src/expr_fn.rs index 60db21e5f5fef..f75d8869671e1 100644 --- a/datafusion/expr/src/expr_fn.rs +++ b/datafusion/expr/src/expr_fn.rs @@ -598,11 +598,8 @@ scalar_expr!( ); scalar_expr!(InitCap, initcap, string, "converts the first letter of each word in `string` in uppercase and the remaining characters in lowercase"); scalar_expr!(Left, left, string n, "returns the first `n` characters in the `string`"); -scalar_expr!(Replace, replace, string from to, "replaces all occurrences of `from` with `to` in the `string`"); -scalar_expr!(Repeat, repeat, string n, "repeats the `string` to `n` times"); scalar_expr!(Reverse, reverse, string, "reverses the `string`"); scalar_expr!(Right, right, string n, "returns the last `n` characters in the `string`"); -scalar_expr!(SplitPart, split_part, string delimiter index, "splits a string based on a delimiter and picks out the desired field based on the index."); scalar_expr!(EndsWith, ends_with, string suffix, "whether the `string` ends with the `suffix`"); scalar_expr!(Strpos, strpos, string substring, "finds the position from where the `substring` matches the `string`"); scalar_expr!(Substr, substr, string position, "substring from the `position` to the end"); @@ -1056,13 +1053,10 @@ mod test { test_scalar_expr!(Left, left, string, count); test_nary_scalar_expr!(Lpad, lpad, string, count); test_nary_scalar_expr!(Lpad, lpad, string, count, characters); - test_scalar_expr!(Replace, replace, string, from, to); - test_scalar_expr!(Repeat, repeat, string, count); test_scalar_expr!(Reverse, reverse, string); test_scalar_expr!(Right, right, string, count); test_nary_scalar_expr!(Rpad, rpad, string, count); test_nary_scalar_expr!(Rpad, rpad, string, count, characters); - test_scalar_expr!(SplitPart, split_part, expr, delimiter, index); test_scalar_expr!(EndsWith, ends_with, string, characters); test_scalar_expr!(Strpos, strpos, string, substring); test_scalar_expr!(Substr, substr, string, position); diff --git a/datafusion/functions/src/string/mod.rs b/datafusion/functions/src/string/mod.rs index 165a7c6604043..d2b9fb2da8053 100644 --- a/datafusion/functions/src/string/mod.rs +++ b/datafusion/functions/src/string/mod.rs @@ -29,7 +29,10 @@ mod lower; mod ltrim; mod octet_length; mod overlay; +mod repeat; +mod replace; mod rtrim; +mod split_part; mod starts_with; mod to_hex; mod upper; @@ -43,8 +46,11 @@ make_udf_function!(ltrim::LtrimFunc, LTRIM, ltrim); make_udf_function!(lower::LowerFunc, LOWER, lower); make_udf_function!(octet_length::OctetLengthFunc, OCTET_LENGTH, octet_length); make_udf_function!(overlay::OverlayFunc, OVERLAY, overlay); +make_udf_function!(repeat::RepeatFunc, REPEAT, repeat); +make_udf_function!(replace::ReplaceFunc, REPLACE, replace); make_udf_function!(rtrim::RtrimFunc, RTRIM, rtrim); make_udf_function!(starts_with::StartsWithFunc, STARTS_WITH, starts_with); +make_udf_function!(split_part::SplitPartFunc, SPLIT_PART, split_part); make_udf_function!(to_hex::ToHexFunc, TO_HEX, to_hex); make_udf_function!(upper::UpperFunc, UPPER, upper); make_udf_function!(uuid::UuidFunc, UUID, uuid); @@ -87,11 +93,26 @@ pub mod expr_fn { super::overlay().call(args) } + #[doc = "Repeats the `string` to `n` times"] + pub fn repeat(string: Expr, n: Expr) -> Expr { + super::repeat().call(vec![string, n]) + } + + #[doc = "Replaces all occurrences of `from` with `to` in the `string`"] + pub fn replace(string: Expr, from: Expr, to: Expr) -> Expr { + super::replace().call(vec![string, from, to]) + } + #[doc = "Removes all characters, spaces by default, from the end of a string"] pub fn rtrim(args: Vec) -> Expr { super::rtrim().call(args) } + #[doc = "Splits a string based on a delimiter and picks out the desired field based on the index."] + pub fn split_part(string: Expr, delimiter: Expr, index: Expr) -> Expr { + super::split_part().call(vec![string, delimiter, index]) + } + #[doc = "Returns true if string starts with prefix."] pub fn starts_with(arg1: Expr, arg2: Expr) -> Expr { super::starts_with().call(vec![arg1, arg2]) @@ -128,7 +149,10 @@ pub fn functions() -> Vec> { ltrim(), octet_length(), overlay(), + repeat(), + replace(), rtrim(), + split_part(), starts_with(), to_hex(), upper(), diff --git a/datafusion/functions/src/string/repeat.rs b/datafusion/functions/src/string/repeat.rs new file mode 100644 index 0000000000000..83bc929cb9a48 --- /dev/null +++ b/datafusion/functions/src/string/repeat.rs @@ -0,0 +1,144 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +use std::any::Any; +use std::sync::Arc; + +use arrow::array::{ArrayRef, GenericStringArray, OffsetSizeTrait}; +use arrow::datatypes::DataType; + +use datafusion_common::cast::{as_generic_string_array, as_int64_array}; +use datafusion_common::{exec_err, Result}; +use datafusion_expr::TypeSignature::*; +use datafusion_expr::{ColumnarValue, Volatility}; +use datafusion_expr::{ScalarUDFImpl, Signature}; + +use crate::string::common::*; + +#[derive(Debug)] +pub(super) struct RepeatFunc { + signature: Signature, +} + +impl RepeatFunc { + pub fn new() -> Self { + use DataType::*; + Self { + signature: Signature::one_of( + vec![Exact(vec![Utf8, Int64]), Exact(vec![LargeUtf8, Int64])], + Volatility::Immutable, + ), + } + } +} + +impl ScalarUDFImpl for RepeatFunc { + fn as_any(&self) -> &dyn Any { + self + } + + fn name(&self) -> &str { + "repeat" + } + + fn signature(&self) -> &Signature { + &self.signature + } + + fn return_type(&self, arg_types: &[DataType]) -> Result { + utf8_to_str_type(&arg_types[0], "repeat") + } + + fn invoke(&self, args: &[ColumnarValue]) -> Result { + match args[0].data_type() { + DataType::Utf8 => make_scalar_function(repeat::, vec![])(args), + DataType::LargeUtf8 => make_scalar_function(repeat::, vec![])(args), + other => exec_err!("Unsupported data type {other:?} for function repeat"), + } + } +} + +/// Repeats string the specified number of times. +/// repeat('Pg', 4) = 'PgPgPgPg' +fn repeat(args: &[ArrayRef]) -> Result { + let string_array = as_generic_string_array::(&args[0])?; + let number_array = as_int64_array(&args[1])?; + + let result = string_array + .iter() + .zip(number_array.iter()) + .map(|(string, number)| match (string, number) { + (Some(string), Some(number)) => Some(string.repeat(number as usize)), + _ => None, + }) + .collect::>(); + + Ok(Arc::new(result) as ArrayRef) +} + +#[cfg(test)] +mod tests { + use arrow::array::{Array, StringArray}; + use arrow::datatypes::DataType::Utf8; + + use datafusion_common::Result; + use datafusion_common::ScalarValue; + use datafusion_expr::{ColumnarValue, ScalarUDFImpl}; + + use crate::string::common::test::test_function; + use crate::string::repeat::RepeatFunc; + + #[test] + fn test_functions() -> Result<()> { + test_function!( + RepeatFunc::new(), + &[ + ColumnarValue::Scalar(ScalarValue::Utf8(Some(String::from("Pg")))), + ColumnarValue::Scalar(ScalarValue::Int64(Some(4))), + ], + Ok(Some("PgPgPgPg")), + &str, + Utf8, + StringArray + ); + + test_function!( + RepeatFunc::new(), + &[ + ColumnarValue::Scalar(ScalarValue::Utf8(None)), + ColumnarValue::Scalar(ScalarValue::Int64(Some(4))), + ], + Ok(None), + &str, + Utf8, + StringArray + ); + test_function!( + RepeatFunc::new(), + &[ + ColumnarValue::Scalar(ScalarValue::Utf8(Some(String::from("Pg")))), + ColumnarValue::Scalar(ScalarValue::Int64(None)), + ], + Ok(None), + &str, + Utf8, + StringArray + ); + + Ok(()) + } +} diff --git a/datafusion/functions/src/string/replace.rs b/datafusion/functions/src/string/replace.rs new file mode 100644 index 0000000000000..e352442960907 --- /dev/null +++ b/datafusion/functions/src/string/replace.rs @@ -0,0 +1,97 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +use std::any::Any; +use std::sync::Arc; + +use arrow::array::{ArrayRef, GenericStringArray, OffsetSizeTrait}; +use arrow::datatypes::DataType; + +use datafusion_common::cast::as_generic_string_array; +use datafusion_common::{exec_err, Result}; +use datafusion_expr::TypeSignature::*; +use datafusion_expr::{ColumnarValue, Volatility}; +use datafusion_expr::{ScalarUDFImpl, Signature}; + +use crate::string::common::*; + +#[derive(Debug)] +pub(super) struct ReplaceFunc { + signature: Signature, +} + +impl ReplaceFunc { + pub fn new() -> Self { + use DataType::*; + Self { + signature: Signature::one_of( + vec![Exact(vec![Utf8, Utf8, Utf8])], + Volatility::Immutable, + ), + } + } +} + +impl ScalarUDFImpl for ReplaceFunc { + fn as_any(&self) -> &dyn Any { + self + } + + fn name(&self) -> &str { + "replace" + } + + fn signature(&self) -> &Signature { + &self.signature + } + + fn return_type(&self, arg_types: &[DataType]) -> Result { + utf8_to_str_type(&arg_types[0], "replace") + } + + fn invoke(&self, args: &[ColumnarValue]) -> Result { + match args[0].data_type() { + DataType::Utf8 => make_scalar_function(replace::, vec![])(args), + DataType::LargeUtf8 => make_scalar_function(replace::, vec![])(args), + other => { + exec_err!("Unsupported data type {other:?} for function replace") + } + } + } +} + +/// Replaces all occurrences in string of substring from with substring to. +/// replace('abcdefabcdef', 'cd', 'XX') = 'abXXefabXXef' +fn replace(args: &[ArrayRef]) -> Result { + let string_array = as_generic_string_array::(&args[0])?; + let from_array = as_generic_string_array::(&args[1])?; + let to_array = as_generic_string_array::(&args[2])?; + + let result = string_array + .iter() + .zip(from_array.iter()) + .zip(to_array.iter()) + .map(|((string, from), to)| match (string, from, to) { + (Some(string), Some(from), Some(to)) => Some(string.replace(from, to)), + _ => None, + }) + .collect::>(); + + Ok(Arc::new(result) as ArrayRef) +} + +mod test {} diff --git a/datafusion/functions/src/string/split_part.rs b/datafusion/functions/src/string/split_part.rs new file mode 100644 index 0000000000000..af201e90fcf6d --- /dev/null +++ b/datafusion/functions/src/string/split_part.rs @@ -0,0 +1,170 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +use std::any::Any; +use std::sync::Arc; + +use arrow::array::{ArrayRef, GenericStringArray, OffsetSizeTrait}; +use arrow::datatypes::DataType; + +use datafusion_common::cast::{as_generic_string_array, as_int64_array}; +use datafusion_common::{exec_err, Result}; +use datafusion_expr::TypeSignature::*; +use datafusion_expr::{ColumnarValue, Volatility}; +use datafusion_expr::{ScalarUDFImpl, Signature}; + +use crate::string::common::*; + +#[derive(Debug)] +pub(super) struct SplitPartFunc { + signature: Signature, +} + +impl SplitPartFunc { + pub fn new() -> Self { + use DataType::*; + Self { + signature: Signature::one_of( + vec![ + Exact(vec![Utf8, Utf8, Int64]), + Exact(vec![LargeUtf8, Utf8, Int64]), + Exact(vec![Utf8, LargeUtf8, Int64]), + Exact(vec![LargeUtf8, LargeUtf8, Int64]), + ], + Volatility::Immutable, + ), + } + } +} + +impl ScalarUDFImpl for SplitPartFunc { + fn as_any(&self) -> &dyn Any { + self + } + + fn name(&self) -> &str { + "split_part" + } + + fn signature(&self) -> &Signature { + &self.signature + } + + fn return_type(&self, arg_types: &[DataType]) -> Result { + utf8_to_str_type(&arg_types[0], "split_part") + } + + fn invoke(&self, args: &[ColumnarValue]) -> Result { + match args[0].data_type() { + DataType::Utf8 => make_scalar_function(split_part::, vec![])(args), + DataType::LargeUtf8 => make_scalar_function(split_part::, vec![])(args), + other => { + exec_err!("Unsupported data type {other:?} for function split_part") + } + } + } +} + +/// Splits string at occurrences of delimiter and returns the n'th field (counting from one). +/// split_part('abc~@~def~@~ghi', '~@~', 2) = 'def' +fn split_part(args: &[ArrayRef]) -> Result { + let string_array = as_generic_string_array::(&args[0])?; + let delimiter_array = as_generic_string_array::(&args[1])?; + let n_array = as_int64_array(&args[2])?; + let result = string_array + .iter() + .zip(delimiter_array.iter()) + .zip(n_array.iter()) + .map(|((string, delimiter), n)| match (string, delimiter, n) { + (Some(string), Some(delimiter), Some(n)) => { + if n <= 0 { + exec_err!("field position must be greater than zero") + } else { + let split_string: Vec<&str> = string.split(delimiter).collect(); + match split_string.get(n as usize - 1) { + Some(s) => Ok(Some(*s)), + None => Ok(Some("")), + } + } + } + _ => Ok(None), + }) + .collect::>>()?; + + Ok(Arc::new(result) as ArrayRef) +} + +#[cfg(test)] +mod tests { + use arrow::array::{Array, StringArray}; + use arrow::datatypes::DataType::Utf8; + + use datafusion_common::ScalarValue; + use datafusion_common::{exec_err, Result}; + use datafusion_expr::{ColumnarValue, ScalarUDFImpl}; + + use crate::string::common::test::test_function; + use crate::string::split_part::SplitPartFunc; + + #[test] + fn test_functions() -> Result<()> { + test_function!( + SplitPartFunc::new(), + &[ + ColumnarValue::Scalar(ScalarValue::Utf8(Some(String::from( + "abc~@~def~@~ghi" + )))), + ColumnarValue::Scalar(ScalarValue::Utf8(Some(String::from("~@~")))), + ColumnarValue::Scalar(ScalarValue::Int64(Some(2))), + ], + Ok(Some("def")), + &str, + Utf8, + StringArray + ); + test_function!( + SplitPartFunc::new(), + &[ + ColumnarValue::Scalar(ScalarValue::Utf8(Some(String::from( + "abc~@~def~@~ghi" + )))), + ColumnarValue::Scalar(ScalarValue::Utf8(Some(String::from("~@~")))), + ColumnarValue::Scalar(ScalarValue::Int64(Some(20))), + ], + Ok(Some("")), + &str, + Utf8, + StringArray + ); + test_function!( + SplitPartFunc::new(), + &[ + ColumnarValue::Scalar(ScalarValue::Utf8(Some(String::from( + "abc~@~def~@~ghi" + )))), + ColumnarValue::Scalar(ScalarValue::Utf8(Some(String::from("~@~")))), + ColumnarValue::Scalar(ScalarValue::Int64(Some(-1))), + ], + exec_err!("field position must be greater than zero"), + &str, + Utf8, + StringArray + ); + + Ok(()) + } +} diff --git a/datafusion/physical-expr/src/functions.rs b/datafusion/physical-expr/src/functions.rs index 8759adc89b404..163598c2df827 100644 --- a/datafusion/physical-expr/src/functions.rs +++ b/datafusion/physical-expr/src/functions.rs @@ -30,17 +30,16 @@ //! an argument i32 is passed to a function that supports f64, the //! argument is automatically is coerced to f64. -use crate::sort_properties::SortProperties; -use crate::{ - conditional_expressions, math_expressions, string_expressions, PhysicalExpr, - ScalarFunctionExpr, -}; +use std::ops::Neg; +use std::sync::Arc; + use arrow::{ array::ArrayRef, compute::kernels::length::bit_length, datatypes::{DataType, Int32Type, Int64Type, Schema}, }; use arrow_array::Array; + use datafusion_common::{exec_err, Result, ScalarValue}; use datafusion_expr::execution_props::ExecutionProps; pub use datafusion_expr::FuncMonotonicity; @@ -49,8 +48,12 @@ use datafusion_expr::{ type_coercion::functions::data_types, BuiltinScalarFunction, ColumnarValue, ScalarFunctionImplementation, }; -use std::ops::Neg; -use std::sync::Arc; + +use crate::sort_properties::SortProperties; +use crate::{ + conditional_expressions, math_expressions, string_expressions, PhysicalExpr, + ScalarFunctionExpr, +}; /// Create a physical (function) expression. /// This function errors when `args`' can't be coerced to a valid argument type of the function. @@ -328,26 +331,6 @@ pub fn create_physical_fun( } other => exec_err!("Unsupported data type {other:?} for function lpad"), }), - BuiltinScalarFunction::Repeat => Arc::new(|args| match args[0].data_type() { - DataType::Utf8 => { - make_scalar_function_inner(string_expressions::repeat::)(args) - } - DataType::LargeUtf8 => { - make_scalar_function_inner(string_expressions::repeat::)(args) - } - other => exec_err!("Unsupported data type {other:?} for function repeat"), - }), - BuiltinScalarFunction::Replace => Arc::new(|args| match args[0].data_type() { - DataType::Utf8 => { - make_scalar_function_inner(string_expressions::replace::)(args) - } - DataType::LargeUtf8 => { - make_scalar_function_inner(string_expressions::replace::)(args) - } - other => { - exec_err!("Unsupported data type {other:?} for function replace") - } - }), BuiltinScalarFunction::Reverse => Arc::new(|args| match args[0].data_type() { DataType::Utf8 => { let func = @@ -387,17 +370,6 @@ pub fn create_physical_fun( } other => exec_err!("Unsupported data type {other:?} for function rpad"), }), - BuiltinScalarFunction::SplitPart => Arc::new(|args| match args[0].data_type() { - DataType::Utf8 => { - make_scalar_function_inner(string_expressions::split_part::)(args) - } - DataType::LargeUtf8 => { - make_scalar_function_inner(string_expressions::split_part::)(args) - } - other => { - exec_err!("Unsupported data type {other:?} for function split_part") - } - }), BuiltinScalarFunction::EndsWith => Arc::new(|args| match args[0].data_type() { DataType::Utf8 => { make_scalar_function_inner(string_expressions::ends_with::)(args) @@ -568,9 +540,6 @@ fn func_order_in_one_dimension( #[cfg(test)] mod tests { - use super::*; - use crate::expressions::lit; - use crate::expressions::try_cast; use arrow::{ array::{ Array, ArrayRef, BooleanArray, Float32Array, Float64Array, Int32Array, @@ -579,12 +548,18 @@ mod tests { datatypes::Field, record_batch::RecordBatch, }; + use datafusion_common::cast::as_uint64_array; use datafusion_common::{exec_err, internal_err, plan_err}; use datafusion_common::{DataFusionError, Result, ScalarValue}; use datafusion_expr::type_coercion::functions::data_types; use datafusion_expr::Signature; + use crate::expressions::lit; + use crate::expressions::try_cast; + + use super::*; + /// $FUNC function to test /// $ARGS arguments (vec) to pass to function /// $EXPECTED a Result> where Result allows testing errors and Option allows testing Null @@ -1124,33 +1099,6 @@ mod tests { Utf8, StringArray ); - test_function!( - Repeat, - &[lit("Pg"), lit(ScalarValue::Int64(Some(4))),], - Ok(Some("PgPgPgPg")), - &str, - Utf8, - StringArray - ); - test_function!( - Repeat, - &[ - lit(ScalarValue::Utf8(None)), - lit(ScalarValue::Int64(Some(4))), - ], - Ok(None), - &str, - Utf8, - StringArray - ); - test_function!( - Repeat, - &[lit("Pg"), lit(ScalarValue::Int64(None)),], - Ok(None), - &str, - Utf8, - StringArray - ); #[cfg(feature = "unicode_expressions")] test_function!( Reverse, @@ -1447,42 +1395,6 @@ mod tests { Utf8, StringArray ); - test_function!( - SplitPart, - &[ - lit("abc~@~def~@~ghi"), - lit("~@~"), - lit(ScalarValue::Int64(Some(2))), - ], - Ok(Some("def")), - &str, - Utf8, - StringArray - ); - test_function!( - SplitPart, - &[ - lit("abc~@~def~@~ghi"), - lit("~@~"), - lit(ScalarValue::Int64(Some(20))), - ], - Ok(Some("")), - &str, - Utf8, - StringArray - ); - test_function!( - SplitPart, - &[ - lit("abc~@~def~@~ghi"), - lit("~@~"), - lit(ScalarValue::Int64(Some(-1))), - ], - exec_err!("field position must be greater than zero"), - &str, - Utf8, - StringArray - ); test_function!( EndsWith, &[lit("alphabet"), lit("alph"),], @@ -1812,7 +1724,7 @@ mod tests { let schema = Schema::new(vec![Field::new("a", DataType::Int32, false)]); // pick some arbitrary functions to test - let funs = [BuiltinScalarFunction::Concat, BuiltinScalarFunction::Repeat]; + let funs = [BuiltinScalarFunction::Concat]; for fun in funs.iter() { let expr = create_physical_expr_with_type_coercion( diff --git a/datafusion/physical-expr/src/string_expressions.rs b/datafusion/physical-expr/src/string_expressions.rs index 766e167a94261..812b746354a4a 100644 --- a/datafusion/physical-expr/src/string_expressions.rs +++ b/datafusion/physical-expr/src/string_expressions.rs @@ -242,73 +242,6 @@ pub fn instr(args: &[ArrayRef]) -> Result { } } -/// Repeats string the specified number of times. -/// repeat('Pg', 4) = 'PgPgPgPg' -pub fn repeat(args: &[ArrayRef]) -> Result { - let string_array = as_generic_string_array::(&args[0])?; - let number_array = as_int64_array(&args[1])?; - - let result = string_array - .iter() - .zip(number_array.iter()) - .map(|(string, number)| match (string, number) { - (Some(string), Some(number)) => Some(string.repeat(number as usize)), - _ => None, - }) - .collect::>(); - - Ok(Arc::new(result) as ArrayRef) -} - -/// Replaces all occurrences in string of substring from with substring to. -/// replace('abcdefabcdef', 'cd', 'XX') = 'abXXefabXXef' -pub fn replace(args: &[ArrayRef]) -> Result { - let string_array = as_generic_string_array::(&args[0])?; - let from_array = as_generic_string_array::(&args[1])?; - let to_array = as_generic_string_array::(&args[2])?; - - let result = string_array - .iter() - .zip(from_array.iter()) - .zip(to_array.iter()) - .map(|((string, from), to)| match (string, from, to) { - (Some(string), Some(from), Some(to)) => Some(string.replace(from, to)), - _ => None, - }) - .collect::>(); - - Ok(Arc::new(result) as ArrayRef) -} - -/// Splits string at occurrences of delimiter and returns the n'th field (counting from one). -/// split_part('abc~@~def~@~ghi', '~@~', 2) = 'def' -pub fn split_part(args: &[ArrayRef]) -> Result { - let string_array = as_generic_string_array::(&args[0])?; - let delimiter_array = as_generic_string_array::(&args[1])?; - let n_array = as_int64_array(&args[2])?; - let result = string_array - .iter() - .zip(delimiter_array.iter()) - .zip(n_array.iter()) - .map(|((string, delimiter), n)| match (string, delimiter, n) { - (Some(string), Some(delimiter), Some(n)) => { - if n <= 0 { - exec_err!("field position must be greater than zero") - } else { - let split_string: Vec<&str> = string.split(delimiter).collect(); - match split_string.get(n as usize - 1) { - Some(s) => Ok(Some(*s)), - None => Ok(Some("")), - } - } - } - _ => Ok(None), - }) - .collect::>>()?; - - Ok(Arc::new(result) as ArrayRef) -} - /// Returns true if string starts with prefix. /// starts_with('alphabet', 'alph') = 't' pub fn starts_with(args: &[ArrayRef]) -> Result { diff --git a/datafusion/proto/proto/datafusion.proto b/datafusion/proto/proto/datafusion.proto index 795995ce2c468..297e355dd7b18 100644 --- a/datafusion/proto/proto/datafusion.proto +++ b/datafusion/proto/proto/datafusion.proto @@ -581,8 +581,8 @@ enum ScalarFunction { // 37 was OctetLength Random = 38; // 39 was RegexpReplace - Repeat = 40; - Replace = 41; + // 40 was Repeat + // 41 was Replace Reverse = 42; Right = 43; Rpad = 44; @@ -591,7 +591,7 @@ enum ScalarFunction { // 47 was SHA256 // 48 was SHA384 // 49 was SHA512 - SplitPart = 50; + // 50 was SplitPart // StartsWith = 51; Strpos = 52; Substr = 53; diff --git a/datafusion/proto/src/generated/pbjson.rs b/datafusion/proto/src/generated/pbjson.rs index 3941171e4fe6e..dce815f0f2345 100644 --- a/datafusion/proto/src/generated/pbjson.rs +++ b/datafusion/proto/src/generated/pbjson.rs @@ -22937,12 +22937,9 @@ impl serde::Serialize for ScalarFunction { Self::Left => "Left", Self::Lpad => "Lpad", Self::Random => "Random", - Self::Repeat => "Repeat", - Self::Replace => "Replace", Self::Reverse => "Reverse", Self::Right => "Right", Self::Rpad => "Rpad", - Self::SplitPart => "SplitPart", Self::Strpos => "Strpos", Self::Substr => "Substr", Self::Translate => "Translate", @@ -23002,12 +22999,9 @@ impl<'de> serde::Deserialize<'de> for ScalarFunction { "Left", "Lpad", "Random", - "Repeat", - "Replace", "Reverse", "Right", "Rpad", - "SplitPart", "Strpos", "Substr", "Translate", @@ -23096,12 +23090,9 @@ impl<'de> serde::Deserialize<'de> for ScalarFunction { "Left" => Ok(ScalarFunction::Left), "Lpad" => Ok(ScalarFunction::Lpad), "Random" => Ok(ScalarFunction::Random), - "Repeat" => Ok(ScalarFunction::Repeat), - "Replace" => Ok(ScalarFunction::Replace), "Reverse" => Ok(ScalarFunction::Reverse), "Right" => Ok(ScalarFunction::Right), "Rpad" => Ok(ScalarFunction::Rpad), - "SplitPart" => Ok(ScalarFunction::SplitPart), "Strpos" => Ok(ScalarFunction::Strpos), "Substr" => Ok(ScalarFunction::Substr), "Translate" => Ok(ScalarFunction::Translate), diff --git a/datafusion/proto/src/generated/prost.rs b/datafusion/proto/src/generated/prost.rs index 58fda7fcb5ad6..2292687b45a6c 100644 --- a/datafusion/proto/src/generated/prost.rs +++ b/datafusion/proto/src/generated/prost.rs @@ -2880,8 +2880,8 @@ pub enum ScalarFunction { /// 37 was OctetLength Random = 38, /// 39 was RegexpReplace - Repeat = 40, - Replace = 41, + /// 40 was Repeat + /// 41 was Replace Reverse = 42, Right = 43, Rpad = 44, @@ -2890,7 +2890,7 @@ pub enum ScalarFunction { /// 47 was SHA256 /// 48 was SHA384 /// 49 was SHA512 - SplitPart = 50, + /// 50 was SplitPart /// StartsWith = 51; Strpos = 52, Substr = 53, @@ -3010,12 +3010,9 @@ impl ScalarFunction { ScalarFunction::Left => "Left", ScalarFunction::Lpad => "Lpad", ScalarFunction::Random => "Random", - ScalarFunction::Repeat => "Repeat", - ScalarFunction::Replace => "Replace", ScalarFunction::Reverse => "Reverse", ScalarFunction::Right => "Right", ScalarFunction::Rpad => "Rpad", - ScalarFunction::SplitPart => "SplitPart", ScalarFunction::Strpos => "Strpos", ScalarFunction::Substr => "Substr", ScalarFunction::Translate => "Translate", @@ -3069,12 +3066,9 @@ impl ScalarFunction { "Left" => Some(Self::Left), "Lpad" => Some(Self::Lpad), "Random" => Some(Self::Random), - "Repeat" => Some(Self::Repeat), - "Replace" => Some(Self::Replace), "Reverse" => Some(Self::Reverse), "Right" => Some(Self::Right), "Rpad" => Some(Self::Rpad), - "SplitPart" => Some(Self::SplitPart), "Strpos" => Some(Self::Strpos), "Substr" => Some(Self::Substr), "Translate" => Some(Self::Translate), diff --git a/datafusion/proto/src/logical_plan/from_proto.rs b/datafusion/proto/src/logical_plan/from_proto.rs index 3b44c1cb276dd..b78e3ae6dc618 100644 --- a/datafusion/proto/src/logical_plan/from_proto.rs +++ b/datafusion/proto/src/logical_plan/from_proto.rs @@ -53,11 +53,10 @@ use datafusion_expr::{ expr::{self, InList, Sort, WindowFunction}, factorial, find_in_set, floor, gcd, initcap, iszero, lcm, left, ln, log, log10, log2, logical_plan::{PlanType, StringifiedPlan}, - lpad, nanvl, pi, power, radians, random, repeat, replace, reverse, right, round, - rpad, signum, sin, sinh, split_part, sqrt, strpos, substr, substr_index, substring, - translate, trunc, AggregateFunction, Between, BinaryExpr, BuiltInWindowFunction, - BuiltinScalarFunction, Case, Cast, Expr, GetFieldAccess, GetIndexedField, - GroupingSet, + lpad, nanvl, pi, power, radians, random, reverse, right, round, rpad, signum, sin, + sinh, sqrt, strpos, substr, substr_index, substring, translate, trunc, + AggregateFunction, Between, BinaryExpr, BuiltInWindowFunction, BuiltinScalarFunction, + Case, Cast, Expr, GetFieldAccess, GetIndexedField, GroupingSet, GroupingSet::GroupingSets, JoinConstraint, JoinType, Like, Operator, TryCast, WindowFrame, WindowFrameBound, WindowFrameUnits, @@ -468,12 +467,9 @@ impl From<&protobuf::ScalarFunction> for BuiltinScalarFunction { ScalarFunction::Left => Self::Left, ScalarFunction::Lpad => Self::Lpad, ScalarFunction::Random => Self::Random, - ScalarFunction::Repeat => Self::Repeat, - ScalarFunction::Replace => Self::Replace, ScalarFunction::Reverse => Self::Reverse, ScalarFunction::Right => Self::Right, ScalarFunction::Rpad => Self::Rpad, - ScalarFunction::SplitPart => Self::SplitPart, ScalarFunction::Strpos => Self::Strpos, ScalarFunction::Substr => Self::Substr, ScalarFunction::Translate => Self::Translate, @@ -1445,15 +1441,6 @@ pub fn parse_expr( parse_expr(&args[1], registry, codec)?, )), ScalarFunction::Random => Ok(random()), - ScalarFunction::Repeat => Ok(repeat( - parse_expr(&args[0], registry, codec)?, - parse_expr(&args[1], registry, codec)?, - )), - ScalarFunction::Replace => Ok(replace( - parse_expr(&args[0], registry, codec)?, - parse_expr(&args[1], registry, codec)?, - parse_expr(&args[2], registry, codec)?, - )), ScalarFunction::Reverse => { Ok(reverse(parse_expr(&args[0], registry, codec)?)) } @@ -1485,11 +1472,6 @@ pub fn parse_expr( .map(|expr| parse_expr(expr, registry, codec)) .collect::, _>>()?, )), - ScalarFunction::SplitPart => Ok(split_part( - parse_expr(&args[0], registry, codec)?, - parse_expr(&args[1], registry, codec)?, - parse_expr(&args[2], registry, codec)?, - )), ScalarFunction::EndsWith => Ok(ends_with( parse_expr(&args[0], registry, codec)?, parse_expr(&args[1], registry, codec)?, diff --git a/datafusion/proto/src/logical_plan/to_proto.rs b/datafusion/proto/src/logical_plan/to_proto.rs index 446a91a39a1b1..0c0f0c6e0a922 100644 --- a/datafusion/proto/src/logical_plan/to_proto.rs +++ b/datafusion/proto/src/logical_plan/to_proto.rs @@ -1490,12 +1490,9 @@ impl TryFrom<&BuiltinScalarFunction> for protobuf::ScalarFunction { BuiltinScalarFunction::Left => Self::Left, BuiltinScalarFunction::Lpad => Self::Lpad, BuiltinScalarFunction::Random => Self::Random, - BuiltinScalarFunction::Repeat => Self::Repeat, - BuiltinScalarFunction::Replace => Self::Replace, BuiltinScalarFunction::Reverse => Self::Reverse, BuiltinScalarFunction::Right => Self::Right, BuiltinScalarFunction::Rpad => Self::Rpad, - BuiltinScalarFunction::SplitPart => Self::SplitPart, BuiltinScalarFunction::Strpos => Self::Strpos, BuiltinScalarFunction::Substr => Self::Substr, BuiltinScalarFunction::Translate => Self::Translate,