-
Notifications
You must be signed in to change notification settings - Fork 4k
ARROW-11446: [DataFusion] Added support for scalarValue in Builtin functions. #9376
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
e551bb1
d9f076d
87d787f
75eec09
545d2d4
ae0d125
ca5bf89
27b01cf
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change | ||||
|---|---|---|---|---|---|---|
|
|
@@ -17,17 +17,26 @@ | |||||
|
|
||||||
| //! Crypto expressions | ||||||
|
|
||||||
| use std::sync::Arc; | ||||||
|
|
||||||
| use md5::Md5; | ||||||
| use sha2::{ | ||||||
| digest::Output as SHA2DigestOutput, Digest as SHA2Digest, Sha224, Sha256, Sha384, | ||||||
| Sha512, | ||||||
| }; | ||||||
|
|
||||||
| use crate::error::{DataFusionError, Result}; | ||||||
| use arrow::array::{ | ||||||
| ArrayRef, GenericBinaryArray, GenericStringArray, StringOffsetSizeTrait, | ||||||
| use crate::{ | ||||||
| error::{DataFusionError, Result}, | ||||||
| scalar::ScalarValue, | ||||||
| }; | ||||||
| use arrow::{ | ||||||
| array::{Array, BinaryArray, GenericStringArray, StringOffsetSizeTrait}, | ||||||
| datatypes::DataType, | ||||||
| }; | ||||||
|
|
||||||
| use super::{string_expressions::unary_string_function, ColumnarValue}; | ||||||
|
|
||||||
| /// Computes the md5 of a string. | ||||||
| fn md5_process(input: &str) -> String { | ||||||
| let mut digest = Md5::default(); | ||||||
| digest.update(&input); | ||||||
|
|
@@ -49,58 +58,142 @@ fn sha_process<D: SHA2Digest + Default>(input: &str) -> SHA2DigestOutput<D> { | |||||
| digest.finalize() | ||||||
| } | ||||||
|
|
||||||
| macro_rules! crypto_unary_string_function { | ||||||
| ($NAME:ident, $FUNC:expr) => { | ||||||
| /// crypto function that accepts Utf8 or LargeUtf8 and returns Utf8 string | ||||||
| pub fn $NAME<T: StringOffsetSizeTrait>( | ||||||
| args: &[ArrayRef], | ||||||
| ) -> Result<GenericStringArray<i32>> { | ||||||
| if args.len() != 1 { | ||||||
| return Err(DataFusionError::Internal(format!( | ||||||
| "{:?} args were supplied but {} takes exactly one argument", | ||||||
| args.len(), | ||||||
| String::from(stringify!($NAME)), | ||||||
| ))); | ||||||
| } | ||||||
| /// # Errors | ||||||
| /// This function errors when: | ||||||
| /// * the number of arguments is not 1 | ||||||
| /// * the first argument is not castable to a `GenericStringArray` | ||||||
| fn unary_binary_function<T, R, F>( | ||||||
| args: &[&dyn Array], | ||||||
| op: F, | ||||||
| name: &str, | ||||||
| ) -> Result<BinaryArray> | ||||||
| where | ||||||
| R: AsRef<[u8]>, | ||||||
| T: StringOffsetSizeTrait, | ||||||
| F: Fn(&str) -> R, | ||||||
| { | ||||||
| if args.len() != 1 { | ||||||
| return Err(DataFusionError::Internal(format!( | ||||||
| "{:?} args were supplied but {} takes exactly one argument", | ||||||
| args.len(), | ||||||
| name, | ||||||
| ))); | ||||||
| } | ||||||
|
|
||||||
| let array = args[0] | ||||||
| .as_any() | ||||||
| .downcast_ref::<GenericStringArray<T>>() | ||||||
| .ok_or_else(|| { | ||||||
| DataFusionError::Internal("failed to downcast to string".to_string()) | ||||||
| })?; | ||||||
|
|
||||||
| let array = args[0] | ||||||
| .as_any() | ||||||
| .downcast_ref::<GenericStringArray<T>>() | ||||||
| .unwrap(); | ||||||
| // first map is the iterator, second is for the `Option<_>` | ||||||
| Ok(array.iter().map(|x| x.map(|x| op(x))).collect()) | ||||||
|
||||||
| Ok(array.iter().map(|x| x.map(|x| op(x))).collect()) | |
| Ok(array.iter().map(|x| x.map(op)).collect()) |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Good idea; I also thought it would work. However, because the functions have different signatures, a deref is needed, and thus we need to write the closure explicitly. The same applies to md5_process.
Outdated
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
| let result = a.as_ref().map(|x| (op)(x).as_ref().to_vec()); | |
| let result = a.as_ref().map(|x| op(x).as_ref().to_vec()); |
Outdated
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
| let result = a.as_ref().map(|x| (op)(x).as_ref().to_vec()); | |
| let result = a.as_ref().map(|x| op(x).as_ref().to_vec()); |
Outdated
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
| let result = a.as_ref().map(|x| md5_process(x)); | |
| let result = a.as_ref().map(md5_process); |
Outdated
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
| let result = a.as_ref().map(|x| md5_process(x)); | |
| let result = a.as_ref().map(md5_process); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
👍