diff --git a/datafusion/functions/src/crypto/digest.rs b/datafusion/functions/src/crypto/digest.rs
index c9dd3c1f56a29..9ec07b1cab53d 100644
--- a/datafusion/functions/src/crypto/digest.rs
+++ b/datafusion/functions/src/crypto/digest.rs
@@ -19,10 +19,12 @@
 use super::basic::{digest, utf8_or_binary_to_binary_type};
 use arrow::datatypes::DataType;
 use datafusion_common::Result;
+use datafusion_expr::scalar_doc_sections::DOC_SECTION_HASHING;
 use datafusion_expr::{
-    ColumnarValue, ScalarUDFImpl, Signature, TypeSignature::*, Volatility,
+    ColumnarValue, Documentation, ScalarUDFImpl, Signature, TypeSignature::*, Volatility,
 };
 use std::any::Any;
+use std::sync::OnceLock;
 
 #[derive(Debug)]
 pub struct DigestFunc {
@@ -69,4 +71,49 @@ impl ScalarUDFImpl for DigestFunc {
     fn invoke(&self, args: &[ColumnarValue]) -> Result<ColumnarValue> {
         digest(args)
     }
+
+    fn documentation(&self) -> Option<&Documentation> {
+        Some(get_digest_doc())
+    }
+}
+
+static DOCUMENTATION: OnceLock<Documentation> = OnceLock::new();
+
+/// Returns the user-facing documentation for `digest`, built once on first
+/// call and cached in `DOCUMENTATION` for the life of the process.
+fn get_digest_doc() -> &'static Documentation {
+    DOCUMENTATION.get_or_init(|| {
+        Documentation::builder()
+            .with_doc_section(DOC_SECTION_HASHING)
+            .with_description(
+                "Computes the binary hash of an expression using the specified algorithm.",
+            )
+            .with_syntax_example("digest(expression, algorithm)")
+            .with_sql_example(
+                r#"```sql
+> select digest('foo', 'sha256');
++------------------------------------------+
+| digest(Utf8("foo"), Utf8("sha256"))      |
++------------------------------------------+
+| <binary_hash_result>                     |
++------------------------------------------+
+```"#,
+            )
+            .with_standard_argument("expression", "String")
+            .with_argument(
+                "algorithm",
+                "String expression specifying algorithm to use. Must be one of:
+
+- md5
+- sha224
+- sha256
+- sha384
+- sha512
+- blake2s
+- blake2b
+- blake3",
+            )
+            .build()
+            .unwrap()
+    })
 }
diff --git a/datafusion/functions/src/crypto/md5.rs b/datafusion/functions/src/crypto/md5.rs
index ccb6fbba80aad..f273c9d28c234 100644
--- a/datafusion/functions/src/crypto/md5.rs
+++ b/datafusion/functions/src/crypto/md5.rs
@@ -19,8 +19,12 @@
 use crate::crypto::basic::md5;
 use arrow::datatypes::DataType;
 use datafusion_common::{plan_err, Result};
-use datafusion_expr::{ColumnarValue, ScalarUDFImpl, Signature, Volatility};
+use datafusion_expr::scalar_doc_sections::DOC_SECTION_HASHING;
+use datafusion_expr::{
+    ColumnarValue, Documentation, ScalarUDFImpl, Signature, Volatility,
+};
 use std::any::Any;
+use std::sync::OnceLock;
 
 #[derive(Debug)]
 pub struct Md5Func {
@@ -84,4 +88,34 @@ impl ScalarUDFImpl for Md5Func {
     fn invoke(&self, args: &[ColumnarValue]) -> Result<ColumnarValue> {
         md5(args)
     }
+
+    fn documentation(&self) -> Option<&Documentation> {
+        Some(get_md5_doc())
+    }
+}
+
+static DOCUMENTATION: OnceLock<Documentation> = OnceLock::new();
+
+/// Returns the user-facing documentation for `md5`, built once on first
+/// call and cached in `DOCUMENTATION` for the life of the process.
+fn get_md5_doc() -> &'static Documentation {
+    DOCUMENTATION.get_or_init(|| {
+        Documentation::builder()
+            .with_doc_section(DOC_SECTION_HASHING)
+            .with_description("Computes an MD5 128-bit checksum for a string expression.")
+            .with_syntax_example("md5(expression)")
+            .with_sql_example(
+                r#"```sql
+> select md5('foo');
++-------------------------------------+
+| md5(Utf8("foo"))                    |
++-------------------------------------+
+| <md5_checksum_result>               |
++-------------------------------------+
+```"#,
+            )
+            .with_standard_argument("expression", "String")
+            .build()
+            .unwrap()
+    })
 }
diff --git a/datafusion/functions/src/crypto/sha224.rs b/datafusion/functions/src/crypto/sha224.rs
index d603e5bcf2952..868c8cdc3558d 100644
--- a/datafusion/functions/src/crypto/sha224.rs
+++ b/datafusion/functions/src/crypto/sha224.rs
@@ -58,6 +58,16 @@ fn get_sha224_doc() -> &'static Documentation {
             .with_doc_section(DOC_SECTION_HASHING)
             .with_description("Computes the SHA-224 hash of a binary string.")
             .with_syntax_example("sha224(expression)")
+            .with_sql_example(
+                r#"```sql
+> select sha224('foo');
++------------------------------------------+
+| sha224(Utf8("foo"))                      |
++------------------------------------------+
+| <sha224_hash_result>                     |
++------------------------------------------+
+```"#,
+            )
             .with_standard_argument("expression", "String")
             .build()
             .unwrap()
diff --git a/datafusion/functions/src/crypto/sha256.rs b/datafusion/functions/src/crypto/sha256.rs
index 0a3f3b26e4310..99a470efbc1f2 100644
--- a/datafusion/functions/src/crypto/sha256.rs
+++ b/datafusion/functions/src/crypto/sha256.rs
@@ -19,8 +19,12 @@
 use super::basic::{sha256, utf8_or_binary_to_binary_type};
 use arrow::datatypes::DataType;
 use datafusion_common::Result;
-use datafusion_expr::{ColumnarValue, ScalarUDFImpl, Signature, Volatility};
+use datafusion_expr::scalar_doc_sections::DOC_SECTION_HASHING;
+use datafusion_expr::{
+    ColumnarValue, Documentation, ScalarUDFImpl, Signature, Volatility,
+};
 use std::any::Any;
+use std::sync::OnceLock;
 
 #[derive(Debug)]
 pub struct SHA256Func {
@@ -60,7 +64,38 @@ impl ScalarUDFImpl for SHA256Func {
     fn return_type(&self, arg_types: &[DataType]) -> Result<DataType> {
         utf8_or_binary_to_binary_type(&arg_types[0], self.name())
     }
+
     fn invoke(&self, args: &[ColumnarValue]) -> Result<ColumnarValue> {
         sha256(args)
     }
+
+    fn documentation(&self) -> Option<&Documentation> {
+        Some(get_sha256_doc())
+    }
+}
+
+static DOCUMENTATION: OnceLock<Documentation> = OnceLock::new();
+
+/// Returns the user-facing documentation for `sha256`, built once on first
+/// call and cached in `DOCUMENTATION` for the life of the process.
+fn get_sha256_doc() -> &'static Documentation {
+    DOCUMENTATION.get_or_init(|| {
+        Documentation::builder()
+            .with_doc_section(DOC_SECTION_HASHING)
+            .with_description("Computes the SHA-256 hash of a binary string.")
+            .with_syntax_example("sha256(expression)")
+            .with_sql_example(
+                r#"```sql
+> select sha256('foo');
++--------------------------------------+
+| sha256(Utf8("foo"))                  |
++--------------------------------------+
+| <sha256_hash_result>                 |
++--------------------------------------+
+```"#,
+            )
+            .with_standard_argument("expression", "String")
+            .build()
+            .unwrap()
+    })
 }
diff --git a/datafusion/functions/src/crypto/sha384.rs b/datafusion/functions/src/crypto/sha384.rs
index c3f7845ce7bd7..afe2db7478f74 100644
--- a/datafusion/functions/src/crypto/sha384.rs
+++ b/datafusion/functions/src/crypto/sha384.rs
@@ -19,8 +19,12 @@
 use super::basic::{sha384, utf8_or_binary_to_binary_type};
 use arrow::datatypes::DataType;
 use datafusion_common::Result;
-use datafusion_expr::{ColumnarValue, ScalarUDFImpl, Signature, Volatility};
+use datafusion_expr::scalar_doc_sections::DOC_SECTION_HASHING;
+use datafusion_expr::{
+    ColumnarValue, Documentation, ScalarUDFImpl, Signature, Volatility,
+};
 use std::any::Any;
+use std::sync::OnceLock;
 
 #[derive(Debug)]
 pub struct SHA384Func {
@@ -60,7 +64,38 @@ impl ScalarUDFImpl for SHA384Func {
     fn return_type(&self, arg_types: &[DataType]) -> Result<DataType> {
         utf8_or_binary_to_binary_type(&arg_types[0], self.name())
     }
+
     fn invoke(&self, args: &[ColumnarValue]) -> Result<ColumnarValue> {
         sha384(args)
     }
+
+    fn documentation(&self) -> Option<&Documentation> {
+        Some(get_sha384_doc())
+    }
+}
+
+static DOCUMENTATION: OnceLock<Documentation> = OnceLock::new();
+
+/// Returns the user-facing documentation for `sha384`, built once on first
+/// call and cached in `DOCUMENTATION` for the life of the process.
+fn get_sha384_doc() -> &'static Documentation {
+    DOCUMENTATION.get_or_init(|| {
+        Documentation::builder()
+            .with_doc_section(DOC_SECTION_HASHING)
+            .with_description("Computes the SHA-384 hash of a binary string.")
+            .with_syntax_example("sha384(expression)")
+            .with_sql_example(
+                r#"```sql
+> select sha384('foo');
++-----------------------------------------+
+| sha384(Utf8("foo"))                     |
++-----------------------------------------+
+| <sha384_hash_result>                    |
++-----------------------------------------+
+```"#,
+            )
+            .with_standard_argument("expression", "String")
+            .build()
+            .unwrap()
+    })
 }
diff --git a/datafusion/functions/src/crypto/sha512.rs b/datafusion/functions/src/crypto/sha512.rs
index dc3bfac9d8bdb..c88579fd08eea 100644
--- a/datafusion/functions/src/crypto/sha512.rs
+++ b/datafusion/functions/src/crypto/sha512.rs
@@ -19,8 +19,12 @@
 use super::basic::{sha512, utf8_or_binary_to_binary_type};
 use arrow::datatypes::DataType;
 use datafusion_common::Result;
-use datafusion_expr::{ColumnarValue, ScalarUDFImpl, Signature, Volatility};
+use datafusion_expr::scalar_doc_sections::DOC_SECTION_HASHING;
+use datafusion_expr::{
+    ColumnarValue, Documentation, ScalarUDFImpl, Signature, Volatility,
+};
 use std::any::Any;
+use std::sync::OnceLock;
 
 #[derive(Debug)]
 pub struct SHA512Func {
@@ -60,7 +64,38 @@ impl ScalarUDFImpl for SHA512Func {
     fn return_type(&self, arg_types: &[DataType]) -> Result<DataType> {
         utf8_or_binary_to_binary_type(&arg_types[0], self.name())
     }
+
     fn invoke(&self, args: &[ColumnarValue]) -> Result<ColumnarValue> {
         sha512(args)
     }
+
+    fn documentation(&self) -> Option<&Documentation> {
+        Some(get_sha512_doc())
+    }
+}
+
+static DOCUMENTATION: OnceLock<Documentation> = OnceLock::new();
+
+/// Returns the user-facing documentation for `sha512`, built once on first
+/// call and cached in `DOCUMENTATION` for the life of the process.
+fn get_sha512_doc() -> &'static Documentation {
+    DOCUMENTATION.get_or_init(|| {
+        Documentation::builder()
+            .with_doc_section(DOC_SECTION_HASHING)
+            .with_description("Computes the SHA-512 hash of a binary string.")
+            .with_syntax_example("sha512(expression)")
+            .with_sql_example(
+                r#"```sql
+> select sha512('foo');
++-------------------------------------------+
+| sha512(Utf8("foo"))                       |
++-------------------------------------------+
+| <sha512_hash_result>                      |
++-------------------------------------------+
+```"#,
+            )
+            .with_standard_argument("expression", "String")
+            .build()
+            .unwrap()
+    })
 }
diff --git a/docs/source/user-guide/sql/scalar_functions.md b/docs/source/user-guide/sql/scalar_functions.md
index de15b3dd33d89..4728cc3f635c1 100644
--- a/docs/source/user-guide/sql/scalar_functions.md
+++ b/docs/source/user-guide/sql/scalar_functions.md
@@ -3058,90 +3058,6 @@ select map_values(map([100, 5], [42,43]));
 [42, 43]
 ```
 
-## Hashing Functions
-
-- [digest](#digest)
-- [md5](#md5)
-- [sha256](#sha256)
-- [sha384](#sha384)
-- [sha512](#sha512)
-
-### `digest`
-
-Computes the binary hash of an expression using the specified algorithm.
-
-```
-digest(expression, algorithm)
-```
-
-#### Arguments
-
-- **expression**: String expression to operate on.
-  Can be a constant, column, or function, and any combination of string operators.
-- **algorithm**: String expression specifying algorithm to use.
-  Must be one of:
-
-  - md5
-  - sha224
-  - sha256
-  - sha384
-  - sha512
-  - blake2s
-  - blake2b
-  - blake3
-
-### `md5`
-
-Computes an MD5 128-bit checksum for a string expression.
-
-```
-md5(expression)
-```
-
-#### Arguments
-
-- **expression**: String expression to operate on.
-  Can be a constant, column, or function, and any combination of string operators.
-
-### `sha256`
-
-Computes the SHA-256 hash of a binary string.
-
-```
-sha256(expression)
-```
-
-#### Arguments
-
-- **expression**: String expression to operate on.
-  Can be a constant, column, or function, and any combination of string operators.
-
-### `sha384`
-
-Computes the SHA-384 hash of a binary string.
-
-```
-sha384(expression)
-```
-
-#### Arguments
-
-- **expression**: String expression to operate on.
-  Can be a constant, column, or function, and any combination of string operators.
-
-### `sha512`
-
-Computes the SHA-512 hash of a binary string.
-
-```
-sha512(expression)
-```
-
-#### Arguments
-
-- **expression**: String expression to operate on.
-  Can be a constant, column, or function, and any combination of string operators.
-
 ## Other Functions
 
 - [arrow_cast](#arrow_cast)
diff --git a/docs/source/user-guide/sql/scalar_functions_new.md b/docs/source/user-guide/sql/scalar_functions_new.md
index 2423f9c4757d6..6342dbb9ffab9 100644
--- a/docs/source/user-guide/sql/scalar_functions_new.md
+++ b/docs/source/user-guide/sql/scalar_functions_new.md
@@ -1157,7 +1157,67 @@ Additional examples can be found [here](https://github.com/apache/datafusion/blo
 
 ## Hashing Functions
 
+- [digest](#digest)
+- [md5](#md5)
 - [sha224](#sha224)
+- [sha256](#sha256)
+- [sha384](#sha384)
+- [sha512](#sha512)
+
+### `digest`
+
+Computes the binary hash of an expression using the specified algorithm.
+
+```
+digest(expression, algorithm)
+```
+
+#### Arguments
+
+- **expression**: String expression to operate on. Can be a constant, column, or function, and any combination of operators.
+- **algorithm**: String expression specifying algorithm to use. Must be one of:
+- md5
+- sha224
+- sha256
+- sha384
+- sha512
+- blake2s
+- blake2b
+- blake3
+
+#### Example
+
+```sql
+> select digest('foo', 'sha256');
++------------------------------------------+
+| digest(Utf8("foo"), Utf8("sha256"))      |
++------------------------------------------+
+| <binary_hash_result>                     |
++------------------------------------------+
+```
+
+### `md5`
+
+Computes an MD5 128-bit checksum for a string expression.
+
+```
+md5(expression)
+```
+
+#### Arguments
+
+- **expression**: String expression to operate on. Can be a constant, column, or function, and any combination of operators.
+
+#### Example
+
+```sql
+> select md5('foo');
++-------------------------------------+
+| md5(Utf8("foo"))                    |
++-------------------------------------+
+| <md5_checksum_result>               |
++-------------------------------------+
+```
 
 ### `sha224`
 
@@ -1170,3 +1230,83 @@ sha224(expression)
 #### Arguments
 
 - **expression**: String expression to operate on. Can be a constant, column, or function, and any combination of operators.
+
+#### Example
+
+```sql
+> select sha224('foo');
++------------------------------------------+
+| sha224(Utf8("foo"))                      |
++------------------------------------------+
+| <sha224_hash_result>                     |
++------------------------------------------+
+```
+
+### `sha256`
+
+Computes the SHA-256 hash of a binary string.
+
+```
+sha256(expression)
+```
+
+#### Arguments
+
+- **expression**: String expression to operate on. Can be a constant, column, or function, and any combination of operators.
+
+#### Example
+
+```sql
+> select sha256('foo');
++--------------------------------------+
+| sha256(Utf8("foo"))                  |
++--------------------------------------+
+| <sha256_hash_result>                 |
++--------------------------------------+
+```
+
+### `sha384`
+
+Computes the SHA-384 hash of a binary string.
+
+```
+sha384(expression)
+```
+
+#### Arguments
+
+- **expression**: String expression to operate on. Can be a constant, column, or function, and any combination of operators.
+
+#### Example
+
+```sql
+> select sha384('foo');
++-----------------------------------------+
+| sha384(Utf8("foo"))                     |
++-----------------------------------------+
+| <sha384_hash_result>                    |
++-----------------------------------------+
+```
+
+### `sha512`
+
+Computes the SHA-512 hash of a binary string.
+
+```
+sha512(expression)
+```
+
+#### Arguments
+
+- **expression**: String expression to operate on. Can be a constant, column, or function, and any combination of operators.
+
+#### Example
+
+```sql
+> select sha512('foo');
++-------------------------------------------+
+| sha512(Utf8("foo"))                       |
++-------------------------------------------+
+| <sha512_hash_result>                      |
++-------------------------------------------+
+```