From f59b5ba06c274daa546d9aa740ae98a8d8d9de01 Mon Sep 17 00:00:00 2001 From: Jonathan Chen Date: Wed, 9 Oct 2024 20:58:48 -0400 Subject: [PATCH 1/3] Update crypto docs --- datafusion/functions/src/crypto/digest.rs | 48 +++++- datafusion/functions/src/crypto/md5.rs | 35 ++++- datafusion/functions/src/crypto/sha224.rs | 10 ++ datafusion/functions/src/crypto/sha256.rs | 35 ++++- datafusion/functions/src/crypto/sha384.rs | 36 ++++- datafusion/functions/src/crypto/sha512.rs | 36 ++++- .../user-guide/sql/scalar_functions_new.md | 140 ++++++++++++++++++ 7 files changed, 335 insertions(+), 5 deletions(-) diff --git a/datafusion/functions/src/crypto/digest.rs b/datafusion/functions/src/crypto/digest.rs index c9dd3c1f56a29..c248dd7b2617f 100644 --- a/datafusion/functions/src/crypto/digest.rs +++ b/datafusion/functions/src/crypto/digest.rs @@ -19,10 +19,12 @@ use super::basic::{digest, utf8_or_binary_to_binary_type}; use arrow::datatypes::DataType; use datafusion_common::Result; +use datafusion_expr::scalar_doc_sections::DOC_SECTION_HASHING; use datafusion_expr::{ - ColumnarValue, ScalarUDFImpl, Signature, TypeSignature::*, Volatility, + ColumnarValue, Documentation, ScalarUDFImpl, Signature, TypeSignature::*, Volatility, }; use std::any::Any; +use std::sync::OnceLock; #[derive(Debug)] pub struct DigestFunc { @@ -69,4 +71,48 @@ impl ScalarUDFImpl for DigestFunc { fn invoke(&self, args: &[ColumnarValue]) -> Result { digest(args) } + + fn documentation(&self) -> Option<&Documentation> { + Some(get_digest_doc()) + } } + +static DOCUMENTATION: OnceLock = OnceLock::new(); + +fn get_digest_doc() -> &'static Documentation { + DOCUMENTATION.get_or_init(|| { + Documentation::builder() + .with_doc_section(DOC_SECTION_HASHING) + .with_description( + "Computes the binary hash of an expression using the specified algorithm.", + ) + .with_syntax_example("digest(expression, algorithm)") + .with_sql_example( + r#"```sql +> select digest('foo', 'sha256'); ++------------------------------------------+ +| digest(Utf8("foo"), Utf8("sha256")) | ++------------------------------------------+ +| | ++------------------------------------------+ +```"#, + ) + .with_standard_argument( + "expression", "String") + .with_argument( + "algorithm", + "String expression specifying algorithm to use. Must be one of: + +- md5 +- sha224 +- sha256 +- sha384 +- sha512 +- blake2s +- blake2b +- blake3", + ) + .build() + .unwrap() + }) +} \ No newline at end of file diff --git a/datafusion/functions/src/crypto/md5.rs b/datafusion/functions/src/crypto/md5.rs index ccb6fbba80aad..c02bf5de03e88 100644 --- a/datafusion/functions/src/crypto/md5.rs +++ b/datafusion/functions/src/crypto/md5.rs @@ -19,8 +19,10 @@ use crate::crypto::basic::md5; use arrow::datatypes::DataType; use datafusion_common::{plan_err, Result}; -use datafusion_expr::{ColumnarValue, ScalarUDFImpl, Signature, Volatility}; +use datafusion_expr::scalar_doc_sections::DOC_SECTION_HASHING; +use datafusion_expr::{ColumnarValue, Documentation, ScalarUDFImpl, Signature, Volatility}; use std::any::Any; +use std::sync::OnceLock; #[derive(Debug)] pub struct Md5Func { @@ -84,4 +86,35 @@ impl ScalarUDFImpl for Md5Func { fn invoke(&self, args: &[ColumnarValue]) -> Result { md5(args) } + + fn documentation(&self) -> Option<&Documentation> { + Some(get_md5_doc()) + } +} + +static DOCUMENTATION: OnceLock = OnceLock::new(); + +fn get_md5_doc() -> &'static Documentation { + DOCUMENTATION.get_or_init(|| { + Documentation::builder() + .with_doc_section(DOC_SECTION_HASHING) + .with_description( + "Computes an MD5 128-bit checksum for a string expression.", + ) + .with_syntax_example("md5(expression)") + .with_sql_example( + r#"```sql +> select md5('foo'); ++-------------------------------------+ +| md5(Utf8("foo")) | ++-------------------------------------+ +| | ++-------------------------------------+ +```"#, + ) + .with_standard_argument( + "expression", "String") + .build() + .unwrap() + }) } diff --git a/datafusion/functions/src/crypto/sha224.rs b/datafusion/functions/src/crypto/sha224.rs index d603e5bcf2952..868c8cdc3558d 100644 --- a/datafusion/functions/src/crypto/sha224.rs +++ b/datafusion/functions/src/crypto/sha224.rs @@ -58,6 +58,16 @@ fn get_sha224_doc() -> &'static Documentation { .with_doc_section(DOC_SECTION_HASHING) .with_description("Computes the SHA-224 hash of a binary string.") .with_syntax_example("sha224(expression)") + .with_sql_example( + r#"```sql +> select sha224('foo'); ++------------------------------------------+ +| sha224(Utf8("foo")) | ++------------------------------------------+ +| | ++------------------------------------------+ +```"#, + ) .with_standard_argument("expression", "String") .build() .unwrap() diff --git a/datafusion/functions/src/crypto/sha256.rs b/datafusion/functions/src/crypto/sha256.rs index 0a3f3b26e4310..7575aac60160d 100644 --- a/datafusion/functions/src/crypto/sha256.rs +++ b/datafusion/functions/src/crypto/sha256.rs @@ -19,8 +19,10 @@ use super::basic::{sha256, utf8_or_binary_to_binary_type}; use arrow::datatypes::DataType; use datafusion_common::Result; -use datafusion_expr::{ColumnarValue, ScalarUDFImpl, Signature, Volatility}; +use datafusion_expr::scalar_doc_sections::DOC_SECTION_HASHING; +use datafusion_expr::{ColumnarValue, Documentation, ScalarUDFImpl, Signature, Volatility}; use std::any::Any; +use std::sync::OnceLock; #[derive(Debug)] pub struct SHA256Func { @@ -60,7 +62,38 @@ impl ScalarUDFImpl for SHA256Func { fn return_type(&self, arg_types: &[DataType]) -> Result { utf8_or_binary_to_binary_type(&arg_types[0], self.name()) } + fn invoke(&self, args: &[ColumnarValue]) -> Result { sha256(args) } + + fn documentation(&self) -> Option<&Documentation> { + Some(get_sha256_doc()) + } +} + +static DOCUMENTATION: OnceLock = OnceLock::new(); + +fn get_sha256_doc() -> &'static Documentation { + DOCUMENTATION.get_or_init(|| { + Documentation::builder() + .with_doc_section(DOC_SECTION_HASHING) + .with_description( + "Computes the SHA-256 hash of a binary string.", + ) + .with_syntax_example("sha256(expression)") + .with_sql_example( + r#"```sql +> select sha256('foo'); ++--------------------------------------+ +| sha256(Utf8("foo")) | ++--------------------------------------+ +| | ++--------------------------------------+ +```"#, + ) + .with_standard_argument("expression", "String") + .build() + .unwrap() + }) } diff --git a/datafusion/functions/src/crypto/sha384.rs b/datafusion/functions/src/crypto/sha384.rs index c3f7845ce7bd7..0fc974bafd1ec 100644 --- a/datafusion/functions/src/crypto/sha384.rs +++ b/datafusion/functions/src/crypto/sha384.rs @@ -19,8 +19,10 @@ use super::basic::{sha384, utf8_or_binary_to_binary_type}; use arrow::datatypes::DataType; use datafusion_common::Result; -use datafusion_expr::{ColumnarValue, ScalarUDFImpl, Signature, Volatility}; +use datafusion_expr::scalar_doc_sections::DOC_SECTION_HASHING; +use datafusion_expr::{ColumnarValue, Documentation, ScalarUDFImpl, Signature, Volatility}; use std::any::Any; +use std::sync::OnceLock; #[derive(Debug)] pub struct SHA384Func { @@ -60,7 +62,39 @@ impl ScalarUDFImpl for SHA384Func { fn return_type(&self, arg_types: &[DataType]) -> Result { utf8_or_binary_to_binary_type(&arg_types[0], self.name()) } + fn invoke(&self, args: &[ColumnarValue]) -> Result { sha384(args) } + + fn documentation(&self) -> Option<&Documentation> { + Some(get_sha384_doc()) + } +} + +static DOCUMENTATION: OnceLock = OnceLock::new(); + +fn get_sha384_doc() -> &'static Documentation { + DOCUMENTATION.get_or_init(|| { + Documentation::builder() + .with_doc_section(DOC_SECTION_HASHING) + .with_description( + "Computes the SHA-384 hash of a binary string.", + ) + .with_syntax_example("sha384(expression)") + .with_sql_example( + r#"```sql +> select sha384('foo'); ++-----------------------------------------+ +| sha384(Utf8("foo")) | ++-----------------------------------------+ +| | ++-----------------------------------------+ +```"#, + ) + .with_standard_argument( + "expression", "String") + .build() + .unwrap() + }) } diff --git a/datafusion/functions/src/crypto/sha512.rs b/datafusion/functions/src/crypto/sha512.rs index dc3bfac9d8bdb..de19a06ee2a38 100644 --- a/datafusion/functions/src/crypto/sha512.rs +++ b/datafusion/functions/src/crypto/sha512.rs @@ -19,8 +19,10 @@ use super::basic::{sha512, utf8_or_binary_to_binary_type}; use arrow::datatypes::DataType; use datafusion_common::Result; -use datafusion_expr::{ColumnarValue, ScalarUDFImpl, Signature, Volatility}; +use datafusion_expr::scalar_doc_sections::DOC_SECTION_HASHING; +use datafusion_expr::{ColumnarValue, Documentation, ScalarUDFImpl, Signature, Volatility}; use std::any::Any; +use std::sync::OnceLock; #[derive(Debug)] pub struct SHA512Func { @@ -60,7 +62,39 @@ impl ScalarUDFImpl for SHA512Func { fn return_type(&self, arg_types: &[DataType]) -> Result { utf8_or_binary_to_binary_type(&arg_types[0], self.name()) } + fn invoke(&self, args: &[ColumnarValue]) -> Result { sha512(args) } + + fn documentation(&self) -> Option<&Documentation> { + Some(get_sha512_doc()) + } +} + +static DOCUMENTATION: OnceLock = OnceLock::new(); + +fn get_sha512_doc() -> &'static Documentation { + DOCUMENTATION.get_or_init(|| { + Documentation::builder() + .with_doc_section(DOC_SECTION_HASHING) + .with_description( + "Computes the SHA-512 hash of a binary string.", + ) + .with_syntax_example("sha512(expression)") + .with_sql_example( + r#"```sql +> select sha512('foo'); ++-------------------------------------------+ +| sha512(Utf8("foo")) | ++-------------------------------------------+ +| | ++-------------------------------------------+ +```"#, + ) + .with_argument( + "expression", "String") + .build() + .unwrap() + }) } diff --git a/docs/source/user-guide/sql/scalar_functions_new.md b/docs/source/user-guide/sql/scalar_functions_new.md index 2423f9c4757d6..6342dbb9ffab9 100644 --- a/docs/source/user-guide/sql/scalar_functions_new.md +++ b/docs/source/user-guide/sql/scalar_functions_new.md @@ -1157,7 +1157,67 @@ Additional examples can be found [here](https://github.com/apache/datafusion/blo ## Hashing Functions +- [digest](#digest) +- [md5](#md5) - [sha224](#sha224) +- [sha256](#sha256) +- [sha384](#sha384) +- [sha512](#sha512) + +### `digest` + +Computes the binary hash of an expression using the specified algorithm. + +``` +digest(expression, algorithm) +``` + +#### Arguments + +- **expression**: String expression to operate on. Can be a constant, column, or function, and any combination of operators. +- **algorithm**: String expression specifying algorithm to use. Must be one of: +- md5 +- sha224 +- sha256 +- sha384 +- sha512 +- blake2s +- blake2b +- blake3 + +#### Example + +```sql +> select digest('foo', 'sha256'); ++------------------------------------------+ +| digest(Utf8("foo"), Utf8("sha256")) | ++------------------------------------------+ +| | ++------------------------------------------+ +``` + +### `md5` + +Computes an MD5 128-bit checksum for a string expression. + +``` +md5(expression) +``` + +#### Arguments + +- **expression**: String expression to operate on. Can be a constant, column, or function, and any combination of operators. + +#### Example + +```sql +> select md5('foo'); ++-------------------------------------+ +| md5(Utf8("foo")) | ++-------------------------------------+ +| | ++-------------------------------------+ +``` ### `sha224` @@ -1170,3 +1230,83 @@ sha224(expression) #### Arguments - **expression**: String expression to operate on. Can be a constant, column, or function, and any combination of operators. + +#### Example + +```sql +> select sha224('foo'); ++------------------------------------------+ +| sha224(Utf8("foo")) | ++------------------------------------------+ +| | ++------------------------------------------+ +``` + +### `sha256` + +Computes the SHA-256 hash of a binary string. + +``` +sha256(expression) +``` + +#### Arguments + +- **expression**: String expression to operate on. Can be a constant, column, or function, and any combination of operators. + +#### Example + +```sql +> select sha256('foo'); ++--------------------------------------+ +| sha256(Utf8("foo")) | ++--------------------------------------+ +| | ++--------------------------------------+ +``` + +### `sha384` + +Computes the SHA-384 hash of a binary string. + +``` +sha384(expression) +``` + +#### Arguments + +- **expression**: String expression to operate on. Can be a constant, column, or function, and any combination of operators. + +#### Example + +```sql +> select sha384('foo'); ++-----------------------------------------+ +| sha384(Utf8("foo")) | ++-----------------------------------------+ +| | ++-----------------------------------------+ +``` + +### `sha512` + +Computes the SHA-512 hash of a binary string. + +``` +sha512(expression) +``` + +#### Arguments + +- **expression**: String + +#### Example + +```sql +> select sha512('foo'); ++-------------------------------------------+ +| sha512(Utf8("foo")) | ++-------------------------------------------+ +| | ++-------------------------------------------+ +``` From 1dfd9ab35ca7fdd5be67c899ab0768cc068c6475 Mon Sep 17 00:00:00 2001 From: Jonathan Chen Date: Wed, 9 Oct 2024 21:00:22 -0400 Subject: [PATCH 2/3] delete old --- .../source/user-guide/sql/scalar_functions.md | 84 ------------------- 1 file changed, 84 deletions(-) diff --git a/docs/source/user-guide/sql/scalar_functions.md b/docs/source/user-guide/sql/scalar_functions.md index de15b3dd33d89..4728cc3f635c1 100644 --- a/docs/source/user-guide/sql/scalar_functions.md +++ b/docs/source/user-guide/sql/scalar_functions.md @@ -3058,90 +3058,6 @@ select map_values(map([100, 5], [42,43])); [42, 43] ``` -## Hashing Functions - -- [digest](#digest) -- [md5](#md5) -- [sha256](#sha256) -- [sha384](#sha384) -- [sha512](#sha512) - -### `digest` - -Computes the binary hash of an expression using the specified algorithm. - -``` -digest(expression, algorithm) -``` - -#### Arguments - -- **expression**: String expression to operate on. - Can be a constant, column, or function, and any combination of string operators. -- **algorithm**: String expression specifying algorithm to use. - Must be one of: - - - md5 - - sha224 - - sha256 - - sha384 - - sha512 - - blake2s - - blake2b - - blake3 - -### `md5` - -Computes an MD5 128-bit checksum for a string expression. - -``` -md5(expression) -``` - -#### Arguments - -- **expression**: String expression to operate on. - Can be a constant, column, or function, and any combination of string operators. - -### `sha256` - -Computes the SHA-256 hash of a binary string. - -``` -sha256(expression) -``` - -#### Arguments - -- **expression**: String expression to operate on. - Can be a constant, column, or function, and any combination of string operators. - -### `sha384` - -Computes the SHA-384 hash of a binary string. - -``` -sha384(expression) -``` - -#### Arguments - -- **expression**: String expression to operate on. - Can be a constant, column, or function, and any combination of string operators. - -### `sha512` - -Computes the SHA-512 hash of a binary string. - -``` -sha512(expression) -``` - -#### Arguments - -- **expression**: String expression to operate on. - Can be a constant, column, or function, and any combination of string operators. - ## Other Functions - [arrow_cast](#arrow_cast) From 590910ffe81a79376437d49ced9b72e76eb34fb1 Mon Sep 17 00:00:00 2001 From: Jonathan Chen Date: Wed, 9 Oct 2024 21:08:08 -0400 Subject: [PATCH 3/3] fmt checks --- datafusion/functions/src/crypto/digest.rs | 2 +- datafusion/functions/src/crypto/md5.rs | 11 +++++------ datafusion/functions/src/crypto/sha256.rs | 8 ++++---- datafusion/functions/src/crypto/sha384.rs | 11 +++++------ datafusion/functions/src/crypto/sha512.rs | 11 +++++------ 5 files changed, 20 insertions(+), 23 deletions(-) diff --git a/datafusion/functions/src/crypto/digest.rs b/datafusion/functions/src/crypto/digest.rs index c248dd7b2617f..9ec07b1cab53d 100644 --- a/datafusion/functions/src/crypto/digest.rs +++ b/datafusion/functions/src/crypto/digest.rs @@ -115,4 +115,4 @@ fn get_digest_doc() -> &'static Documentation { .build() .unwrap() }) -} \ No newline at end of file +} diff --git a/datafusion/functions/src/crypto/md5.rs b/datafusion/functions/src/crypto/md5.rs index c02bf5de03e88..f273c9d28c234 100644 --- a/datafusion/functions/src/crypto/md5.rs +++ b/datafusion/functions/src/crypto/md5.rs @@ -20,7 +20,9 @@ use crate::crypto::basic::md5; use arrow::datatypes::DataType; use datafusion_common::{plan_err, Result}; use datafusion_expr::scalar_doc_sections::DOC_SECTION_HASHING; -use datafusion_expr::{ColumnarValue, Documentation, ScalarUDFImpl, Signature, Volatility}; +use datafusion_expr::{ + ColumnarValue, Documentation, ScalarUDFImpl, Signature, Volatility, +}; use std::any::Any; use std::sync::OnceLock; @@ -98,9 +100,7 @@ fn get_md5_doc() -> &'static Documentation { DOCUMENTATION.get_or_init(|| { Documentation::builder() .with_doc_section(DOC_SECTION_HASHING) - .with_description( - "Computes an MD5 128-bit checksum for a string expression.", - ) + .with_description("Computes an MD5 128-bit checksum for a string expression.") .with_syntax_example("md5(expression)") .with_sql_example( r#"```sql @@ -112,8 +112,7 @@ fn get_md5_doc() -> &'static Documentation { +-------------------------------------+ ```"#, ) - .with_standard_argument( - "expression", "String") + .with_standard_argument("expression", "String") .build() .unwrap() }) diff --git a/datafusion/functions/src/crypto/sha256.rs b/datafusion/functions/src/crypto/sha256.rs index 7575aac60160d..99a470efbc1f2 100644 --- a/datafusion/functions/src/crypto/sha256.rs +++ b/datafusion/functions/src/crypto/sha256.rs @@ -20,7 +20,9 @@ use super::basic::{sha256, utf8_or_binary_to_binary_type}; use arrow::datatypes::DataType; use datafusion_common::Result; use datafusion_expr::scalar_doc_sections::DOC_SECTION_HASHING; -use datafusion_expr::{ColumnarValue, Documentation, ScalarUDFImpl, Signature, Volatility}; +use datafusion_expr::{ + ColumnarValue, Documentation, ScalarUDFImpl, Signature, Volatility, +}; use std::any::Any; use std::sync::OnceLock; @@ -78,9 +80,7 @@ fn get_sha256_doc() -> &'static Documentation { DOCUMENTATION.get_or_init(|| { Documentation::builder() .with_doc_section(DOC_SECTION_HASHING) - .with_description( - "Computes the SHA-256 hash of a binary string.", - ) + .with_description("Computes the SHA-256 hash of a binary string.") .with_syntax_example("sha256(expression)") .with_sql_example( r#"```sql diff --git a/datafusion/functions/src/crypto/sha384.rs b/datafusion/functions/src/crypto/sha384.rs index 0fc974bafd1ec..afe2db7478f74 100644 --- a/datafusion/functions/src/crypto/sha384.rs +++ b/datafusion/functions/src/crypto/sha384.rs @@ -20,7 +20,9 @@ use super::basic::{sha384, utf8_or_binary_to_binary_type}; use arrow::datatypes::DataType; use datafusion_common::Result; use datafusion_expr::scalar_doc_sections::DOC_SECTION_HASHING; -use datafusion_expr::{ColumnarValue, Documentation, ScalarUDFImpl, Signature, Volatility}; +use datafusion_expr::{ + ColumnarValue, Documentation, ScalarUDFImpl, Signature, Volatility, +}; use std::any::Any; use std::sync::OnceLock; @@ -78,9 +80,7 @@ fn get_sha384_doc() -> &'static Documentation { DOCUMENTATION.get_or_init(|| { Documentation::builder() .with_doc_section(DOC_SECTION_HASHING) - .with_description( - "Computes the SHA-384 hash of a binary string.", - ) + .with_description("Computes the SHA-384 hash of a binary string.") .with_syntax_example("sha384(expression)") .with_sql_example( r#"```sql @@ -92,8 +92,7 @@ fn get_sha384_doc() -> &'static Documentation { +-----------------------------------------+ ```"#, ) - .with_standard_argument( - "expression", "String") + .with_standard_argument("expression", "String") .build() .unwrap() }) diff --git a/datafusion/functions/src/crypto/sha512.rs b/datafusion/functions/src/crypto/sha512.rs index de19a06ee2a38..c88579fd08eea 100644 --- a/datafusion/functions/src/crypto/sha512.rs +++ b/datafusion/functions/src/crypto/sha512.rs @@ -20,7 +20,9 @@ use super::basic::{sha512, utf8_or_binary_to_binary_type}; use arrow::datatypes::DataType; use datafusion_common::Result; use datafusion_expr::scalar_doc_sections::DOC_SECTION_HASHING; -use datafusion_expr::{ColumnarValue, Documentation, ScalarUDFImpl, Signature, Volatility}; +use datafusion_expr::{ + ColumnarValue, Documentation, ScalarUDFImpl, Signature, Volatility, +}; use std::any::Any; use std::sync::OnceLock; @@ -78,9 +80,7 @@ fn get_sha512_doc() -> &'static Documentation { DOCUMENTATION.get_or_init(|| { Documentation::builder() .with_doc_section(DOC_SECTION_HASHING) - .with_description( - "Computes the SHA-512 hash of a binary string.", - ) + .with_description("Computes the SHA-512 hash of a binary string.") .with_syntax_example("sha512(expression)") .with_sql_example( r#"```sql @@ -92,8 +92,7 @@ fn get_sha512_doc() -> &'static Documentation { +-------------------------------------------+ ```"#, ) - .with_argument( - "expression", "String") + .with_argument("expression", "String") .build() .unwrap() })