-
Notifications
You must be signed in to change notification settings - Fork 181
Implement cryptographic hash UDFs #3574
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
616a68c
33c390f
f56d5ff
607dab4
abdcc99
629cb34
d2711f2
0d24c31
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,58 @@ | ||
| /* | ||
| * Copyright OpenSearch Contributors | ||
| * SPDX-License-Identifier: Apache-2.0 | ||
| */ | ||
|
|
||
| package org.opensearch.sql.expression.function.udf; | ||
|
|
||
| import java.util.List; | ||
| import org.apache.calcite.adapter.enumerable.NotNullImplementor; | ||
| import org.apache.calcite.adapter.enumerable.NullPolicy; | ||
| import org.apache.calcite.adapter.enumerable.RexToLixTranslator; | ||
| import org.apache.calcite.linq4j.tree.Expression; | ||
| import org.apache.calcite.linq4j.tree.Expressions; | ||
| import org.apache.calcite.rex.RexCall; | ||
| import org.apache.calcite.sql.type.ReturnTypes; | ||
| import org.apache.calcite.sql.type.SqlReturnTypeInference; | ||
| import org.apache.calcite.sql.type.SqlTypeTransforms; | ||
| import org.apache.commons.codec.binary.Hex; | ||
| import org.apache.commons.codec.digest.DigestUtils; | ||
| import org.apache.commons.codec.digest.MessageDigestAlgorithms; | ||
| import org.opensearch.sql.expression.function.ImplementorUDF; | ||
|
|
||
| /** | ||
|  * User-defined function (UDF) for cryptographic hashing. The only factory visible here is | ||
|  * {@link #sha2()}, which hashes a string with a member of the SHA-2 family. | ||
|  */ | ||
| public class CryptographicFunction extends ImplementorUDF { | ||
|   private CryptographicFunction(NotNullImplementor implementor, NullPolicy nullPolicy) { | ||
|     super(implementor, nullPolicy); | ||
|   } | ||
|
|
||
|   /** | ||
|    * Creates the SHA2 function, backed by {@link Sha2Implementor} and registered with {@link | ||
|    * NullPolicy#ANY}. | ||
|    * | ||
|    * @return a new SHA2 function instance | ||
|    */ | ||
|   public static CryptographicFunction sha2() { | ||
|     return new CryptographicFunction(new Sha2Implementor(), NullPolicy.ANY); | ||
|   } | ||
|
|
||
|   /** | ||
|    * Declares the result type of the function. | ||
|    * | ||
|    * @return an inference that yields VARCHAR forced to be nullable | ||
|    */ | ||
|   @Override | ||
|   public SqlReturnTypeInference getReturnTypeInference() { | ||
|     return ReturnTypes.VARCHAR.andThen(SqlTypeTransforms.FORCE_NULLABLE); | ||
|   } | ||
|
|
||
|   /** Generates the call to {@link #getDigest(String, int)} for the SHA2 function. */ | ||
|   public static class Sha2Implementor implements NotNullImplementor { | ||
|     /** | ||
|      * Builds an expression that invokes the static {@code getDigest} method with the already | ||
|      * translated operands, in their original order. | ||
|      */ | ||
|     @Override | ||
|     public Expression implement( | ||
|         RexToLixTranslator translator, RexCall call, List<Expression> translatedOperands) { | ||
|       return Expressions.call(Sha2Implementor.class, "getDigest", translatedOperands); | ||
|     } | ||
|
|
||
|     /** | ||
|      * Computes a SHA-2 digest of {@code input} and returns it as a hex string. | ||
|      * | ||
|      * @param input the text to hash; it is hashed as its UTF-8 bytes for every algorithm | ||
|      * @param algorithm the digest bit length: 224, 256, 384, or 512 | ||
|      * @return the hex-encoded digest | ||
|      * @throws IllegalArgumentException if {@code algorithm} is not one of the supported lengths | ||
|      */ | ||
|     public static String getDigest(String input, int algorithm) { | ||
|       return switch (algorithm) { | ||
|         // DigestUtils has no String convenience method for SHA-224, so the bytes are produced | ||
|         // here. UTF-8 is named explicitly: the sha256Hex/sha384Hex/sha512Hex(String) overloads | ||
|         // encode as UTF-8, whereas a bare getBytes() would follow the platform default charset. | ||
|         case 224 -> Hex.encodeHexString( | ||
|             DigestUtils.getDigest(MessageDigestAlgorithms.SHA_224) | ||
|                 .digest(input.getBytes(java.nio.charset.StandardCharsets.UTF_8))); | ||
|         case 256 -> DigestUtils.sha256Hex(input); | ||
|         case 384 -> DigestUtils.sha384Hex(input); | ||
|         case 512 -> DigestUtils.sha512Hex(input); | ||
|         default -> throw new IllegalArgumentException( | ||
|             String.format( | ||
|                 "Unsupported SHA2 algorithm: %d. Only 224, 256, 384, and 512 are supported.", | ||
|                 algorithm)); | ||
|       }; | ||
|     } | ||
|   } | ||
| } |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,85 @@ | ||
| =========================== | ||
| PPL Cryptographic Functions | ||
| =========================== | ||
|
|
||
| .. rubric:: Table of contents | ||
|
|
||
| .. contents:: | ||
| :local: | ||
| :depth: 1 | ||
|
|
||
| MD5 | ||
| --- | ||
|
|
||
| Description | ||
| >>>>>>>>>>> | ||
|
|
||
|
|
||
| Usage: ``md5(str)`` calculates the MD5 digest and returns the value as a 32-character hex string. | ||
|
|
||
| Argument type: STRING | ||
|
|
||
| Return type: STRING | ||
|
|
||
| Example:: | ||
|
|
||
| os> source=people | eval `MD5('hello')` = MD5('hello') | fields `MD5('hello')` | ||
| fetched rows / total rows = 1/1 | ||
| +----------------------------------+ | ||
| | MD5('hello') | | ||
| |----------------------------------| | ||
| | 5d41402abc4b2a76b9719d911017c592 | | ||
| +----------------------------------+ | ||
|
|
||
| SHA1 | ||
| ---- | ||
|
|
||
| Description | ||
| >>>>>>>>>>> | ||
|
|
||
| Usage: ``sha1(str)`` returns the hex string result of SHA-1. | ||
|
|
||
| Argument type: STRING | ||
|
|
||
| Return type: STRING | ||
|
|
||
| Example:: | ||
|
|
||
| os> source=people | eval `SHA1('hello')` = SHA1('hello') | fields `SHA1('hello')` | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. please apply function on field of
Collaborator
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Tests are corrected. Document kept unchanged. |
||
| fetched rows / total rows = 1/1 | ||
| +------------------------------------------+ | ||
| | SHA1('hello') | | ||
| |------------------------------------------| | ||
| | aaf4c61ddcc5e8a2dabede0f3b482cd9aea9434d | | ||
| +------------------------------------------+ | ||
|
|
||
| SHA2 | ||
| ---- | ||
|
|
||
| Description | ||
| >>>>>>>>>>> | ||
|
|
||
| Usage: ``sha2(str, numBits)`` returns the hex string result of the SHA-2 family of hash functions (SHA-224, SHA-256, SHA-384, and SHA-512). | ||
| ``numBits`` is the desired bit length of the result and must be 224, 256, 384, or 512. | ||
|
|
||
| Argument type: STRING, INTEGER | ||
|
|
||
| Return type: STRING | ||
|
|
||
| Example:: | ||
|
|
||
| os> source=people | eval `SHA2('hello',256)` = SHA2('hello',256) | fields `SHA2('hello',256)` | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. ditto
Collaborator
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. done |
||
| fetched rows / total rows = 1/1 | ||
| +------------------------------------------------------------------+ | ||
| | SHA2('hello',256) | | ||
| |------------------------------------------------------------------| | ||
| | 2cf24dba5fb0a30e26e83b2ac5b9e29e1b161e5c1fa7425e73043362938b9824 | | ||
| +------------------------------------------------------------------+ | ||
|
|
||
| os> source=people | eval `SHA2('hello',512)` = SHA2('hello',512) | fields `SHA2('hello',512)` | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. ditto
Collaborator
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. done |
||
| fetched rows / total rows = 1/1 | ||
| +----------------------------------------------------------------------------------------------------------------------------------+ | ||
| | SHA2('hello',512) | | ||
| |----------------------------------------------------------------------------------------------------------------------------------| | ||
| | 9b71d224bd62f3785d96d46ad3ea3d73319bfbc2890caadae2dff72519673ca72323c3d99ba5c11d7c7acc6e14b8c5da0c4663475c2e5c3adef46f73bcdec043 | | ||
| +----------------------------------------------------------------------------------------------------------------------------------+ | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,91 @@ | ||
| /* | ||
| * Copyright OpenSearch Contributors | ||
| * SPDX-License-Identifier: Apache-2.0 | ||
| */ | ||
|
|
||
| package org.opensearch.sql.calcite.standalone; | ||
|
|
||
| import static org.opensearch.sql.legacy.TestsConstants.TEST_INDEX_STATE_COUNTRY; | ||
| import static org.opensearch.sql.util.MatcherUtils.rows; | ||
| import static org.opensearch.sql.util.MatcherUtils.schema; | ||
| import static org.opensearch.sql.util.MatcherUtils.verifyDataRows; | ||
| import static org.opensearch.sql.util.MatcherUtils.verifyErrorMessageContains; | ||
| import static org.opensearch.sql.util.MatcherUtils.verifySchema; | ||
|
|
||
| import java.io.IOException; | ||
| import org.json.JSONObject; | ||
| import org.junit.jupiter.api.Test; | ||
|
|
||
| /** | ||
|  * Integration tests for the PPL cryptographic hash functions (MD5, SHA1, SHA2), run against the | ||
|  * state/country test index. | ||
|  */ | ||
| public class CalcitePPLCryptographicFunctionIT extends CalcitePPLIntegTestCase { | ||
|   /** Runs the parent initialization, then loads the state/country index used by every test. */ | ||
|   @Override | ||
|   public void init() throws IOException { | ||
|     super.init(); | ||
|     loadIndex(Index.STATE_COUNTRY); | ||
|   } | ||
|
|
||
|   /** MD5 applied to a string literal and to the {@code state} field. */ | ||
|   @Test | ||
|   public void testMd5() { | ||
|     String query = | ||
|         String.format( | ||
|             "source=%s | where name = 'Jake' | eval hello = MD5('hello'), california =" | ||
|                 + " md5(state) | fields hello, california", | ||
|             TEST_INDEX_STATE_COUNTRY); | ||
|     JSONObject result = executeQuery(query); | ||
|
|
||
|     verifySchema(result, schema("hello", "string"), schema("california", "string")); | ||
|     verifyDataRows( | ||
|         result, rows("5d41402abc4b2a76b9719d911017c592", "356779a9a1696714480f57fa3fb66d4c")); | ||
|   } | ||
|
|
||
|   /** SHA1 applied to a string literal and to the {@code state} field. */ | ||
|   @Test | ||
|   public void testSha1() { | ||
|     String query = | ||
|         String.format( | ||
|             "source=%s | where name = 'John' | eval hello = SHA1('hello'), ontario =" | ||
|                 + " SHA1(state) | fields hello, ontario", | ||
|             TEST_INDEX_STATE_COUNTRY); | ||
|     JSONObject result = executeQuery(query); | ||
|
|
||
|     verifySchema(result, schema("hello", "string"), schema("ontario", "string")); | ||
|     verifyDataRows( | ||
|         result, | ||
|         rows( | ||
|             "aaf4c61ddcc5e8a2dabede0f3b482cd9aea9434d", | ||
|             "f9f742e1f653a74c4cd78d7ea283b5556539b96b")); | ||
|   } | ||
|
|
||
|   /** SHA2 with all four supported bit lengths, on a literal and on the {@code country} field. */ | ||
|   @Test | ||
|   public void testSha2() { | ||
|     String query = | ||
|         String.format( | ||
|             "source=%s | where name = 'Jane' | eval sha256 = SHA2('hello',256), sha512 =" | ||
|                 + " SHA2('hello',512), sha224 = SHA2(country, 224), sha384 = SHA2(country, 384)" | ||
|                 + " | fields sha256, sha512, sha224, sha384", | ||
|             TEST_INDEX_STATE_COUNTRY); | ||
|     JSONObject result = executeQuery(query); | ||
|
|
||
|     verifySchema( | ||
|         result, | ||
|         schema("sha256", "string"), | ||
|         schema("sha512", "string"), | ||
|         schema("sha224", "string"), | ||
|         schema("sha384", "string")); | ||
|     verifyDataRows( | ||
|         result, | ||
|         rows( | ||
|             "2cf24dba5fb0a30e26e83b2ac5b9e29e1b161e5c1fa7425e73043362938b9824", | ||
|             "9b71d224bd62f3785d96d46ad3ea3d73319bfbc2890caadae2dff72519673ca72323c3d99ba5c11d7c7acc6e14b8c5da0c4663475c2e5c3adef46f73bcdec043", | ||
|             "c16f747ca3d2e267c76e7355429fb1583268d966887f237b8e1605c7", | ||
|             "de2abcb28b87d681830f3af25cd8dde7fdc2a4da9dcfde60b371fd2378a70ac39cef3e104bbe09aecda022aee7b4bf59")); | ||
|   } | ||
|
|
||
|   /** An unsupported bit length must surface as an {@link IllegalArgumentException}. */ | ||
|   @Test | ||
|   public void testSha2WrongAlgorithmShouldThrow() { | ||
|     String query = | ||
|         String.format( | ||
|             "source=%s | head 1 | eval sha100 = SHA2('hello', 100) | fields sha100", | ||
|             TEST_INDEX_STATE_COUNTRY); | ||
|     Throwable thrown = assertThrows(IllegalArgumentException.class, () -> executeQuery(query)); | ||
|     verifyErrorMessageContains(thrown, "Unsupported SHA2 algorithm"); | ||
|   } | ||
| } |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It seems that
`SHA2` is the only one we built ourselves. So I checked why PPL Spark supported `SHA2`, and I found there was no requirement for it. This issue mentioned a link from Splunk, but there was no `SHA2` in Splunk either. @penghuo how about removing `SHA2` in both Spark and OpenSearch?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I'm okay with using SHA2, as it provides more functionality.