diff --git a/core/src/main/java/org/opensearch/sql/expression/function/BuiltinFunctionName.java b/core/src/main/java/org/opensearch/sql/expression/function/BuiltinFunctionName.java index db5f88f26bf..8c35ee537bf 100644 --- a/core/src/main/java/org/opensearch/sql/expression/function/BuiltinFunctionName.java +++ b/core/src/main/java/org/opensearch/sql/expression/function/BuiltinFunctionName.java @@ -138,6 +138,11 @@ public enum BuiltinFunctionName { /** IP Functions. */ CIDRMATCH(FunctionName.of("cidrmatch")), + /** Cryptographic Functions. */ + MD5(FunctionName.of("md5")), + SHA1(FunctionName.of("sha1")), + SHA2(FunctionName.of("sha2")), + /** Arithmetic Operators. */ ADD(FunctionName.of("+")), ADDFUNCTION(FunctionName.of("add")), diff --git a/core/src/main/java/org/opensearch/sql/expression/function/PPLBuiltinOperators.java b/core/src/main/java/org/opensearch/sql/expression/function/PPLBuiltinOperators.java index e1ff69660b1..a80f776d5a0 100644 --- a/core/src/main/java/org/opensearch/sql/expression/function/PPLBuiltinOperators.java +++ b/core/src/main/java/org/opensearch/sql/expression/function/PPLBuiltinOperators.java @@ -16,11 +16,13 @@ import org.apache.calcite.sql.SqlOperator; import org.apache.calcite.sql.util.ReflectiveSqlOperatorTable; import org.apache.calcite.util.BuiltInMethod; +import org.opensearch.sql.expression.function.udf.CryptographicFunction; /** Defines functions and operators that are implemented only by PPL */ public class PPLBuiltinOperators extends ReflectiveSqlOperatorTable { public static final SqlOperator SPAN = new SpanFunctionImpl().toUDF("SPAN"); + public static final SqlOperator SHA2 = CryptographicFunction.sha2().toUDF("SHA2"); /** * Invoking an implementor registered in {@link RexImpTable}, need to use reflection since they're diff --git a/core/src/main/java/org/opensearch/sql/expression/function/PPLFuncImpTable.java b/core/src/main/java/org/opensearch/sql/expression/function/PPLFuncImpTable.java index df9e53730c6..e1e45ce9480 100644 
--- a/core/src/main/java/org/opensearch/sql/expression/function/PPLFuncImpTable.java +++ b/core/src/main/java/org/opensearch/sql/expression/function/PPLFuncImpTable.java @@ -43,6 +43,7 @@ import static org.opensearch.sql.expression.function.BuiltinFunctionName.LOWER; import static org.opensearch.sql.expression.function.BuiltinFunctionName.LTE; import static org.opensearch.sql.expression.function.BuiltinFunctionName.LTRIM; +import static org.opensearch.sql.expression.function.BuiltinFunctionName.MD5; import static org.opensearch.sql.expression.function.BuiltinFunctionName.MULTIPLY; import static org.opensearch.sql.expression.function.BuiltinFunctionName.NOT; import static org.opensearch.sql.expression.function.BuiltinFunctionName.NOTEQUAL; @@ -58,6 +59,8 @@ import static org.opensearch.sql.expression.function.BuiltinFunctionName.RIGHT; import static org.opensearch.sql.expression.function.BuiltinFunctionName.ROUND; import static org.opensearch.sql.expression.function.BuiltinFunctionName.RTRIM; +import static org.opensearch.sql.expression.function.BuiltinFunctionName.SHA1; +import static org.opensearch.sql.expression.function.BuiltinFunctionName.SHA2; import static org.opensearch.sql.expression.function.BuiltinFunctionName.SIGN; import static org.opensearch.sql.expression.function.BuiltinFunctionName.SIN; import static org.opensearch.sql.expression.function.BuiltinFunctionName.SPAN; @@ -270,11 +273,14 @@ void populate() { registerOperator(RIGHT, SqlLibraryOperators.RIGHT); registerOperator(LEFT, SqlLibraryOperators.LEFT); registerOperator(LOG2, SqlLibraryOperators.LOG2); + registerOperator(MD5, SqlLibraryOperators.MD5); + registerOperator(SHA1, SqlLibraryOperators.SHA1); registerOperator(INTERNAL_REGEXP_EXTRACT, SqlLibraryOperators.REGEXP_EXTRACT); registerOperator(INTERNAL_REGEXP_REPLACE_2, SqlLibraryOperators.REGEXP_REPLACE_2); // Register PPL UDF operator registerOperator(SPAN, PPLBuiltinOperators.SPAN); + registerOperator(SHA2, PPLBuiltinOperators.SHA2); // 
Register implementation.
     // Note, make the implementation an individual class if too complex.
diff --git a/core/src/main/java/org/opensearch/sql/expression/function/udf/CryptographicFunction.java b/core/src/main/java/org/opensearch/sql/expression/function/udf/CryptographicFunction.java
new file mode 100644
index 00000000000..f73b9efee30
--- /dev/null
+++ b/core/src/main/java/org/opensearch/sql/expression/function/udf/CryptographicFunction.java
@@ -0,0 +1,74 @@
+/*
+ * Copyright OpenSearch Contributors
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+package org.opensearch.sql.expression.function.udf;
+
+import java.nio.charset.StandardCharsets;
+import java.util.List;
+import org.apache.calcite.adapter.enumerable.NotNullImplementor;
+import org.apache.calcite.adapter.enumerable.NullPolicy;
+import org.apache.calcite.adapter.enumerable.RexToLixTranslator;
+import org.apache.calcite.linq4j.tree.Expression;
+import org.apache.calcite.linq4j.tree.Expressions;
+import org.apache.calcite.rex.RexCall;
+import org.apache.calcite.sql.type.ReturnTypes;
+import org.apache.calcite.sql.type.SqlReturnTypeInference;
+import org.apache.calcite.sql.type.SqlTypeTransforms;
+import org.apache.commons.codec.binary.Hex;
+import org.apache.commons.codec.digest.DigestUtils;
+import org.apache.commons.codec.digest.MessageDigestAlgorithms;
+import org.opensearch.sql.expression.function.ImplementorUDF;
+
+/** User-defined functions for PPL cryptographic hashing; currently provides {@code sha2}. */
+public class CryptographicFunction extends ImplementorUDF {
+  private CryptographicFunction(NotNullImplementor implementor, NullPolicy nullPolicy) {
+    super(implementor, nullPolicy);
+  }
+
+  /** Creates the {@code SHA2(str, numBits)} function, backed by {@link Sha2Implementor}. */
+  public static CryptographicFunction sha2() {
+    return new CryptographicFunction(new Sha2Implementor(), NullPolicy.ANY);
+  }
+
+  /** Declares the result type as VARCHAR, forced to nullable. */
+  @Override
+  public SqlReturnTypeInference getReturnTypeInference() {
+    return ReturnTypes.VARCHAR.andThen(SqlTypeTransforms.FORCE_NULLABLE);
+  }
+
+  /** Emits a static call to {@code getDigest} with the already-translated operands. */
+  public static class Sha2Implementor implements NotNullImplementor {
+    @Override
+    public Expression implement(
+        RexToLixTranslator translator, RexCall call, List<Expression> translatedOperands) {
+      return Expressions.call(Sha2Implementor.class, "getDigest", translatedOperands);
+    }
+
+    /**
+     * Computes a SHA-2 digest of {@code input} and returns it as a hex string.
+     *
+     * @param input text to hash; encoded as UTF-8 for every supported digest size
+     * @param algorithm digest size in bits: 224, 256, 384 or 512
+     * @return hex-encoded digest
+     * @throws IllegalArgumentException if {@code algorithm} is not one of the supported sizes
+     */
+    public static String getDigest(String input, int algorithm) {
+      return switch (algorithm) {
+        // SHA-224 goes through a raw MessageDigest, so encode the input explicitly as UTF-8 to
+        // match the charset used by the sha256Hex/sha384Hex/sha512Hex String helpers.
+        case 224 -> Hex.encodeHexString(
+            DigestUtils.getDigest(MessageDigestAlgorithms.SHA_224)
+                .digest(input.getBytes(StandardCharsets.UTF_8)));
+        case 256 -> DigestUtils.sha256Hex(input);
+        case 384 -> DigestUtils.sha384Hex(input);
+        case 512 -> DigestUtils.sha512Hex(input);
+        default -> throw new IllegalArgumentException(
+            String.format(
+                "Unsupported SHA2 algorithm: %d. Only 224, 256, 384, and 512 are supported.",
+                algorithm));
+      };
+    }
+  }
+}
diff --git a/docs/user/ppl/functions/cryptographic.rst b/docs/user/ppl/functions/cryptographic.rst
new file mode 100644
index 00000000000..496d7673835
--- /dev/null
+++ b/docs/user/ppl/functions/cryptographic.rst
@@ -0,0 +1,85 @@
+===========================
+PPL Cryptographic Functions
+===========================
+
+.. rubric:: Table of contents
+
+.. contents::
+   :local:
+   :depth: 1
+
+MD5
+---
+
+Description
+>>>>>>>>>>>
+
+
+Usage: ``md5(str)`` calculates the MD5 digest and returns the value as a 32 character hex string.
+
+Argument type: STRING
+
+Return type: STRING
+
+Example::
+
+    os> source=people | eval `MD5('hello')` = MD5('hello') | fields `MD5('hello')`
+    fetched rows / total rows = 1/1
+    +----------------------------------+
+    | MD5('hello')                     |
+    |----------------------------------|
+    | 5d41402abc4b2a76b9719d911017c592 |
+    +----------------------------------+
+
+SHA1
+----
+
+Description
+>>>>>>>>>>>
+
+Usage: ``sha1(str)`` returns the hex string result of SHA-1.
+ +Argument type: STRING + +Return type: STRING + +Example:: + + os> source=people | eval `SHA1('hello')` = SHA1('hello') | fields `SHA1('hello')` + fetched rows / total rows = 1/1 + +------------------------------------------+ + | SHA1('hello') | + |------------------------------------------| + | aaf4c61ddcc5e8a2dabede0f3b482cd9aea9434d | + +------------------------------------------+ + +SHA2 +---- + +Description +>>>>>>>>>>> + +Usage: ``sha2(str, numBits)`` returns the hex string result of SHA-2 family of hash functions (SHA-224, SHA-256, SHA-384, and SHA-512). +The numBits indicates the desired bit length of the result, which must have a value of 224, 256, 384, or 512. + +Argument type: STRING, INTEGER + +Return type: STRING + +Example:: + + os> source=people | eval `SHA2('hello',256)` = SHA2('hello',256) | fields `SHA2('hello',256)` + fetched rows / total rows = 1/1 + +------------------------------------------------------------------+ + | SHA2('hello',256) | + |------------------------------------------------------------------| + | 2cf24dba5fb0a30e26e83b2ac5b9e29e1b161e5c1fa7425e73043362938b9824 | + +------------------------------------------------------------------+ + + os> source=people | eval `SHA2('hello',512)` = SHA2('hello',512) | fields `SHA2('hello',512)` + fetched rows / total rows = 1/1 + +----------------------------------------------------------------------------------------------------------------------------------+ + | SHA2('hello',512) | + |----------------------------------------------------------------------------------------------------------------------------------| + | 9b71d224bd62f3785d96d46ad3ea3d73319bfbc2890caadae2dff72519673ca72323c3d99ba5c11d7c7acc6e14b8c5da0c4663475c2e5c3adef46f73bcdec043 | + +----------------------------------------------------------------------------------------------------------------------------------+ diff --git 
a/integ-test/src/test/java/org/opensearch/sql/calcite/standalone/CalcitePPLCryptographicFunctionIT.java b/integ-test/src/test/java/org/opensearch/sql/calcite/standalone/CalcitePPLCryptographicFunctionIT.java
new file mode 100644
index 00000000000..6e30d2321da
--- /dev/null
+++ b/integ-test/src/test/java/org/opensearch/sql/calcite/standalone/CalcitePPLCryptographicFunctionIT.java
@@ -0,0 +1,97 @@
+/*
+ * Copyright OpenSearch Contributors
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+package org.opensearch.sql.calcite.standalone;
+
+import static org.opensearch.sql.legacy.TestsConstants.TEST_INDEX_STATE_COUNTRY;
+import static org.opensearch.sql.util.MatcherUtils.rows;
+import static org.opensearch.sql.util.MatcherUtils.schema;
+import static org.opensearch.sql.util.MatcherUtils.verifyDataRows;
+import static org.opensearch.sql.util.MatcherUtils.verifyErrorMessageContains;
+import static org.opensearch.sql.util.MatcherUtils.verifySchema;
+
+import java.io.IOException;
+import org.json.JSONObject;
+import org.junit.jupiter.api.Test;
+
+/** Integration tests for the PPL cryptographic functions MD5, SHA1 and SHA2. */
+public class CalcitePPLCryptographicFunctionIT extends CalcitePPLIntegTestCase {
+  /** Loads the state/country test index queried by every test in this class. */
+  @Override
+  public void init() throws IOException {
+    super.init();
+    loadIndex(Index.STATE_COUNTRY);
+  }
+
+  /** MD5 over a string literal and over the {@code state} field. */
+  @Test
+  public void testMd5() {
+    String ppl =
+        String.format(
+            "source=%s | where name = 'Jake' | eval hello = MD5('hello'), california ="
+                + " md5(state) | fields hello, california",
+            TEST_INDEX_STATE_COUNTRY);
+    JSONObject result = executeQuery(ppl);
+
+    verifySchema(result, schema("hello", "string"), schema("california", "string"));
+    verifyDataRows(
+        result, rows("5d41402abc4b2a76b9719d911017c592", "356779a9a1696714480f57fa3fb66d4c"));
+  }
+
+  /** SHA1 over a string literal and over the {@code state} field. */
+  @Test
+  public void testSha1() {
+    String ppl =
+        String.format(
+            "source=%s | where name = 'John' | eval hello = SHA1('hello'), ontario ="
+                + " SHA1(state) | fields hello, ontario",
+            TEST_INDEX_STATE_COUNTRY);
+    JSONObject result = executeQuery(ppl);
+
+    verifySchema(result, schema("hello", "string"), schema("ontario", "string"));
+    verifyDataRows(
+        result,
+        rows(
+            "aaf4c61ddcc5e8a2dabede0f3b482cd9aea9434d",
+            "f9f742e1f653a74c4cd78d7ea283b5556539b96b"));
+  }
+
+  /** SHA2 at 256 and 512 bits over a literal, and at 224 and 384 bits over {@code country}. */
+  @Test
+  public void testSha2() {
+    String ppl =
+        String.format(
+            "source=%s | where name = 'Jane' | eval sha256 = SHA2('hello',256), sha512 ="
+                + " SHA2('hello',512), sha224 = SHA2(country, 224), sha384 = SHA2(country, 384)"
+                + " | fields sha256, sha512, sha224, sha384",
+            TEST_INDEX_STATE_COUNTRY);
+    JSONObject result = executeQuery(ppl);
+
+    verifySchema(
+        result,
+        schema("sha256", "string"),
+        schema("sha512", "string"),
+        schema("sha224", "string"),
+        schema("sha384", "string"));
+    verifyDataRows(
+        result,
+        rows(
+            "2cf24dba5fb0a30e26e83b2ac5b9e29e1b161e5c1fa7425e73043362938b9824",
+            "9b71d224bd62f3785d96d46ad3ea3d73319bfbc2890caadae2dff72519673ca72323c3d99ba5c11d7c7acc6e14b8c5da0c4663475c2e5c3adef46f73bcdec043",
+            "c16f747ca3d2e267c76e7355429fb1583268d966887f237b8e1605c7",
+            "de2abcb28b87d681830f3af25cd8dde7fdc2a4da9dcfde60b371fd2378a70ac39cef3e104bbe09aecda022aee7b4bf59"));
+  }
+
+  /** An unsupported bit length (100) must surface as an {@link IllegalArgumentException}. */
+  @Test
+  public void testSha2WrongAlgorithmShouldThrow() {
+    String ppl =
+        String.format(
+            "source=%s | head 1 | eval sha100 = SHA2('hello', 100) | fields sha100",
+            TEST_INDEX_STATE_COUNTRY);
+    Throwable thrown = assertThrows(IllegalArgumentException.class, () -> executeQuery(ppl));
+    verifyErrorMessageContains(thrown, "Unsupported SHA2 algorithm");
+  }
+}
diff --git a/ppl/src/main/antlr/OpenSearchPPLLexer.g4 b/ppl/src/main/antlr/OpenSearchPPLLexer.g4
index 4dd27b20921..985d3b48deb 100644
--- a/ppl/src/main/antlr/OpenSearchPPLLexer.g4
+++ b/ppl/src/main/antlr/OpenSearchPPLLexer.g4
@@ -271,6 +271,11 @@ RADIANS: 'RADIANS';
 SIN: 'SIN';
 TAN: 'TAN';
 
+// CRYPTOGRAPHIC FUNCTIONS
+MD5: 'MD5';
+SHA1: 'SHA1';
+SHA2: 'SHA2';
+
 // DATE AND TIME FUNCTIONS
 ADDDATE: 'ADDDATE';
 ADDTIME: 'ADDTIME';
diff --git a/ppl/src/main/antlr/OpenSearchPPLParser.g4 b/ppl/src/main/antlr/OpenSearchPPLParser.g4
index 93098478ea0..9b44425a161 100644
--- a/ppl/src/main/antlr/OpenSearchPPLParser.g4
+++ b/ppl/src/main/antlr/OpenSearchPPLParser.g4 @@ -546,6 +546,7 @@ evalFunctionName | flowControlFunctionName | systemFunctionName | positionFunctionName + | cryptographicFunctionName | jsonFunctionName | geoipFunctionName ; @@ -666,6 +667,12 @@ trigonometricFunctionName | TAN ; +cryptographicFunctionName + : MD5 + | SHA1 + | SHA2 + ; + dateTimeFunctionName : ADDDATE | ADDTIME