Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -138,6 +138,11 @@ public enum BuiltinFunctionName {
/** IP Functions. */
CIDRMATCH(FunctionName.of("cidrmatch")),

/** Cryptographic Functions. */
MD5(FunctionName.of("md5")),
SHA1(FunctionName.of("sha1")),
SHA2(FunctionName.of("sha2")),

/** Arithmetic Operators. */
ADD(FunctionName.of("+")),
ADDFUNCTION(FunctionName.of("add")),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,11 +16,13 @@
import org.apache.calcite.sql.SqlOperator;
import org.apache.calcite.sql.util.ReflectiveSqlOperatorTable;
import org.apache.calcite.util.BuiltInMethod;
import org.opensearch.sql.expression.function.udf.CryptographicFunction;

/** Defines functions and operators that are implemented only by PPL. */
public class PPLBuiltinOperators extends ReflectiveSqlOperatorTable {

public static final SqlOperator SPAN = new SpanFunctionImpl().toUDF("SPAN");
public static final SqlOperator SHA2 = CryptographicFunction.sha2().toUDF("SHA2");

/**
* Invoking an implementor registered in {@link RexImpTable}, need to use reflection since they're
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@
import static org.opensearch.sql.expression.function.BuiltinFunctionName.LOWER;
import static org.opensearch.sql.expression.function.BuiltinFunctionName.LTE;
import static org.opensearch.sql.expression.function.BuiltinFunctionName.LTRIM;
import static org.opensearch.sql.expression.function.BuiltinFunctionName.MD5;
import static org.opensearch.sql.expression.function.BuiltinFunctionName.MULTIPLY;
import static org.opensearch.sql.expression.function.BuiltinFunctionName.NOT;
import static org.opensearch.sql.expression.function.BuiltinFunctionName.NOTEQUAL;
Expand All @@ -58,6 +59,8 @@
import static org.opensearch.sql.expression.function.BuiltinFunctionName.RIGHT;
import static org.opensearch.sql.expression.function.BuiltinFunctionName.ROUND;
import static org.opensearch.sql.expression.function.BuiltinFunctionName.RTRIM;
import static org.opensearch.sql.expression.function.BuiltinFunctionName.SHA1;
import static org.opensearch.sql.expression.function.BuiltinFunctionName.SHA2;
import static org.opensearch.sql.expression.function.BuiltinFunctionName.SIGN;
import static org.opensearch.sql.expression.function.BuiltinFunctionName.SIN;
import static org.opensearch.sql.expression.function.BuiltinFunctionName.SPAN;
Expand Down Expand Up @@ -270,11 +273,14 @@ void populate() {
registerOperator(RIGHT, SqlLibraryOperators.RIGHT);
registerOperator(LEFT, SqlLibraryOperators.LEFT);
registerOperator(LOG2, SqlLibraryOperators.LOG2);
registerOperator(MD5, SqlLibraryOperators.MD5);
registerOperator(SHA1, SqlLibraryOperators.SHA1);
registerOperator(INTERNAL_REGEXP_EXTRACT, SqlLibraryOperators.REGEXP_EXTRACT);
registerOperator(INTERNAL_REGEXP_REPLACE_2, SqlLibraryOperators.REGEXP_REPLACE_2);

// Register PPL UDF operator
registerOperator(SPAN, PPLBuiltinOperators.SPAN);
registerOperator(SHA2, PPLBuiltinOperators.SHA2);
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

SHA2 seems to be the only one we build ourselves. I checked why PPL Spark supports SHA2 and found there was no requirement for it. The issue mentions a link from Splunk, but Splunk has no SHA2 either. @penghuo, how about removing SHA2 from both Spark and OpenSearch?

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm okay with using SHA2, as it provides more functionality.


// Register implementation.
// Note, make the implementation an individual class if too complex.
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
/*
* Copyright OpenSearch Contributors
* SPDX-License-Identifier: Apache-2.0
*/

package org.opensearch.sql.expression.function.udf;

import java.nio.charset.StandardCharsets;
import java.util.List;
import org.apache.calcite.adapter.enumerable.NotNullImplementor;
import org.apache.calcite.adapter.enumerable.NullPolicy;
import org.apache.calcite.adapter.enumerable.RexToLixTranslator;
import org.apache.calcite.linq4j.tree.Expression;
import org.apache.calcite.linq4j.tree.Expressions;
import org.apache.calcite.rex.RexCall;
import org.apache.calcite.sql.type.ReturnTypes;
import org.apache.calcite.sql.type.SqlReturnTypeInference;
import org.apache.calcite.sql.type.SqlTypeTransforms;
import org.apache.commons.codec.binary.Hex;
import org.apache.commons.codec.digest.DigestUtils;
import org.apache.commons.codec.digest.MessageDigestAlgorithms;
import org.opensearch.sql.expression.function.ImplementorUDF;

/**
 * User-defined cryptographic hash functions for PPL.
 *
 * <p>Currently only {@code SHA2(str, numBits)} is provided here; it is obtained through the
 * {@link #sha2()} factory. The function always reports a nullable {@code VARCHAR} result.
 */
public class CryptographicFunction extends ImplementorUDF {
  private CryptographicFunction(NotNullImplementor implementor, NullPolicy nullPolicy) {
    super(implementor, nullPolicy);
  }

  /**
   * Creates the {@code SHA2} function, backed by {@link Sha2Implementor} and registered with
   * {@link NullPolicy#ANY}.
   *
   * @return a new SHA2 function definition
   */
  public static CryptographicFunction sha2() {
    return new CryptographicFunction(new Sha2Implementor(), NullPolicy.ANY);
  }

  /** Returns a type inference that yields {@code VARCHAR}, forced to be nullable. */
  @Override
  public SqlReturnTypeInference getReturnTypeInference() {
    return ReturnTypes.VARCHAR.andThen(SqlTypeTransforms.FORCE_NULLABLE);
  }

  /** Translates a {@code SHA2} call into a static call to {@link #getDigest(String, int)}. */
  public static class Sha2Implementor implements NotNullImplementor {
    @Override
    public Expression implement(
        RexToLixTranslator translator, RexCall call, List<Expression> translatedOperands) {
      // The translated operands are passed through unchanged as (input, algorithm).
      return Expressions.call(Sha2Implementor.class, "getDigest", translatedOperands);
    }

    /**
     * Computes a SHA-2 digest of {@code input} and returns it as a hex string.
     *
     * @param input the text to hash; encoded as UTF-8 for every supported width
     * @param algorithm the SHA-2 variant, given as its bit length: 224, 256, 384 or 512
     * @return the hex-encoded digest
     * @throws IllegalArgumentException if {@code algorithm} is not one of the supported widths
     */
    public static String getDigest(String input, int algorithm) {
      return switch (algorithm) {
        // DigestUtils has no sha224Hex(String) shortcut in use here, so the digest is built
        // by hand. The bytes are taken as UTF-8 explicitly so this branch hashes the same byte
        // sequence as the DigestUtils.shaXXXHex(String) helpers below, regardless of the
        // platform default charset.
        case 224 -> Hex.encodeHexString(
            DigestUtils.getDigest(MessageDigestAlgorithms.SHA_224)
                .digest(input.getBytes(StandardCharsets.UTF_8)));
        case 256 -> DigestUtils.sha256Hex(input);
        case 384 -> DigestUtils.sha384Hex(input);
        case 512 -> DigestUtils.sha512Hex(input);
        default -> throw new IllegalArgumentException(
            String.format(
                "Unsupported SHA2 algorithm: %d. Only 224, 256, 384, and 512 are supported.",
                algorithm));
      };
    }
  }
}
85 changes: 85 additions & 0 deletions docs/user/ppl/functions/cryptographic.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,85 @@
===========================
PPL Cryptographic Functions
===========================

.. rubric:: Table of contents

.. contents::
:local:
:depth: 1

MD5
---

Description
>>>>>>>>>>>


Usage: ``md5(str)`` calculates the MD5 digest of ``str`` and returns it as a 32-character hexadecimal string.

Argument type: STRING

Return type: STRING

Example::

os> source=people | eval `MD5('hello')` = MD5('hello') | fields `MD5('hello')`
fetched rows / total rows = 1/1
+----------------------------------+
| MD5('hello') |
|----------------------------------|
| 5d41402abc4b2a76b9719d911017c592 |
+----------------------------------+

SHA1
----

Description
>>>>>>>>>>>

Usage: ``sha1(str)`` calculates the SHA-1 digest of ``str`` and returns it as a 40-character hexadecimal string.

Argument type: STRING

Return type: STRING

Example::

os> source=people | eval `SHA1('hello')` = SHA1('hello') | fields `SHA1('hello')`
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please apply the function to a field of the people index.

Copy link
Collaborator Author

@yuancu yuancu Apr 24, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Tests are corrected. Document kept unchanged.

fetched rows / total rows = 1/1
+------------------------------------------+
| SHA1('hello') |
|------------------------------------------|
| aaf4c61ddcc5e8a2dabede0f3b482cd9aea9434d |
+------------------------------------------+

SHA2
----

Description
>>>>>>>>>>>

Usage: ``sha2(str, numBits)`` calculates a digest of ``str`` using the SHA-2 family of hash functions (SHA-224, SHA-256, SHA-384, and SHA-512) and returns it as a hexadecimal string.
``numBits`` is the desired bit length of the digest and must be 224, 256, 384, or 512.

Argument type: STRING, INTEGER

Return type: STRING

Example::

os> source=people | eval `SHA2('hello',256)` = SHA2('hello',256) | fields `SHA2('hello',256)`
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

ditto

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

done

fetched rows / total rows = 1/1
+------------------------------------------------------------------+
| SHA2('hello',256) |
|------------------------------------------------------------------|
| 2cf24dba5fb0a30e26e83b2ac5b9e29e1b161e5c1fa7425e73043362938b9824 |
+------------------------------------------------------------------+

os> source=people | eval `SHA2('hello',512)` = SHA2('hello',512) | fields `SHA2('hello',512)`
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

ditto

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

done

fetched rows / total rows = 1/1
+----------------------------------------------------------------------------------------------------------------------------------+
| SHA2('hello',512) |
|----------------------------------------------------------------------------------------------------------------------------------|
| 9b71d224bd62f3785d96d46ad3ea3d73319bfbc2890caadae2dff72519673ca72323c3d99ba5c11d7c7acc6e14b8c5da0c4663475c2e5c3adef46f73bcdec043 |
+----------------------------------------------------------------------------------------------------------------------------------+
Original file line number Diff line number Diff line change
@@ -0,0 +1,91 @@
/*
* Copyright OpenSearch Contributors
* SPDX-License-Identifier: Apache-2.0
*/

package org.opensearch.sql.calcite.standalone;

import static org.opensearch.sql.legacy.TestsConstants.TEST_INDEX_STATE_COUNTRY;
import static org.opensearch.sql.util.MatcherUtils.rows;
import static org.opensearch.sql.util.MatcherUtils.schema;
import static org.opensearch.sql.util.MatcherUtils.verifyDataRows;
import static org.opensearch.sql.util.MatcherUtils.verifyErrorMessageContains;
import static org.opensearch.sql.util.MatcherUtils.verifySchema;

import java.io.IOException;
import org.json.JSONObject;
import org.junit.jupiter.api.Test;

/**
 * Integration tests for the PPL cryptographic functions MD5, SHA1 and SHA2, run against the
 * state/country test index.
 */
public class CalcitePPLCryptographicFunctionIT extends CalcitePPLIntegTestCase {
  /** Performs the base initialisation and then loads the state/country index. */
  @Override
  public void init() throws IOException {
    super.init();
    loadIndex(Index.STATE_COUNTRY);
  }

  /** MD5 over a string literal and over the {@code state} field of a single row. */
  @Test
  public void testMd5() {
    String query =
        String.format(
            "source=%s | where name = 'Jake' | eval hello = MD5('hello'), california ="
                + " md5(state) | fields hello, california",
            TEST_INDEX_STATE_COUNTRY);

    JSONObject result = executeQuery(query);

    verifySchema(result, schema("hello", "string"), schema("california", "string"));
    verifyDataRows(
        result, rows("5d41402abc4b2a76b9719d911017c592", "356779a9a1696714480f57fa3fb66d4c"));
  }

  /** SHA1 over a string literal and over the {@code state} field of a single row. */
  @Test
  public void testSha1() {
    String query =
        String.format(
            "source=%s | where name = 'John' | eval hello = SHA1('hello'), ontario ="
                + " SHA1(state) | fields hello, ontario",
            TEST_INDEX_STATE_COUNTRY);

    JSONObject result = executeQuery(query);

    verifySchema(result, schema("hello", "string"), schema("ontario", "string"));
    verifyDataRows(
        result,
        rows(
            "aaf4c61ddcc5e8a2dabede0f3b482cd9aea9434d",
            "f9f742e1f653a74c4cd78d7ea283b5556539b96b"));
  }

  /** SHA2 with each supported bit length, over a literal and over the {@code country} field. */
  @Test
  public void testSha2() {
    String query =
        String.format(
            "source=%s | where name = 'Jane' | eval sha256 = SHA2('hello',256), sha512 ="
                + " SHA2('hello',512), sha224 = SHA2(country, 224), sha384 = SHA2(country, 384)"
                + " | fields sha256, sha512, sha224, sha384",
            TEST_INDEX_STATE_COUNTRY);

    JSONObject result = executeQuery(query);

    verifySchema(
        result,
        schema("sha256", "string"),
        schema("sha512", "string"),
        schema("sha224", "string"),
        schema("sha384", "string"));
    verifyDataRows(
        result,
        rows(
            "2cf24dba5fb0a30e26e83b2ac5b9e29e1b161e5c1fa7425e73043362938b9824",
            "9b71d224bd62f3785d96d46ad3ea3d73319bfbc2890caadae2dff72519673ca72323c3d99ba5c11d7c7acc6e14b8c5da0c4663475c2e5c3adef46f73bcdec043",
            "c16f747ca3d2e267c76e7355429fb1583268d966887f237b8e1605c7",
            "de2abcb28b87d681830f3af25cd8dde7fdc2a4da9dcfde60b371fd2378a70ac39cef3e104bbe09aecda022aee7b4bf59"));
  }

  /** An unsupported bit length must surface as an {@link IllegalArgumentException}. */
  @Test
  public void testSha2WrongAlgorithmShouldThrow() {
    String query =
        String.format(
            "source=%s | head 1 | eval sha100 = SHA2('hello', 100) | fields sha100",
            TEST_INDEX_STATE_COUNTRY);

    Throwable thrown = assertThrows(IllegalArgumentException.class, () -> executeQuery(query));

    verifyErrorMessageContains(thrown, "Unsupported SHA2 algorithm");
  }
}
5 changes: 5 additions & 0 deletions ppl/src/main/antlr/OpenSearchPPLLexer.g4
Original file line number Diff line number Diff line change
Expand Up @@ -271,6 +271,11 @@ RADIANS: 'RADIANS';
SIN: 'SIN';
TAN: 'TAN';

// CRYPTOGRAPHIC FUNCTIONS
MD5: 'MD5';
SHA1: 'SHA1';
SHA2: 'SHA2';

// DATE AND TIME FUNCTIONS
ADDDATE: 'ADDDATE';
ADDTIME: 'ADDTIME';
Expand Down
7 changes: 7 additions & 0 deletions ppl/src/main/antlr/OpenSearchPPLParser.g4
Original file line number Diff line number Diff line change
Expand Up @@ -546,6 +546,7 @@ evalFunctionName
| flowControlFunctionName
| systemFunctionName
| positionFunctionName
| cryptographicFunctionName
| jsonFunctionName
| geoipFunctionName
;
Expand Down Expand Up @@ -666,6 +667,12 @@ trigonometricFunctionName
| TAN
;

// Names of the cryptographic hash functions; referenced as an alternative of evalFunctionName.
cryptographicFunctionName
: MD5
| SHA1
| SHA2
;

dateTimeFunctionName
: ADDDATE
| ADDTIME
Expand Down
Loading