diff --git a/async-query-core/src/main/antlr/OpenSearchPPLLexer.g4 b/async-query-core/src/main/antlr/OpenSearchPPLLexer.g4 index cb323f7942c..4ed6738b22e 100644 --- a/async-query-core/src/main/antlr/OpenSearchPPLLexer.g4 +++ b/async-query-core/src/main/antlr/OpenSearchPPLLexer.g4 @@ -384,10 +384,10 @@ TO_JSON_STRING: 'TO_JSON_STRING'; JSON_EXTRACT: 'JSON_EXTRACT'; JSON_KEYS: 'JSON_KEYS'; JSON_VALID: 'JSON_VALID'; +JSON_SET: 'JSON_SET'; //JSON_APPEND: 'JSON_APPEND'; //JSON_DELETE: 'JSON_DELETE'; //JSON_EXTEND: 'JSON_EXTEND'; -//JSON_SET: 'JSON_SET'; //JSON_ARRAY_ALL_MATCH: 'JSON_ARRAY_ALL_MATCH'; //JSON_ARRAY_ANY_MATCH: 'JSON_ARRAY_ANY_MATCH'; //JSON_ARRAY_FILTER: 'JSON_ARRAY_FILTER'; diff --git a/async-query-core/src/main/antlr/OpenSearchPPLParser.g4 b/async-query-core/src/main/antlr/OpenSearchPPLParser.g4 index 133cf64be58..c471d9d6d9c 100644 --- a/async-query-core/src/main/antlr/OpenSearchPPLParser.g4 +++ b/async-query-core/src/main/antlr/OpenSearchPPLParser.g4 @@ -872,10 +872,10 @@ jsonFunctionName | JSON_EXTRACT | JSON_KEYS | JSON_VALID + | JSON_SET // | JSON_APPEND // | JSON_DELETE // | JSON_EXTEND -// | JSON_SET // | JSON_ARRAY_ALL_MATCH // | JSON_ARRAY_ANY_MATCH // | JSON_ARRAY_FILTER diff --git a/core/build.gradle b/core/build.gradle index c583c9c6462..95873bddaaa 100644 --- a/core/build.gradle +++ b/core/build.gradle @@ -54,6 +54,7 @@ dependencies { api "com.fasterxml.jackson.core:jackson-core:${versions.jackson}" api "com.fasterxml.jackson.core:jackson-databind:${versions.jackson_databind}" api "com.fasterxml.jackson.core:jackson-annotations:${versions.jackson}" + api group: 'com.jayway.jsonpath', name: 'json-path', version: '2.9.0' api group: 'com.google.code.gson', name: 'gson', version: '2.8.9' api group: 'com.tdunning', name: 't-digest', version: '3.3' api project(':common') diff --git a/core/src/main/java/org/opensearch/sql/expression/DSL.java b/core/src/main/java/org/opensearch/sql/expression/DSL.java index cde00fcc924..b3f0cd522f3 100644 --- 
a/core/src/main/java/org/opensearch/sql/expression/DSL.java +++ b/core/src/main/java/org/opensearch/sql/expression/DSL.java @@ -687,6 +687,14 @@ public static FunctionExpression jsonValid(Expression... expressions) { return compile(FunctionProperties.None, BuiltinFunctionName.JSON_VALID, expressions); } + public static FunctionExpression jsonExtract(Expression... expressions) { + return compile(FunctionProperties.None, BuiltinFunctionName.JSON_EXTRACT, expressions); + } + + public static FunctionExpression jsonSet(Expression... expressions) { + return compile(FunctionProperties.None, BuiltinFunctionName.JSON_SET, expressions); + } + public static FunctionExpression stringToJson(Expression value) { return compile(FunctionProperties.None, BuiltinFunctionName.JSON, value); } diff --git a/core/src/main/java/org/opensearch/sql/expression/function/BuiltinFunctionName.java b/core/src/main/java/org/opensearch/sql/expression/function/BuiltinFunctionName.java index 366321bed21..bebf57d393b 100644 --- a/core/src/main/java/org/opensearch/sql/expression/function/BuiltinFunctionName.java +++ b/core/src/main/java/org/opensearch/sql/expression/function/BuiltinFunctionName.java @@ -207,6 +207,8 @@ public enum BuiltinFunctionName { /** Json Functions. */ JSON_VALID(FunctionName.of("json_valid")), JSON(FunctionName.of("json")), + JSON_EXTRACT(FunctionName.of("json_extract")), + JSON_SET(FunctionName.of("json_set")), /** GEOSPATIAL Functions. 
*/ GEOIP(FunctionName.of("geoip")), diff --git a/core/src/main/java/org/opensearch/sql/expression/json/JsonFunctions.java b/core/src/main/java/org/opensearch/sql/expression/json/JsonFunctions.java index 75f134aa4e9..8ecd5aa2f46 100644 --- a/core/src/main/java/org/opensearch/sql/expression/json/JsonFunctions.java +++ b/core/src/main/java/org/opensearch/sql/expression/json/JsonFunctions.java @@ -5,8 +5,21 @@ package org.opensearch.sql.expression.json; +import static org.opensearch.sql.data.type.ExprCoreType.ARRAY; import static org.opensearch.sql.data.type.ExprCoreType.BOOLEAN; +import static org.opensearch.sql.data.type.ExprCoreType.BYTE; +import static org.opensearch.sql.data.type.ExprCoreType.DATE; +import static org.opensearch.sql.data.type.ExprCoreType.DOUBLE; +import static org.opensearch.sql.data.type.ExprCoreType.FLOAT; +import static org.opensearch.sql.data.type.ExprCoreType.INTEGER; +import static org.opensearch.sql.data.type.ExprCoreType.INTERVAL; +import static org.opensearch.sql.data.type.ExprCoreType.IP; +import static org.opensearch.sql.data.type.ExprCoreType.LONG; +import static org.opensearch.sql.data.type.ExprCoreType.SHORT; import static org.opensearch.sql.data.type.ExprCoreType.STRING; +import static org.opensearch.sql.data.type.ExprCoreType.STRUCT; +import static org.opensearch.sql.data.type.ExprCoreType.TIME; +import static org.opensearch.sql.data.type.ExprCoreType.TIMESTAMP; import static org.opensearch.sql.data.type.ExprCoreType.UNDEFINED; import static org.opensearch.sql.expression.function.FunctionDSL.define; import static org.opensearch.sql.expression.function.FunctionDSL.impl; @@ -23,6 +36,8 @@ public class JsonFunctions { public void register(BuiltinFunctionRepository repository) { repository.register(jsonValid()); repository.register(jsonFunction()); + repository.register(jsonExtract()); + repository.register(jsonSet()); } private DefaultFunctionResolver jsonValid() { @@ -35,4 +50,32 @@ private DefaultFunctionResolver jsonFunction() { 
BuiltinFunctionName.JSON.getName(), impl(nullMissingHandling(JsonUtils::castJson), UNDEFINED, STRING)); } + + private DefaultFunctionResolver jsonExtract() { + return define( + BuiltinFunctionName.JSON_EXTRACT.getName(), + impl(JsonUtils::extractJson, UNDEFINED, STRING, STRING), + impl(JsonUtils::extractJson, UNDEFINED, STRING, STRING, STRING), + impl(JsonUtils::extractJson, UNDEFINED, STRING, STRING, STRING, STRING)); + } + + private DefaultFunctionResolver jsonSet() { + return define( + BuiltinFunctionName.JSON_SET.getName(), + impl(nullMissingHandling(JsonUtils::setJson), UNDEFINED, STRING, STRING, BYTE), + impl(nullMissingHandling(JsonUtils::setJson), UNDEFINED, STRING, STRING, SHORT), + impl(nullMissingHandling(JsonUtils::setJson), UNDEFINED, STRING, STRING, INTEGER), + impl(nullMissingHandling(JsonUtils::setJson), UNDEFINED, STRING, STRING, LONG), + impl(nullMissingHandling(JsonUtils::setJson), UNDEFINED, STRING, STRING, FLOAT), + impl(nullMissingHandling(JsonUtils::setJson), UNDEFINED, STRING, STRING, DOUBLE), + impl(nullMissingHandling(JsonUtils::setJson), UNDEFINED, STRING, STRING, STRING), + impl(nullMissingHandling(JsonUtils::setJson), UNDEFINED, STRING, STRING, BOOLEAN), + impl(nullMissingHandling(JsonUtils::setJson), UNDEFINED, STRING, STRING, DATE), + impl(nullMissingHandling(JsonUtils::setJson), UNDEFINED, STRING, STRING, TIME), + impl(nullMissingHandling(JsonUtils::setJson), UNDEFINED, STRING, STRING, TIMESTAMP), + impl(nullMissingHandling(JsonUtils::setJson), UNDEFINED, STRING, STRING, INTERVAL), + impl(nullMissingHandling(JsonUtils::setJson), UNDEFINED, STRING, STRING, IP), + impl(nullMissingHandling(JsonUtils::setJson), UNDEFINED, STRING, STRING, STRUCT), + impl(nullMissingHandling(JsonUtils::setJson), UNDEFINED, STRING, STRING, ARRAY)); + } } diff --git a/core/src/main/java/org/opensearch/sql/utils/JsonUtils.java b/core/src/main/java/org/opensearch/sql/utils/JsonUtils.java index f38fe597891..89fbaf27070 100644 --- 
a/core/src/main/java/org/opensearch/sql/utils/JsonUtils.java
+++ b/core/src/main/java/org/opensearch/sql/utils/JsonUtils.java
@@ -12,6 +12,14 @@
 import com.fasterxml.jackson.core.JsonProcessingException;
 import com.fasterxml.jackson.databind.JsonNode;
 import com.fasterxml.jackson.databind.ObjectMapper;
+import com.jayway.jsonpath.Configuration;
+import com.jayway.jsonpath.DocumentContext;
+import com.jayway.jsonpath.InvalidJsonException;
+import com.jayway.jsonpath.InvalidPathException;
+import com.jayway.jsonpath.JsonPath;
+import com.jayway.jsonpath.Option;
+import com.jayway.jsonpath.PathNotFoundException;
+import java.util.ArrayList;
 import java.util.LinkedHashMap;
 import java.util.LinkedList;
 import java.util.List;
@@ -79,6 +87,69 @@ public static ExprValue castJson(ExprValue json) {
     return processJsonNode(jsonNode);
   }
 
+  /**
+   * Extract the value(s) found in a JSON string at one or more JSON paths.
+   *
+   * <p>A NULL or MISSING {@code json} is returned unchanged before any path is evaluated. A path
+   * that matches nothing contributes {@code LITERAL_NULL} to the result.
+   *
+   * @param json JSON string (e.g. "{\"hello\": \"world\"}").
+   * @param paths one or more JSON paths (e.g. "$.hello").
+   * @return the extracted value when a single path is given, otherwise an {@link
+   *     ExprCollectionValue} holding one result per path, in argument order.
+   * @throws SemanticCheckException if {@code json} or any path is malformed.
+   */
+  public static ExprValue extractJson(ExprValue json, ExprValue... paths) {
+    // The document is the same for every path, so check and unwrap it once up front.
+    if (json.isNull() || json.isMissing()) {
+      return json;
+    }
+    final String jsonString = json.stringValue();
+
+    final List<ExprValue> resultList = new ArrayList<>(paths.length);
+    for (ExprValue path : paths) {
+      resultList.add(extractJsonPath(jsonString, path.stringValue()));
+    }
+
+    // get(0) rather than List#getFirst so the code does not require Java 21.
+    if (resultList.size() == 1) {
+      return resultList.get(0);
+    }
+    return new ExprCollectionValue(resultList);
+  }
+
+  /**
+   * Read a single JSON path from a raw JSON string.
+   *
+   * @param json raw JSON text; an empty string or the literal "null" yields {@code LITERAL_NULL}.
+   * @param path JSON path expression to evaluate.
+   * @return the value at {@code path}, or {@code LITERAL_NULL} when the path is not found.
+   * @throws SemanticCheckException if {@code path} or {@code json} cannot be parsed.
+   */
+  private static ExprValue extractJsonPath(String json, String path) {
+    if (json.isEmpty() || json.equals("null")) {
+      return LITERAL_NULL;
+    }
+
+    try {
+      Object results = JsonPath.parse(json).read(path);
+      return ExprValueUtils.fromObjectValue(results);
+    } catch (PathNotFoundException ignored) {
+      // A well-formed path that matches nothing is not an error; report it as null.
+      return LITERAL_NULL;
+    } catch (InvalidPathException invalidPathException) {
+      final String errorFormat = "JSON path '%s' is not valid. Error details: %s";
+      throw new SemanticCheckException(
+          String.format(errorFormat, path, invalidPathException.getMessage()),
+          invalidPathException);
+    } catch (InvalidJsonException invalidJsonException) {
+      final String errorFormat = "JSON string '%s' is not valid. Error details: %s";
+      throw new SemanticCheckException(
+          String.format(errorFormat, json, invalidJsonException.getMessage()),
+          invalidJsonException);
+    }
+  }
+
   private static ExprValue processJsonNode(JsonNode jsonNode) {
     switch (jsonNode.getNodeType()) {
       case ARRAY:
@@ -109,4 +180,76 @@ private static ExprValue processJsonNode(JsonNode jsonNode) {
         return LITERAL_NULL;
     }
   }
+
+  /**
+   * Perform an upsert operation against the incoming jsonString value with provided jsonPath and
+   * value.
+   *
+   * <p>When the path already resolves to a value that value is overwritten; otherwise the missing
+   * property (and any missing parent objects) is created.
+   *
+   * @param json jsonObject in String format.
+   * @param path upsert reference in the form of JsonPath.
+   * @param valueToInsert value to be added
+   * @return JsonString after the upsert operation.
+   * @throws SemanticCheckException if {@code path} or {@code json} is not valid.
+   */
+  public static ExprValue setJson(ExprValue json, ExprValue path, ExprValue valueToInsert) {
+
+    String jsonString = json.stringValue();
+    String jsonPathString = path.stringValue();
+    Object valueToInsertObj = valueToInsert.value();
+    // With SUPPRESS_EXCEPTIONS a read of an absent path yields null instead of throwing.
+    Configuration conf =
+        Configuration.defaultConfiguration().addOptions(Option.SUPPRESS_EXCEPTIONS);
+    try {
+      JsonPath jsonPath = JsonPath.compile(jsonPathString);
+      DocumentContext docContext = JsonPath.using(conf).parse(jsonString);
+      Object readResult = docContext.read(jsonPath);
+      if (readResult == null) {
+        recursiveCreate(docContext, jsonPathString, valueToInsertObj);
+      } else {
+        docContext.set(jsonPathString, valueToInsertObj);
+      }
+      return new ExprStringValue(docContext.jsonString());
+
+    } catch (InvalidPathException e) {
+      final String errorFormat = "JSON path '%s' is not valid. Error details: %s";
+      throw new SemanticCheckException(
+          String.format(errorFormat, jsonPathString, e.getMessage()), e);
+
+    } catch (InvalidJsonException e) {
+      final String errorFormat = "JSON object '%s' is not valid. Error details: %s";
+      throw new SemanticCheckException(String.format(errorFormat, jsonString, e.getMessage()), e);
+    }
+  }
+
+  /**
+   * Helper method to handle recursive scenario.
+   *
+   * <p>Splits {@code path} at its last '.' into a parent path and a property name, creates the
+   * parent first (as an empty object) when it cannot be read, then puts {@code value} under it.
+   *
+   * @param docContext incoming Json in Java object form.
+   * @param path path in String to perform insertion.
+   * @param value value to be inserted with given path.
+   * @return the document context after the insertion.
+   * @throws InvalidPathException if {@code path} has no parent segment to insert under.
+   */
+  private static DocumentContext recursiveCreate(
+      DocumentContext docContext, String path, Object value) {
+    final int pos = path.lastIndexOf('.');
+    if (pos < 0) {
+      // No '.' left to split on (e.g. the root "$" itself is unreadable): fail as an invalid path
+      // instead of letting substring() throw StringIndexOutOfBoundsException.
+      throw new InvalidPathException("Cannot create '" + path + "': no parent to insert under");
+    }
+    final String parent = path.substring(0, pos);
+    final String current = path.substring(pos + 1);
+    // Attempt to read the current path as it is, trigger the recursive in case of deep insert.
+    if (docContext.read(parent) == null) {
+      recursiveCreate(docContext, parent, new LinkedHashMap<>());
+    }
+    return docContext.put(parent, current, value);
+  }
 }
diff --git a/core/src/test/java/org/opensearch/sql/expression/json/JsonFunctionsTest.java b/core/src/test/java/org/opensearch/sql/expression/json/JsonFunctionsTest.java
index bba8475c110..985096b3b96 100644
--- a/core/src/test/java/org/opensearch/sql/expression/json/JsonFunctionsTest.java
+++ b/core/src/test/java/org/opensearch/sql/expression/json/JsonFunctionsTest.java
@@ -21,22 +21,30 @@
 import org.mockito.junit.jupiter.MockitoExtension;
 import org.opensearch.sql.data.model.ExprBooleanValue;
 import org.opensearch.sql.data.model.ExprCollectionValue;
+import org.opensearch.sql.data.model.ExprDateValue;
 import org.opensearch.sql.data.model.ExprDoubleValue;
+import org.opensearch.sql.data.model.ExprFloatValue;
 import org.opensearch.sql.data.model.ExprIntegerValue;
 import org.opensearch.sql.data.model.ExprLongValue;
 import org.opensearch.sql.data.model.ExprNullValue;
 import org.opensearch.sql.data.model.ExprStringValue;
+import org.opensearch.sql.data.model.ExprTimeValue;
 import org.opensearch.sql.data.model.ExprTupleValue;
 import org.opensearch.sql.data.model.ExprValue;
 import org.opensearch.sql.data.model.ExprValueUtils;
 import org.opensearch.sql.exception.ExpressionEvaluationException;
 import org.opensearch.sql.exception.SemanticCheckException;
 import org.opensearch.sql.expression.DSL;
+import org.opensearch.sql.expression.Expression;
 import org.opensearch.sql.expression.FunctionExpression;
 import org.opensearch.sql.expression.LiteralExpression;
 
 @ExtendWith(MockitoExtension.class)
 public class JsonFunctionsTest {
+
+  private static final String JsonSetTestData =
+
"{\"members\":[{\"name\":\"Alice\",\"age\":19,\"phoneNumbers\":[{\"home\":\"alice_home_landline\"},{\"work\":\"alice_work_phone\"}]},{\"name\":\"Ben\",\"age\":30,\"phoneNumbers\":[{\"home\":\"ben_home_landline\"},{\"work\":\"ben_work_phone\"}]}]}"; + @Test public void json_valid_returns_false() { List expressions = @@ -216,5 +224,397 @@ void json_returnsSemanticCheckException() { SemanticCheckException.class, () -> DSL.castJson(expr).valueOf(), "Expected to throw SemanticCheckException when calling castJson with " + expr)); + + // invalid type + assertThrows( + SemanticCheckException.class, () -> DSL.castJson(DSL.literal("invalid")).valueOf()); + + // missing bracket + assertThrows(SemanticCheckException.class, () -> DSL.castJson(DSL.literal("{{[}}")).valueOf()); + + // missing quote + assertThrows( + SemanticCheckException.class, () -> DSL.castJson(DSL.literal("\"missing quote")).valueOf()); + } + + @Test + void json_extract_search() { + ExprValue expected = new ExprIntegerValue(1); + execute_extract_json(expected, "{\"a\":1}", "$.a"); + } + + @Test + void json_extract_search_arrays() { + String jsonArray = "{\"a\":[1,2.3,\"abc\",true,null,{\"c\":{\"d\":1}},[1,2,3]]}"; + List expectedExprValues = + List.of( + new ExprIntegerValue(1), + new ExprFloatValue(2.3), + new ExprStringValue("abc"), + LITERAL_TRUE, + LITERAL_NULL, + ExprTupleValue.fromExprValueMap( + Map.of("c", ExprTupleValue.fromExprValueMap(Map.of("d", new ExprIntegerValue(1))))), + new ExprCollectionValue( + List.of( + new ExprIntegerValue(1), new ExprIntegerValue(2), new ExprIntegerValue(3)))); + + // extract specific index from JSON list + for (int i = 0; i < expectedExprValues.size(); i++) { + String path = String.format("$.a[%d]", i); + execute_extract_json(expectedExprValues.get(i), jsonArray, path); + } + + // extract nested object + ExprValue nestedExpected = + ExprTupleValue.fromExprValueMap(Map.of("d", new ExprIntegerValue(1))); + execute_extract_json(nestedExpected, jsonArray, "$.a[5].c"); + + 
// extract * from JSON list + ExprValue starExpected = new ExprCollectionValue(expectedExprValues); + execute_extract_json(starExpected, jsonArray, "$.a[*]"); + } + + @Test + void json_extract_returns_null() { + List jsonStrings = + List.of( + "{\"a\":\"1\",\"b\":\"2\"}", + "{\"a\":1,\"b\":{\"c\":2,\"d\":3}}", + "{\"arr1\": [1,2,3], \"arr2\": [4,5,6]}", + "[1, 2, 3, 4]", + "[{\"a\":1,\"b\":2}, {\"c\":3,\"d\":2}]", + "\"abc\"", + "1234", + "12.34", + "true", + "false", + ""); + + jsonStrings.forEach(str -> execute_extract_json(LITERAL_NULL, str, "$.a.path_not_found_key")); + + // null string literal + assertEquals(LITERAL_NULL, DSL.jsonExtract(DSL.literal("null"), DSL.literal("$.a")).valueOf()); + + // null json + assertEquals( + LITERAL_NULL, DSL.jsonExtract(DSL.literal(LITERAL_NULL), DSL.literal("$.a")).valueOf()); + + // missing json + assertEquals( + LITERAL_MISSING, + DSL.jsonExtract(DSL.literal(LITERAL_MISSING), DSL.literal("$.a")).valueOf()); + + // array out of bounds + execute_extract_json(LITERAL_NULL, "{\"a\":[1,2,3]}", "$.a[4]"); + } + + @Test + void json_extract_throws_SemanticCheckException() { + // invalid path + SemanticCheckException invalidPathError = + assertThrows( + SemanticCheckException.class, + () -> DSL.jsonExtract(DSL.literal("{\"a\":1}"), DSL.literal("$a")).valueOf()); + assertEquals( + "JSON path '$a' is not valid. Error details: Illegal character at position 1 expected" + + " '.' or '['", + invalidPathError.getMessage()); + + // invalid json + SemanticCheckException invalidJsonError = + assertThrows( + SemanticCheckException.class, + () -> + DSL.jsonExtract( + DSL.literal("{\"invalid\":\"json\", \"string\"}"), DSL.literal("$.a")) + .valueOf()); + assertTrue( + invalidJsonError + .getMessage() + .startsWith( + "JSON string '{\"invalid\":\"json\", \"string\"}' is not valid. 
Error" + + " details:")); + } + + @Test + void json_extract_throws_ExpressionEvaluationException() { + // null path + assertThrows( + ExpressionEvaluationException.class, + () -> DSL.jsonExtract(DSL.literal("{\"a\":1}"), DSL.literal(LITERAL_NULL)).valueOf()); + + // missing path + assertThrows( + ExpressionEvaluationException.class, + () -> DSL.jsonExtract(DSL.literal("{\"a\":1}"), DSL.literal(LITERAL_MISSING)).valueOf()); + } + + @Test + void json_extract_search_list_of_paths() { + final String objectJson = + "{\"foo\": \"foo\", \"fuzz\": true, \"bar\": 1234, \"bar2\": 12.34, \"baz\": null, " + + "\"obj\": {\"internal\": \"value\"}, \"arr\": [\"string\", true, null]}"; + + ExprValue expected = + new ExprCollectionValue( + List.of(new ExprStringValue("foo"), new ExprFloatValue(12.34), LITERAL_NULL)); + Expression pathExpr1 = DSL.literal(ExprValueUtils.stringValue("$.foo")); + Expression pathExpr2 = DSL.literal(ExprValueUtils.stringValue("$.bar2")); + Expression pathExpr3 = DSL.literal(ExprValueUtils.stringValue("$.potato")); + Expression jsonExpr = DSL.literal(ExprValueUtils.stringValue(objectJson)); + ExprValue actual = DSL.jsonExtract(jsonExpr, pathExpr1, pathExpr2, pathExpr3).valueOf(); + assertEquals(expected, actual); + } + + @Test + void json_set_InsertByte() { + FunctionExpression functionExpression = + DSL.jsonSet(DSL.literal("{}"), DSL.literal("$.test"), DSL.literal((byte) 'a')); + assertEquals("{\"test\":97}", functionExpression.valueOf().stringValue()); + } + + @Test + void json_set_InsertShort() { + FunctionExpression functionExpression = + DSL.jsonSet(DSL.literal("{}"), DSL.literal("$.test"), DSL.literal(Short.valueOf("123"))); + assertEquals("{\"test\":123}", functionExpression.valueOf().stringValue()); + } + + @Test + void json_set_InsertInt() { + FunctionExpression functionExpression = + DSL.jsonSet(DSL.literal("{}"), DSL.literal("$.test"), DSL.literal(123)); + assertEquals("{\"test\":123}", functionExpression.valueOf().stringValue()); + } + + @Test 
+ void json_set_InsertLong() { + FunctionExpression functionExpression = + DSL.jsonSet(DSL.literal("{}"), DSL.literal("$.test"), DSL.literal(123L)); + assertEquals("{\"test\":123}", functionExpression.valueOf().stringValue()); + } + + @Test + void json_set_InsertFloat() { + FunctionExpression functionExpression = + DSL.jsonSet(DSL.literal("{}"), DSL.literal("$.test"), DSL.literal(123.123F)); + assertEquals("{\"test\":123.123}", functionExpression.valueOf().stringValue()); + } + + @Test + void json_set_InsertDouble() { + FunctionExpression functionExpression = + DSL.jsonSet(DSL.literal("{}"), DSL.literal("$.test"), DSL.literal(123.123)); + assertEquals("{\"test\":123.123}", functionExpression.valueOf().stringValue()); + } + + @Test + void json_set_InsertString() { + FunctionExpression functionExpression = + DSL.jsonSet(DSL.literal("{}"), DSL.literal("$.test"), DSL.literal("test_value")); + assertEquals("{\"test\":\"test_value\"}", functionExpression.valueOf().stringValue()); + } + + @Test + void json_set_InsertBoolean() { + FunctionExpression functionExpression = + DSL.jsonSet(DSL.literal("{}"), DSL.literal("$.test"), DSL.literal(Boolean.TRUE)); + assertEquals("{\"test\":true}", functionExpression.valueOf().stringValue()); + } + + @Test + void json_set_InsertDate() { + FunctionExpression functionExpression = + DSL.jsonSet( + DSL.literal("{}"), + DSL.literal("$.test"), + DSL.date(DSL.literal(new ExprDateValue("2020-08-17")))); + assertEquals("{\"test\":\"2020-08-17\"}", functionExpression.valueOf().stringValue()); + } + + @Test + void json_set_InsertTime() { + FunctionExpression functionExpression = + DSL.jsonSet( + DSL.literal("{}"), + DSL.literal("$.test"), + DSL.time(DSL.literal(new ExprTimeValue("01:01:01")))); + assertEquals("{\"test\":\"01:01:01\"}", functionExpression.valueOf().stringValue()); + } + + @Test + void json_set_InsertTimestamp() { + FunctionExpression functionExpression = + DSL.jsonSet( + DSL.literal("{}"), + DSL.literal("$.test"), + 
DSL.timestamp(DSL.literal("2008-05-15 22:00:00"))); + assertEquals("{\"test\":\"2008-05-15 22:00:00\"}", functionExpression.valueOf().stringValue()); + } + + @Test + void json_set_InsertInterval() { + FunctionExpression functionExpression = + DSL.jsonSet( + DSL.literal("{}"), + DSL.literal("$.test"), + DSL.interval(DSL.literal(1), DSL.literal("second"))); + assertEquals("{\"test\":{\"seconds\":1}}", functionExpression.valueOf().stringValue()); + } + + @Test + void json_set_InsertIp() { + FunctionExpression functionExpression = + DSL.jsonSet( + DSL.literal("{}"), DSL.literal("$.test"), DSL.castIp(DSL.literal("192.168.1.1"))); + assertEquals("{\"test\":\"192.168.1.1\"}", functionExpression.valueOf().stringValue()); + } + + @Test + void json_set_InsertMap() { + FunctionExpression functionExpression = + DSL.jsonSet( + DSL.literal("{}"), + DSL.literal("$.test"), + DSL.literal( + ExprTupleValue.fromExprValueMap(Map.of("name", new ExprStringValue("alice"))))); + assertEquals("{\"test\":{\"name\":\"alice\"}}", functionExpression.valueOf().stringValue()); + } + + @Test + void json_set_InsertArray() { + FunctionExpression functionExpression = + DSL.jsonSet( + DSL.literal("{}"), + DSL.literal("$.test"), + DSL.literal( + new ExprCollectionValue( + List.of(new ExprStringValue("Alice"), new ExprStringValue("Ben"))))); + assertEquals("{\"test\":[\"Alice\",\"Ben\"]}", functionExpression.valueOf().stringValue()); + } + + @Test + void json_set_insert_invalid_path() { + FunctionExpression functionExpression = + DSL.jsonSet( + DSL.literal("{\"members\":[{\"name\":\"alice\"}]}"), + DSL.literal("$$$$$$$$$"), + DSL.literal("18")); + assertThrows(SemanticCheckException.class, functionExpression::valueOf); + } + + @Test + void json_set_insert_invalid_jsonObject() { + FunctionExpression functionExpression = + DSL.jsonSet(DSL.literal("[xxxx}}}}}"), DSL.literal("$.test"), DSL.literal("18")); + assertThrows(SemanticCheckException.class, functionExpression::valueOf); + } + + @Test + void 
json_set_noMatch_property() { + FunctionExpression functionExpression = + DSL.jsonSet( + DSL.literal("{\"members\":[{\"name\":\"alice\"}]}"), + DSL.literal("$.members[0].age.innerAge"), + DSL.literal("18")); + assertEquals( + "{\"members\":[{\"name\":\"alice\",\"age\":{\"innerAge\":\"18\"}}]}", + functionExpression.valueOf().stringValue()); + } + + @Test + void json_set_noMatch_array() { + FunctionExpression functionExpression = + DSL.jsonSet( + DSL.literal("{\"members\":[{\"name\":\"alice\"}]}"), + DSL.literal("$.members[0].age.innerArray"), + DSL.literal( + new ExprCollectionValue( + List.of(new ExprStringValue("18"), new ExprStringValue("20"))))); + assertEquals( + "{\"members\":[{\"name\":\"alice\",\"age\":{\"innerArray\":[\"18\",\"20\"]}}]}", + functionExpression.valueOf().stringValue()); + } + + /** + * In the case of jsonPath hit single match on property, it should overwrite the existing value, + * regardless of the value type (Array, numeric....etc.) + */ + @Test + void json_set_singleMatch_property() { + + FunctionExpression functionExpression = + DSL.jsonSet( + DSL.literal(JsonSetTestData), + DSL.literal("$.members[0].name"), + DSL.literal(new ExprStringValue("Alice Spring"))); + assertEquals( + "{\"members\":[{\"name\":\"Alice" + + " Spring\",\"age\":19,\"phoneNumbers\":[{\"home\":\"alice_home_landline\"},{\"work\":\"alice_work_phone\"}]},{\"name\":\"Ben\",\"age\":30,\"phoneNumbers\":[{\"home\":\"ben_home_landline\"},{\"work\":\"ben_work_phone\"}]}]}", + functionExpression.valueOf().stringValue()); + } + + /** + * In the case of jsonPath hit single match on property, it should overwrite the existing value, + * regardless of the value type (Array, numeric....etc.) 
+ */ + @Test + void json_set_singleMatch_array() { + + FunctionExpression functionExpression = + DSL.jsonSet( + DSL.literal(JsonSetTestData), + DSL.literal("$.members[0].phoneNumbers"), + DSL.literal( + new ExprCollectionValue( + List.of( + ExprTupleValue.fromExprValueMap( + Map.of("home", new ExprStringValue("alice_new_landline"))), + ExprTupleValue.fromExprValueMap( + Map.of("work", new ExprStringValue("alice_new_work_phone"))))))); + assertEquals( + "{\"members\":[{\"name\":\"Alice\",\"age\":19,\"phoneNumbers\":[{\"home\":\"alice_new_landline\"},{\"work\":\"alice_new_work_phone\"}]},{\"name\":\"Ben\",\"age\":30,\"phoneNumbers\":[{\"home\":\"ben_home_landline\"},{\"work\":\"ben_work_phone\"}]}]}", + functionExpression.valueOf().stringValue()); + } + + /** The handling would stay identical regardless of single match || multiple matches. */ + @Test + void json_set_multiMatches_property() { + + FunctionExpression functionExpression = + DSL.jsonSet( + DSL.literal(JsonSetTestData), + DSL.literal("$.members..age"), + DSL.literal(new ExprLongValue(25))); + assertEquals( + "{\"members\":[{\"name\":\"Alice\",\"age\":25,\"phoneNumbers\":[{\"home\":\"alice_home_landline\"},{\"work\":\"alice_work_phone\"}]},{\"name\":\"Ben\",\"age\":25,\"phoneNumbers\":[{\"home\":\"ben_home_landline\"},{\"work\":\"ben_work_phone\"}]}]}", + functionExpression.valueOf().stringValue()); + } + + @Test + void json_set_multiMatches_array() { + + FunctionExpression functionExpression = + DSL.jsonSet( + DSL.literal(JsonSetTestData), + DSL.literal("$.members..phoneNumbers"), + DSL.literal( + new ExprCollectionValue( + List.of( + ExprTupleValue.fromExprValueMap( + Map.of("home", new ExprStringValue("generic_new_landline"))), + ExprTupleValue.fromExprValueMap( + Map.of("work", new ExprStringValue("generic_new_work_phone"))))))); + assertEquals( + 
"{\"members\":[{\"name\":\"Alice\",\"age\":19,\"phoneNumbers\":[{\"home\":\"generic_new_landline\"},{\"work\":\"generic_new_work_phone\"}]},{\"name\":\"Ben\",\"age\":30,\"phoneNumbers\":[{\"home\":\"generic_new_landline\"},{\"work\":\"generic_new_work_phone\"}]}]}", + functionExpression.valueOf().stringValue()); + } + + private static void execute_extract_json(ExprValue expected, String json, String path) { + Expression pathExpr = DSL.literal(ExprValueUtils.stringValue(path)); + Expression jsonExpr = DSL.literal(ExprValueUtils.stringValue(json)); + ExprValue actual = DSL.jsonExtract(jsonExpr, pathExpr).valueOf(); + assertEquals(expected, actual); } } diff --git a/docs/user/ppl/functions/json.rst b/docs/user/ppl/functions/json.rst index 77d9d00f45f..4241ef04601 100644 --- a/docs/user/ppl/functions/json.rst +++ b/docs/user/ppl/functions/json.rst @@ -22,18 +22,19 @@ Return type: BOOLEAN Example:: - > source=json_test | eval is_valid = json_valid(json_string) | fields test_name, json_string, is_valid - fetched rows / total rows = 6/6 - +---------------------+---------------------------------+----------+ - | test_name | json_string | is_valid | - |---------------------|---------------------------------|----------| - | json nested object | {"a":"1","b":{"c":"2","d":"3"}} | True | - | json object | {"a":"1","b":"2"} | True | - | json array | [1, 2, 3, 4] | True | - | json scalar string | "abc" | True | - | json empty string | | True | - | json invalid object | {"invalid":"json", "string"} | False | - +---------------------+---------------------------------+----------+ + os> source=json_test | eval is_valid = json_valid(json_string) | fields test_name, json_string, is_valid + fetched rows / total rows = 7/7 + +---------------------+--------------------------------------+----------+ + | test_name | json_string | is_valid | + |---------------------+--------------------------------------+----------| + | json nested object | {"a":"1","b":{"c":"2","d":"3"}} | True | + | json 
nested list | {"a":"1","b":[{"c":"2"}, {"c":"3"}]} | True | + | json object | {"a":"1","b":"2"} | True | + | json array | [1, 2, 3, 4] | True | + | json scalar string | "abc" | True | + | json empty string | | True | + | json invalid object | {"invalid":"json", "string"} | False | + +---------------------+--------------------------------------+----------+ JSON ---------- @@ -49,14 +50,101 @@ Return type: BOOLEAN/DOUBLE/INTEGER/NULL/STRUCT/ARRAY Example:: - > source=json_test | where json_valid(json_string) | eval json=json(json_string) | fields test_name, json_string, json - fetched rows / total rows = 5/5 - +---------------------+---------------------------------+-------------------------+ - | test_name | json_string | json | - |---------------------|---------------------------------|-------------------------| - | json nested object | {"a":"1","b":{"c":"2","d":"3"}} | {a:"1",b:{c:"2",d:"3"}} | - | json object | {"a":"1","b":"2"} | {a:"1",b:"2"} | - | json array | [1, 2, 3, 4] | [1,2,3,4] | - | json scalar string | "abc" | "abc" | - | json empty string | | null | - +---------------------+---------------------------------+-------------------------+ + os> source=json_test | where json_valid(json_string) | eval json=json(json_string) | fields test_name, json_string, json + fetched rows / total rows = 6/6 + +--------------------+--------------------------------------+-------------------------------------------+ + | test_name | json_string | json | + |--------------------+--------------------------------------+-------------------------------------------| + | json nested object | {"a":"1","b":{"c":"2","d":"3"}} | {'a': '1', 'b': {'c': '2', 'd': '3'}} | + | json nested list | {"a":"1","b":[{"c":"2"}, {"c":"3"}]} | {'a': '1', 'b': [{'c': '2'}, {'c': '3'}]} | + | json object | {"a":"1","b":"2"} | {'a': '1', 'b': '2'} | + | json array | [1, 2, 3, 4] | [1,2,3,4] | + | json scalar string | "abc" | abc | + | json empty string | | null | + 
+--------------------+--------------------------------------+-------------------------------------------+
+
+JSON_EXTRACT
+------------
+
+Description
+>>>>>>>>>>>
+
+Usage: `json_extract(doc, path[, path])` Extracts a JSON value from a json document based on the path specified.
+
+Argument type: STRING, STRING
+
+Return type: STRING/BOOLEAN/DOUBLE/INTEGER/NULL/STRUCT/ARRAY
+
+- Up to 3 paths can be provided, and results of each `path` will be returned in an ARRAY.
+- Returns an ARRAY if `path` points to multiple results (e.g. $.a[*]) or if the `path` points to an array.
+- Returns null if `path` does not match anything in `doc`, or if JSON `doc` is MISSING or NULL.
+- Throws SemanticCheckException if `doc` or `path` is malformed.
+- Throws ExpressionEvaluationException if `path` is missing.
+
+Example::
+
+    os> source=json_test | where json_valid(json_string) | eval json_extract=json_extract(json_string, '$.b') | fields test_name, json_string, json_extract
+    fetched rows / total rows = 6/6
+    +--------------------+--------------------------------------+-------------------------+
+    | test_name          | json_string                          | json_extract            |
+    |--------------------+--------------------------------------+-------------------------|
+    | json nested object | {"a":"1","b":{"c":"2","d":"3"}}      | {'c': '2', 'd': '3'}    |
+    | json nested list   | {"a":"1","b":[{"c":"2"}, {"c":"3"}]} | [{'c': '2'},{'c': '3'}] |
+    | json object        | {"a":"1","b":"2"}                    | 2                       |
+    | json array         | [1, 2, 3, 4]                         | null                    |
+    | json scalar string | "abc"                                | null                    |
+    | json empty string  |                                      | null                    |
+    +--------------------+--------------------------------------+-------------------------+
+
+    os> source=json_test | where test_name="json nested list" | eval json_extract=json_extract(json_string, '$.b[1].c') | fields test_name, json_string, json_extract
+    fetched rows / total rows = 1/1
+    +------------------+--------------------------------------+--------------+
+    | test_name        | json_string                          | json_extract |
+
|------------------+--------------------------------------+--------------| + | json nested list | {"a":"1","b":[{"c":"2"}, {"c":"3"}]} | 3 | + +------------------+--------------------------------------+--------------+ + + os> source=json_test | where test_name="json nested list" | eval json_extract=json_extract(json_string, '$.b[*].c') | fields test_name, json_string, json_extract + fetched rows / total rows = 1/1 + +------------------+--------------------------------------+--------------+ + | test_name | json_string | json_extract | + |------------------+--------------------------------------+--------------| + | json nested list | {"a":"1","b":[{"c":"2"}, {"c":"3"}]} | [2,3] | + +------------------+--------------------------------------+--------------+ + + os> source=json_test | where test_name="json nested list" | eval json_extract=json_extract(json_string, '$.a', '$.b[*].c') | fields test_name, json_string, json_extract + fetched rows / total rows = 1/1 + +------------------+--------------------------------------+--------------+ + | test_name | json_string | json_extract | + |------------------+--------------------------------------+--------------| + | json nested list | {"a":"1","b":[{"c":"2"}, {"c":"3"}]} | [1,[2,3]] | + +------------------+--------------------------------------+--------------+ + + + +JSON_SET +---------- + +Description +>>>>>>>>>>> + +Usage: `json_set(json_string, json_path, value)` Inserts the given value at the provided JSON path, or overrides the value already present there. Returns the updated JSON object if valid, null otherwise. 
+ +Argument type: STRING, STRING, BYTE/SHORT/INTEGER/LONG/FLOAT/DOUBLE/STRING/BOOLEAN/DATE/TIME/TIMESTAMP/INTERVAL/IP/STRUCT/ARRAY + +Return type: STRING + +Example:: + + os> source=json_test | eval updated=json_set(json_string, "$.c.innerProperty", "test_value") | fields test_name, updated + fetched rows / total rows = 6/6 + +---------------------+--------------------------------------------------------------------+ + | test_name | updated | + |---------------------+--------------------------------------------------------------------| + | json nested object | {"a":"1","b":{"c":"2","d":"3"},"c":{"innerProperty":"test_value"}} | + | json object | {"a":"1","b":"2","c":{"innerProperty":"test_value"}} | + | json array | null | + | json scalar string | null | + | json empty string | null | + | json invalid object | null | + +---------------------+--------------------------------------------------------------------+ \ No newline at end of file diff --git a/doctest/test_data/json_test.json b/doctest/test_data/json_test.json index 7494fc4aa91..63e7f150115 100644 --- a/doctest/test_data/json_test.json +++ b/doctest/test_data/json_test.json @@ -1,4 +1,5 @@ {"test_name":"json nested object", "json_string":"{\"a\":\"1\",\"b\":{\"c\":\"2\",\"d\":\"3\"}}"} +{"test_name":"json nested list", "json_string":"{\"a\":\"1\",\"b\":[{\"c\":\"2\"}, {\"c\":\"3\"}]}"} {"test_name":"json object", "json_string":"{\"a\":\"1\",\"b\":\"2\"}"} {"test_name":"json array", "json_string":"[1, 2, 3, 4]"} {"test_name":"json scalar string", "json_string":"\"abc\""} diff --git a/integ-test/src/test/java/org/opensearch/sql/ppl/JsonFunctionsIT.java b/integ-test/src/test/java/org/opensearch/sql/ppl/JsonFunctionsIT.java index b6a2d5e4aaf..268e2af47dc 100644 --- a/integ-test/src/test/java/org/opensearch/sql/ppl/JsonFunctionsIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/ppl/JsonFunctionsIT.java @@ -46,7 +46,8 @@ public void test_json_valid() throws IOException { rows("json scalar double"), 
rows("json scalar boolean true"), rows("json scalar boolean false"), - rows("json empty string")); + rows("json empty string"), + rows("json nested list")); } @Test @@ -88,7 +89,10 @@ public void test_cast_json() throws IOException { rows("json scalar double", 2.99792458e8), rows("json scalar boolean true", true), rows("json scalar boolean false", false), - rows("json empty string", null)); + rows("json empty string", null), + rows( + "json nested list", + new JSONObject(Map.of("a", "1", "b", List.of(Map.of("c", "2"), Map.of("c", "3")))))); } @Test @@ -120,7 +124,10 @@ public void test_json() throws IOException { rows("json scalar double", 2.99792458e8), rows("json scalar boolean true", true), rows("json scalar boolean false", false), - rows("json empty string", null)); + rows("json empty string", null), + rows( + "json nested list", + new JSONObject(Map.of("a", "1", "b", List.of(Map.of("c", "2"), Map.of("c", "3")))))); } @Test @@ -183,4 +190,55 @@ public void test_cast_json_scalar_to_type() throws IOException { verifyDataRows( result, rows("json scalar boolean true", true), rows("json scalar boolean false", false)); } + + @Test + public void test_json_extract() throws IOException { + JSONObject result; + result = + executeQuery( + String.format( + "source=%s | where json_valid(json_string) | eval" + + " extracted=json_extract(json_string, '$.b') | fields test_name, extracted", + TEST_INDEX_JSON_TEST)); + verifySchema( + result, schema("test_name", null, "string"), schema("extracted", null, "undefined")); + verifyDataRows( + result, + rows("json nested object", new JSONObject(Map.of("c", "3"))), + rows("json object", "2"), + rows("json array", null), + rows("json nested array", null), + rows("json scalar string", null), + rows("json scalar int", null), + rows("json scalar float", null), + rows("json scalar double", null), + rows("json scalar boolean true", null), + rows("json scalar boolean false", null), + rows("json empty string", null), + rows("json nested 
list", new JSONArray(List.of(Map.of("c", "2"), Map.of("c", "3"))))); + } + + @Test + public void test_json_set() throws IOException { + JSONObject result; + + result = + executeQuery( + String.format( + "source=%s | eval updated=json_set(json_string, \\\"$.c.innerProperty\\\"," + + " \\\"test_value\\\") | fields test_name, updated", + TEST_INDEX_JSON_TEST)); + verifySchema(result, schema("test_name", null, "string"), schema("updated", null, "undefined")); + verifyDataRows( + result, + rows( + "json nested object", + "{\"a\":\"1\",\"b\":{\"c\":\"3\"},\"d\":[1,2,3],\"c\":{\"innerProperty\":\"test_value\"}}"), + rows("json object", "{\"a\":\"1\",\"b\":\"2\",\"c\":{\"innerProperty\":\"test_value\"}}"), + rows("json array", null), + rows("json scalar string", null), + rows("json empty string", null), + rows("json invalid object", null), + rows("json null", null)); + } } diff --git a/integ-test/src/test/resources/json_test.json b/integ-test/src/test/resources/json_test.json index 6fd92112295..9d6984720e7 100644 --- a/integ-test/src/test/resources/json_test.json +++ b/integ-test/src/test/resources/json_test.json @@ -24,3 +24,5 @@ {"test_name":"json invalid object", "json_string":"{\"invalid\":\"json\", \"string\"}"} {"index":{"_id":"12"}} {"test_name":"json null", "json_string":null} +{"index":{"_id":"13"}} +{"test_name":"json nested list", "json_string":"{\"a\":\"1\",\"b\":[{\"c\":\"2\"}, {\"c\":\"3\"}]}"} diff --git a/ppl/src/main/antlr/OpenSearchPPLLexer.g4 b/ppl/src/main/antlr/OpenSearchPPLLexer.g4 index 0307fb4ca10..aa69d566f80 100644 --- a/ppl/src/main/antlr/OpenSearchPPLLexer.g4 +++ b/ppl/src/main/antlr/OpenSearchPPLLexer.g4 @@ -334,6 +334,8 @@ CIDRMATCH: 'CIDRMATCH'; // JSON FUNCTIONS JSON_VALID: 'JSON_VALID'; JSON: 'JSON'; +JSON_EXTRACT: 'JSON_EXTRACT'; +JSON_SET: 'JSON_SET'; // FLOWCONTROL FUNCTIONS IFNULL: 'IFNULL'; diff --git a/ppl/src/main/antlr/OpenSearchPPLParser.g4 b/ppl/src/main/antlr/OpenSearchPPLParser.g4 index 451edeb29b9..5ee2df900b0 100644 --- 
a/ppl/src/main/antlr/OpenSearchPPLParser.g4 +++ b/ppl/src/main/antlr/OpenSearchPPLParser.g4 @@ -670,6 +670,7 @@ conditionFunctionName | ISNOTNULL | CIDRMATCH | JSON_VALID + | JSON_SET ; // flow control function return non-boolean value @@ -709,6 +710,7 @@ positionFunctionName jsonFunctionName : JSON + | JSON_EXTRACT ; // operators