From fc65639e6e0901b7c0de182a6e2480380550012c Mon Sep 17 00:00:00 2001 From: Lasse Mammen Date: Fri, 16 Feb 2024 15:29:00 +0000 Subject: [PATCH] feat: json_merge expression and sql function --- docs/querying/math-expr.md | 1 + docs/querying/sql-functions.md | 7 ++ .../apache/druid/guice/ExpressionModule.java | 1 + .../expression/NestedDataExpressions.java | 112 ++++++++++++++++++ .../expression/NestedDataExpressionsTest.java | 58 +++++++++ .../NestedDataOperatorConversions.java | 46 +++++++ .../calcite/planner/DruidOperatorTable.java | 1 + .../calcite/CalciteNestedDataQueryTest.java | 51 ++++++++ website/.spelling | 1 + 9 files changed, 278 insertions(+) diff --git a/docs/querying/math-expr.md b/docs/querying/math-expr.md index ee47fc7c2db1..2b494060a736 100644 --- a/docs/querying/math-expr.md +++ b/docs/querying/math-expr.md @@ -245,6 +245,7 @@ JSON functions provide facilities to extract, transform, and create `COMPLEX` into one. Right-most being preserved on key overlaps | ### JSONPath syntax diff --git a/docs/querying/sql-functions.md b/docs/querying/sql-functions.md index 2d0c51f6c128..b04f24cf5069 100644 --- a/docs/querying/sql-functions.md +++ b/docs/querying/sql-functions.md @@ -860,6 +860,13 @@ Extracts an `ARRAY>` value from `expr` at the specified `path`. If Extracts a literal value from `expr` at the specified `path`. If you specify `RETURNING` and an SQL type name (such as `VARCHAR`, `BIGINT`, `DOUBLE`, etc) the function plans the query using the suggested type. Otherwise, it attempts to infer the type based on the context. If it can't infer the type, it defaults to `VARCHAR`. +## JSON_MERGE + +**Function type:** [JSON](sql-json-functions.md) + +`json_merge(expr1, expr2[, expr3 ...])` +Merges two or more JSON `STRING` or `COMPLEX` into one. Right-most being preserved on key overlaps. Returning always a `COMPLEX` type. + ## LATEST `LATEST(expr, [maxBytesPerValue])` diff --git a/processing/src/main/java/org/apache/druid/guice/ExpressionModule.java b/processing/src/main/java/org/apache/druid/guice/ExpressionModule.java index 917cf967f14c..e1064234e56e 100644 --- a/processing/src/main/java/org/apache/druid/guice/ExpressionModule.java +++ b/processing/src/main/java/org/apache/druid/guice/ExpressionModule.java @@ -81,6 +81,7 @@ public class ExpressionModule implements Module .add(HyperUniqueExpressions.HllEstimateExprMacro.class) .add(HyperUniqueExpressions.HllRoundEstimateExprMacro.class) .add(NestedDataExpressions.JsonObjectExprMacro.class) + .add(NestedDataExpressions.JsonMergeExprMacro.class) .add(NestedDataExpressions.JsonKeysExprMacro.class) .add(NestedDataExpressions.JsonPathsExprMacro.class) .add(NestedDataExpressions.JsonValueExprMacro.class) diff --git a/processing/src/main/java/org/apache/druid/query/expression/NestedDataExpressions.java b/processing/src/main/java/org/apache/druid/query/expression/NestedDataExpressions.java index 873b4f831883..0926ce78e0a5 100644 --- a/processing/src/main/java/org/apache/druid/query/expression/NestedDataExpressions.java +++ b/processing/src/main/java/org/apache/druid/query/expression/NestedDataExpressions.java @@ -22,6 +22,7 @@ import com.fasterxml.jackson.core.JsonProcessingException; import com.fasterxml.jackson.databind.ObjectMapper; +import com.fasterxml.jackson.databind.ObjectReader; import org.apache.druid.guice.annotations.Json; import org.apache.druid.math.expr.Expr; import org.apache.druid.math.expr.ExprEval; @@ -99,6 +100,117 @@ public ExpressionType getOutputType(InputBindingInspector inspector) } } + public static class JsonMergeExprMacro implements ExprMacroTable.ExprMacro + { + public static final String NAME = "json_merge"; + + private final ObjectMapper jsonMapper; + + @Inject + public JsonMergeExprMacro( + @Json ObjectMapper jsonMapper + ) + { + this.jsonMapper = jsonMapper; + } + + @Override + public String name() + { + return NAME; + } + + @Override + public Expr apply(List args) + { + if (args.size() < 2) { + throw validationFailed("must have at least two arguments"); + } + + final class ParseJsonExpr extends ExprMacroTable.BaseScalarMacroFunctionExpr + { + public ParseJsonExpr(List args) + { + super(JsonMergeExprMacro.this, args); + } + + @Override + public ExprEval eval(ObjectBinding bindings) + { + ExprEval arg = args.get(0).eval(bindings); + Object obj; + + if (arg.value() == null) { + throw JsonMergeExprMacro.this.validationFailed( + "invalid input expected %s but got %s instead", + ExpressionType.STRING, + arg.type() + ); + } + + try { + obj = jsonMapper.readValue(getArgAsJson(arg), Object.class); + } + catch (JsonProcessingException e) { + throw JsonMergeExprMacro.this.processingFailed(e, "bad string input [%s]", arg.asString()); + } + + ObjectReader updater = jsonMapper.readerForUpdating(obj); + + for (int i = 1; i < args.size(); i++) { + ExprEval argSub = args.get(i).eval(bindings); + + try { + String str = getArgAsJson(argSub); + if (str != null) { + obj = updater.readValue(str); + } + } + catch (JsonProcessingException e) { + throw JsonMergeExprMacro.this.processingFailed(e, "bad string input [%s]", argSub.asString()); + } + } + + return ExprEval.ofComplex(ExpressionType.NESTED_DATA, obj); + } + + @Nullable + @Override + public ExpressionType getOutputType(InputBindingInspector inspector) + { + return ExpressionType.NESTED_DATA; + } + + private String getArgAsJson(ExprEval arg) + { + if (arg.value() == null) { + return null; + } + + if (arg.type().is(ExprType.STRING)) { + return arg.asString(); + } + + if (arg.type().is(ExprType.COMPLEX)) { + try { + return jsonMapper.writeValueAsString(unwrap(arg)); + } + catch (JsonProcessingException e) { + throw JsonMergeExprMacro.this.processingFailed(e, "bad complex input [%s]", arg.asString()); + } + } + + throw JsonMergeExprMacro.this.validationFailed( + "invalid input expected %s but got %s instead", + ExpressionType.STRING, + arg.type() + ); + } + } + return new ParseJsonExpr(args); + } + } + public static class ToJsonStringExprMacro implements ExprMacroTable.ExprMacro { public static final String NAME = "to_json_string"; diff --git a/processing/src/test/java/org/apache/druid/query/expression/NestedDataExpressionsTest.java b/processing/src/test/java/org/apache/druid/query/expression/NestedDataExpressionsTest.java index b14edb2d17b8..c9fe553469a7 100644 --- a/processing/src/test/java/org/apache/druid/query/expression/NestedDataExpressionsTest.java +++ b/processing/src/test/java/org/apache/druid/query/expression/NestedDataExpressionsTest.java @@ -49,6 +49,7 @@ public class NestedDataExpressionsTest extends InitializedNullHandlingTest new NestedDataExpressions.JsonPathsExprMacro(), new NestedDataExpressions.JsonKeysExprMacro(), new NestedDataExpressions.JsonObjectExprMacro(), + new NestedDataExpressions.JsonMergeExprMacro(JSON_MAPPER), new NestedDataExpressions.JsonValueExprMacro(), new NestedDataExpressions.JsonQueryExprMacro(), new NestedDataExpressions.JsonQueryArrayExprMacro(), @@ -112,6 +113,63 @@ public void testJsonObjectExpression() Assert.assertEquals(ImmutableMap.of("a", "hello", "b", "world"), ((Map) eval.value()).get("y")); } + @Test + public void testJsonMergeExpression() throws JsonProcessingException + { + Expr expr = Parser.parse("json_merge('{\"a\":\"x\"}','{\"b\":\"y\"}')", MACRO_TABLE); + ExprEval eval = expr.eval(inputBindings); + Assert.assertEquals("{\"a\":\"x\",\"b\":\"y\"}", JSON_MAPPER.writeValueAsString(eval.value())); + Assert.assertEquals(ExpressionType.NESTED_DATA, eval.type()); + + expr = Parser.parse("json_merge('{\"a\":\"x\"}', null)", MACRO_TABLE); + eval = expr.eval(inputBindings); + Assert.assertEquals("{\"a\":\"x\"}", JSON_MAPPER.writeValueAsString(eval.value())); + Assert.assertEquals(ExpressionType.NESTED_DATA, eval.type()); + + expr = Parser.parse("json_merge('{\"a\":\"x\"}','{\"b\":\"y\"}','{\"c\":[1,2,3]}')", MACRO_TABLE); + eval = expr.eval(inputBindings); + Assert.assertEquals("{\"a\":\"x\",\"b\":\"y\",\"c\":[1,2,3]}", JSON_MAPPER.writeValueAsString(eval.value())); + Assert.assertEquals(ExpressionType.NESTED_DATA, eval.type()); + + expr = Parser.parse("json_merge(json_object('a', 'x'),json_object('b', 'y'))", MACRO_TABLE); + eval = expr.eval(inputBindings); + Assert.assertEquals("{\"a\":\"x\",\"b\":\"y\"}", JSON_MAPPER.writeValueAsString(eval.value())); + Assert.assertEquals(ExpressionType.NESTED_DATA, eval.type()); + + expr = Parser.parse("json_merge('{\"a\":\"x\"}',json_merge('{\"a\":\"z\"}','{\"a\":\"y\"}'))", MACRO_TABLE); + eval = expr.eval(inputBindings); + Assert.assertEquals("{\"a\":\"y\"}", JSON_MAPPER.writeValueAsString(eval.value())); + Assert.assertEquals(ExpressionType.NESTED_DATA, eval.type()); + + expr = Parser.parse("json_merge('[\"a\", \"b\"]', '[\"c\", \"d\"]')", MACRO_TABLE); + eval = expr.eval(inputBindings); + Assert.assertEquals("[\"a\",\"b\",\"c\",\"d\"]", JSON_MAPPER.writeValueAsString(eval.value())); + Assert.assertEquals(ExpressionType.NESTED_DATA, eval.type()); + } + + @Test + public void testJsonMergeOverflow() throws JsonProcessingException + { + Expr.ObjectBinding input1 = InputBindings.forInputSuppliers( + new ImmutableMap.Builder>() + .put("attr", InputBindings.inputSupplier(ExpressionType.NESTED_DATA, () -> ImmutableMap.of("key", "blah", "value", "blahblah"))) + .build() + ); + Expr.ObjectBinding input2 = InputBindings.forInputSuppliers( + new ImmutableMap.Builder>() + .put("attr", InputBindings.inputSupplier(ExpressionType.NESTED_DATA, () -> ImmutableMap.of("key", "blah2", "value", "blahblah2"))) + .build() + ); + + Expr expr = Parser.parse("json_merge(json_object(), json_object(json_value(attr, '$.key'), json_value(attr, '$.value')))", MACRO_TABLE); + ExprEval eval = expr.eval(input1); + Assert.assertEquals("{\"blah\":\"blahblah\"}", JSON_MAPPER.writeValueAsString(eval.value())); + Assert.assertEquals(ExpressionType.NESTED_DATA, eval.type()); + eval = expr.eval(input2); + Assert.assertEquals("{\"blah2\":\"blahblah2\"}", JSON_MAPPER.writeValueAsString(eval.value())); + Assert.assertEquals(ExpressionType.NESTED_DATA, eval.type()); + } + @Test public void testJsonKeysExpression() { diff --git a/sql/src/main/java/org/apache/druid/sql/calcite/expression/builtin/NestedDataOperatorConversions.java b/sql/src/main/java/org/apache/druid/sql/calcite/expression/builtin/NestedDataOperatorConversions.java index a51a3d713757..790652b100b2 100644 --- a/sql/src/main/java/org/apache/druid/sql/calcite/expression/builtin/NestedDataOperatorConversions.java +++ b/sql/src/main/java/org/apache/druid/sql/calcite/expression/builtin/NestedDataOperatorConversions.java @@ -778,6 +778,52 @@ public DruidExpression toDruidExpression(PlannerContext plannerContext, RowSigna } } + public static class JsonMergeOperatorConversion implements SqlOperatorConversion + { + private static final String FUNCTION_NAME = "json_merge"; + private static final SqlFunction SQL_FUNCTION = OperatorConversions + .operatorBuilder(FUNCTION_NAME) + .operandTypeChecker(OperandTypes.variadic(SqlOperandCountRanges.from(1))) + .operandTypeInference((callBinding, returnType, operandTypes) -> { + RelDataTypeFactory typeFactory = callBinding.getTypeFactory(); + for (int i = 0; i < operandTypes.length; i++) { + operandTypes[i] = typeFactory.createTypeWithNullability( + typeFactory.createSqlType(SqlTypeName.ANY), + true + ); + } + }) + .returnTypeInference(NESTED_RETURN_TYPE_INFERENCE) + .functionCategory(SqlFunctionCategory.SYSTEM) + .build(); + + @Override + public SqlOperator calciteOperator() + { + return SQL_FUNCTION; + } + + @Nullable + @Override + public DruidExpression toDruidExpression( + PlannerContext plannerContext, + RowSignature rowSignature, + RexNode rexNode + ) + { + return OperatorConversions.convertCall( + plannerContext, + rowSignature, + rexNode, + druidExpressions -> DruidExpression.ofExpression( + ColumnType.NESTED_DATA, + DruidExpression.functionCall("json_merge"), + druidExpressions + ) + ); + } + } + public static class ToJsonStringOperatorConversion implements SqlOperatorConversion { private static final String FUNCTION_NAME = "to_json_string"; diff --git a/sql/src/main/java/org/apache/druid/sql/calcite/planner/DruidOperatorTable.java b/sql/src/main/java/org/apache/druid/sql/calcite/planner/DruidOperatorTable.java index 0fda139868b7..ab7ec712ae71 100644 --- a/sql/src/main/java/org/apache/druid/sql/calcite/planner/DruidOperatorTable.java +++ b/sql/src/main/java/org/apache/druid/sql/calcite/planner/DruidOperatorTable.java @@ -349,6 +349,7 @@ public class DruidOperatorTable implements SqlOperatorTable .add(new NestedDataOperatorConversions.JsonValueReturningArrayDoubleOperatorConversion()) .add(new NestedDataOperatorConversions.JsonValueReturningArrayVarcharOperatorConversion()) .add(new NestedDataOperatorConversions.JsonObjectOperatorConversion()) + .add(new NestedDataOperatorConversions.JsonMergeOperatorConversion()) .add(new NestedDataOperatorConversions.ToJsonStringOperatorConversion()) .add(new NestedDataOperatorConversions.ParseJsonOperatorConversion()) .add(new NestedDataOperatorConversions.TryParseJsonOperatorConversion()) diff --git a/sql/src/test/java/org/apache/druid/sql/calcite/CalciteNestedDataQueryTest.java b/sql/src/test/java/org/apache/druid/sql/calcite/CalciteNestedDataQueryTest.java index 6c177e76e7ba..5b48aaac9484 100644 --- a/sql/src/test/java/org/apache/druid/sql/calcite/CalciteNestedDataQueryTest.java +++ b/sql/src/test/java/org/apache/druid/sql/calcite/CalciteNestedDataQueryTest.java @@ -66,6 +66,7 @@ import org.apache.druid.segment.column.RowSignature; import org.apache.druid.segment.incremental.IncrementalIndexSchema; import org.apache.druid.segment.join.JoinableFactoryWrapper; +import org.apache.druid.segment.nested.NestedPathField; import org.apache.druid.segment.virtual.ExpressionVirtualColumn; import org.apache.druid.segment.virtual.NestedFieldVirtualColumn; import org.apache.druid.segment.writeout.OffHeapMemorySegmentWriteOutMediumFactory; @@ -4685,6 +4686,56 @@ public void testJsonQueryAndJsonObject() ); } + @Test + public void testJsonMerging() + { + testQuery( + "SELECT " + + "JSON_MERGE('{\"a\":\"x\"}',JSON_OBJECT(KEY 'x' VALUE JSON_VALUE(nest, '$.x')))\n" + + "FROM druid.nested", + ImmutableList.of( + Druids.newScanQueryBuilder() + .dataSource(DATA_SOURCE) + .intervals(querySegmentSpec(Filtration.eternity())) + .virtualColumns( + new ExpressionVirtualColumn( + "v0", + "json_merge('{\\u0022a\\u0022:\\u0022x\\u0022}',json_object('x',\"v1\"))", + ColumnType.NESTED_DATA, + queryFramework().macroTable() + ), + new NestedFieldVirtualColumn( + "nest", + "v1", + ColumnType.STRING, + ImmutableList.of( + new NestedPathField("x") + ), + false, + null, + false + ) + ) + .columns("v0") + .resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST) + .legacy(false) + .build() + ), + ImmutableList.of( + new Object[]{"{\"a\":\"x\",\"x\":\"100\"}"}, + new Object[]{"{\"a\":\"x\",\"x\":null}"}, + new Object[]{"{\"a\":\"x\",\"x\":\"200\"}"}, + new Object[]{"{\"a\":\"x\",\"x\":null}"}, + new Object[]{"{\"a\":\"x\",\"x\":null}"}, + new Object[]{"{\"a\":\"x\",\"x\":\"100\"}"}, + new Object[]{"{\"a\":\"x\",\"x\":null}"} + ), + RowSignature.builder() + .add("EXPR$0", ColumnType.NESTED_DATA) + .build() + ); + } + @Test public void testCompositionTyping() { diff --git a/website/.spelling b/website/.spelling index 037e6b50bc15..e9b0c629bbf6 100644 --- a/website/.spelling +++ b/website/.spelling @@ -368,6 +368,7 @@ json_paths json_query json_query_array json_value +json_merge karlkfi kerberos keystore