Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions docs/querying/math-expr.md
Original file line number Diff line number Diff line change
Expand Up @@ -245,6 +245,7 @@ JSON functions provide facilities to extract, transform, and create `COMPLEX<jso
| to_json_string(expr) | Convert `expr` into a JSON `STRING` value |
| json_keys(expr, path) | Get array of field names from `expr` at the specified JSONPath `path`, or null if the data does not exist or have any fields |
| json_paths(expr) | Get array of all JSONPath paths available from `expr` |
| json_merge(expr1, expr2[, expr3 ...]) | Merges two or more JSON `STRING` or `COMPLEX<json>` into one. Right-most being preserved on key overlaps |
Comment thread
lkm marked this conversation as resolved.

### JSONPath syntax

Expand Down
7 changes: 7 additions & 0 deletions docs/querying/sql-functions.md
Original file line number Diff line number Diff line change
Expand Up @@ -860,6 +860,13 @@ Extracts an `ARRAY<COMPLEX<json>>` value from `expr` at the specified `path`. If

Extracts a literal value from `expr` at the specified `path`. If you specify `RETURNING` and an SQL type name (such as `VARCHAR`, `BIGINT`, `DOUBLE`, etc) the function plans the query using the suggested type. Otherwise, it attempts to infer the type based on the context. If it can't infer the type, it defaults to `VARCHAR`.

## JSON_MERGE
Comment thread
lkm marked this conversation as resolved.

**Function type:** [JSON](sql-json-functions.md)
Comment thread
lkm marked this conversation as resolved.

`json_merge(expr1, expr2[, expr3 ...])`
Comment thread
lkm marked this conversation as resolved.
Merges two or more JSON `STRING` or `COMPLEX<json>` into one. Right-most being preserved on key overlaps. Returning always a `COMPLEX<json>` type.

## LATEST

`LATEST(expr, [maxBytesPerValue])`
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,7 @@ public class ExpressionModule implements Module
.add(HyperUniqueExpressions.HllEstimateExprMacro.class)
.add(HyperUniqueExpressions.HllRoundEstimateExprMacro.class)
.add(NestedDataExpressions.JsonObjectExprMacro.class)
.add(NestedDataExpressions.JsonMergeExprMacro.class)
.add(NestedDataExpressions.JsonKeysExprMacro.class)
.add(NestedDataExpressions.JsonPathsExprMacro.class)
.add(NestedDataExpressions.JsonValueExprMacro.class)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@

import com.fasterxml.jackson.core.JsonProcessingException;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.fasterxml.jackson.databind.ObjectReader;
import org.apache.druid.guice.annotations.Json;
import org.apache.druid.math.expr.Expr;
import org.apache.druid.math.expr.ExprEval;
Expand Down Expand Up @@ -99,6 +100,117 @@ public ExpressionType getOutputType(InputBindingInspector inspector)
}
}

public static class JsonMergeExprMacro implements ExprMacroTable.ExprMacro
{
public static final String NAME = "json_merge";

private final ObjectMapper jsonMapper;

@Inject
public JsonMergeExprMacro(
@Json ObjectMapper jsonMapper
)
{
this.jsonMapper = jsonMapper;
}

@Override
public String name()
{
return NAME;
}

@Override
public Expr apply(List<Expr> args)
{
if (args.size() < 2) {
throw validationFailed("must have at least two arguments");
}

final class ParseJsonExpr extends ExprMacroTable.BaseScalarMacroFunctionExpr
{
public ParseJsonExpr(List<Expr> args)
{
super(JsonMergeExprMacro.this, args);
}

@Override
public ExprEval eval(ObjectBinding bindings)
{
ExprEval arg = args.get(0).eval(bindings);
Object obj;

if (arg.value() == null) {
throw JsonMergeExprMacro.this.validationFailed(
"invalid input expected %s but got %s instead",
ExpressionType.STRING,
arg.type()
Comment on lines +144 to +147
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The error is misleading since the issue is that the value is null. I am ok with strict validation, assuming there is a way to get past these null values somehow.

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I could tolerate null in the first argument, so essentially just return the 2nd argument unmodified.

);
}

try {
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

there could be a nice optimization where any literals are parsed into a map (again assuming that parsing string to json would happen inside readValue) only once.

obj = jsonMapper.readValue(getArgAsJson(arg), Object.class);
}
catch (JsonProcessingException e) {
throw JsonMergeExprMacro.this.processingFailed(e, "bad string input [%s]", arg.asString());
}

ObjectReader updater = jsonMapper.readerForUpdating(obj);

for (int i = 1; i < args.size(); i++) {
ExprEval argSub = args.get(i).eval(bindings);

try {
String str = getArgAsJson(argSub);
if (str != null) {
obj = updater.readValue(str);
}
}
catch (JsonProcessingException e) {
throw JsonMergeExprMacro.this.processingFailed(e, "bad string input [%s]", argSub.asString());
}
}

return ExprEval.ofComplex(ExpressionType.NESTED_DATA, obj);
}

@Nullable
@Override
public ExpressionType getOutputType(InputBindingInspector inspector)
{
return ExpressionType.NESTED_DATA;
}

private String getArgAsJson(ExprEval arg)
{
if (arg.value() == null) {
return null;
}

if (arg.type().is(ExprType.STRING)) {
return arg.asString();
}

if (arg.type().is(ExprType.COMPLEX)) {
try {
return jsonMapper.writeValueAsString(unwrap(arg));
}
catch (JsonProcessingException e) {
throw JsonMergeExprMacro.this.processingFailed(e, "bad complex input [%s]", arg.asString());
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The error message should also tell what is bad about this input and what column does this message correspond to.

}
}
Comment on lines +194 to +201
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

is there a way to avoid the hop of json -> string -> json when arg is already a json?

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

There isn't a way for the second parameter, because I'm using the Jackson updater, but for the first there is. However, that leads to the first variable being used again somehow on the second runs, that's the reason for the test case testJsonMergeOverflow(). I'm not sure why this is happening but essentially for the second run you get {"blah":"blahblah","blah2":"blahblah2"}.

Happy to take any pointers here. I suspect I would need to copy the object in memory to avoid the reference in which case I'm not sure how much of an optimisation it really would be.


throw JsonMergeExprMacro.this.validationFailed(
"invalid input expected %s but got %s instead",
ExpressionType.STRING,
arg.type()
);
}
}
return new ParseJsonExpr(args);
}
}

public static class ToJsonStringExprMacro implements ExprMacroTable.ExprMacro
{
public static final String NAME = "to_json_string";
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,7 @@ public class NestedDataExpressionsTest extends InitializedNullHandlingTest
new NestedDataExpressions.JsonPathsExprMacro(),
new NestedDataExpressions.JsonKeysExprMacro(),
new NestedDataExpressions.JsonObjectExprMacro(),
new NestedDataExpressions.JsonMergeExprMacro(JSON_MAPPER),
new NestedDataExpressions.JsonValueExprMacro(),
new NestedDataExpressions.JsonQueryExprMacro(),
new NestedDataExpressions.JsonQueryArrayExprMacro(),
Expand Down Expand Up @@ -112,6 +113,63 @@ public void testJsonObjectExpression()
Assert.assertEquals(ImmutableMap.of("a", "hello", "b", "world"), ((Map) eval.value()).get("y"));
}

@Test
public void testJsonMergeExpression() throws JsonProcessingException
{
Expr expr = Parser.parse("json_merge('{\"a\":\"x\"}','{\"b\":\"y\"}')", MACRO_TABLE);
ExprEval eval = expr.eval(inputBindings);
Assert.assertEquals("{\"a\":\"x\",\"b\":\"y\"}", JSON_MAPPER.writeValueAsString(eval.value()));
Assert.assertEquals(ExpressionType.NESTED_DATA, eval.type());

expr = Parser.parse("json_merge('{\"a\":\"x\"}', null)", MACRO_TABLE);
eval = expr.eval(inputBindings);
Assert.assertEquals("{\"a\":\"x\"}", JSON_MAPPER.writeValueAsString(eval.value()));
Assert.assertEquals(ExpressionType.NESTED_DATA, eval.type());

expr = Parser.parse("json_merge('{\"a\":\"x\"}','{\"b\":\"y\"}','{\"c\":[1,2,3]}')", MACRO_TABLE);
eval = expr.eval(inputBindings);
Assert.assertEquals("{\"a\":\"x\",\"b\":\"y\",\"c\":[1,2,3]}", JSON_MAPPER.writeValueAsString(eval.value()));
Assert.assertEquals(ExpressionType.NESTED_DATA, eval.type());

expr = Parser.parse("json_merge(json_object('a', 'x'),json_object('b', 'y'))", MACRO_TABLE);
eval = expr.eval(inputBindings);
Assert.assertEquals("{\"a\":\"x\",\"b\":\"y\"}", JSON_MAPPER.writeValueAsString(eval.value()));
Assert.assertEquals(ExpressionType.NESTED_DATA, eval.type());

expr = Parser.parse("json_merge('{\"a\":\"x\"}',json_merge('{\"a\":\"z\"}','{\"a\":\"y\"}'))", MACRO_TABLE);
eval = expr.eval(inputBindings);
Assert.assertEquals("{\"a\":\"y\"}", JSON_MAPPER.writeValueAsString(eval.value()));
Assert.assertEquals(ExpressionType.NESTED_DATA, eval.type());

expr = Parser.parse("json_merge('[\"a\", \"b\"]', '[\"c\", \"d\"]')", MACRO_TABLE);
eval = expr.eval(inputBindings);
Assert.assertEquals("[\"a\",\"b\",\"c\",\"d\"]", JSON_MAPPER.writeValueAsString(eval.value()));
Assert.assertEquals(ExpressionType.NESTED_DATA, eval.type());
}

@Test
public void testJsonMergeOverflow() throws JsonProcessingException
{
Expr.ObjectBinding input1 = InputBindings.forInputSuppliers(
new ImmutableMap.Builder<String, InputBindings.InputSupplier<?>>()
.put("attr", InputBindings.inputSupplier(ExpressionType.NESTED_DATA, () -> ImmutableMap.of("key", "blah", "value", "blahblah")))
.build()
);
Expr.ObjectBinding input2 = InputBindings.forInputSuppliers(
new ImmutableMap.Builder<String, InputBindings.InputSupplier<?>>()
.put("attr", InputBindings.inputSupplier(ExpressionType.NESTED_DATA, () -> ImmutableMap.of("key", "blah2", "value", "blahblah2")))
.build()
);

Expr expr = Parser.parse("json_merge(json_object(), json_object(json_value(attr, '$.key'), json_value(attr, '$.value')))", MACRO_TABLE);
ExprEval eval = expr.eval(input1);
Assert.assertEquals("{\"blah\":\"blahblah\"}", JSON_MAPPER.writeValueAsString(eval.value()));
Assert.assertEquals(ExpressionType.NESTED_DATA, eval.type());
eval = expr.eval(input2);
Assert.assertEquals("{\"blah2\":\"blahblah2\"}", JSON_MAPPER.writeValueAsString(eval.value()));
Assert.assertEquals(ExpressionType.NESTED_DATA, eval.type());
}

@Test
public void testJsonKeysExpression()
{
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -778,6 +778,52 @@ public DruidExpression toDruidExpression(PlannerContext plannerContext, RowSigna
}
}

public static class JsonMergeOperatorConversion implements SqlOperatorConversion
{
private static final String FUNCTION_NAME = "json_merge";
private static final SqlFunction SQL_FUNCTION = OperatorConversions
.operatorBuilder(FUNCTION_NAME)
.operandTypeChecker(OperandTypes.variadic(SqlOperandCountRanges.from(1)))
.operandTypeInference((callBinding, returnType, operandTypes) -> {
RelDataTypeFactory typeFactory = callBinding.getTypeFactory();
for (int i = 0; i < operandTypes.length; i++) {
operandTypes[i] = typeFactory.createTypeWithNullability(
typeFactory.createSqlType(SqlTypeName.ANY),
true
);
}
})
.returnTypeInference(NESTED_RETURN_TYPE_INFERENCE)
.functionCategory(SqlFunctionCategory.SYSTEM)
.build();

@Override
public SqlOperator calciteOperator()
{
return SQL_FUNCTION;
}

@Nullable
@Override
public DruidExpression toDruidExpression(
PlannerContext plannerContext,
RowSignature rowSignature,
RexNode rexNode
)
{
return OperatorConversions.convertCall(
plannerContext,
rowSignature,
rexNode,
druidExpressions -> DruidExpression.ofExpression(
ColumnType.NESTED_DATA,
DruidExpression.functionCall("json_merge"),
druidExpressions
)
);
}
}

public static class ToJsonStringOperatorConversion implements SqlOperatorConversion
{
private static final String FUNCTION_NAME = "to_json_string";
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -349,6 +349,7 @@ public class DruidOperatorTable implements SqlOperatorTable
.add(new NestedDataOperatorConversions.JsonValueReturningArrayDoubleOperatorConversion())
.add(new NestedDataOperatorConversions.JsonValueReturningArrayVarcharOperatorConversion())
.add(new NestedDataOperatorConversions.JsonObjectOperatorConversion())
.add(new NestedDataOperatorConversions.JsonMergeOperatorConversion())
.add(new NestedDataOperatorConversions.ToJsonStringOperatorConversion())
.add(new NestedDataOperatorConversions.ParseJsonOperatorConversion())
.add(new NestedDataOperatorConversions.TryParseJsonOperatorConversion())
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,7 @@
import org.apache.druid.segment.column.RowSignature;
import org.apache.druid.segment.incremental.IncrementalIndexSchema;
import org.apache.druid.segment.join.JoinableFactoryWrapper;
import org.apache.druid.segment.nested.NestedPathField;
import org.apache.druid.segment.virtual.ExpressionVirtualColumn;
import org.apache.druid.segment.virtual.NestedFieldVirtualColumn;
import org.apache.druid.segment.writeout.OffHeapMemorySegmentWriteOutMediumFactory;
Expand Down Expand Up @@ -4685,6 +4686,56 @@ public void testJsonQueryAndJsonObject()
);
}

@Test
public void testJsonMerging()
{
testQuery(
"SELECT "
+ "JSON_MERGE('{\"a\":\"x\"}',JSON_OBJECT(KEY 'x' VALUE JSON_VALUE(nest, '$.x')))\n"
+ "FROM druid.nested",
ImmutableList.of(
Druids.newScanQueryBuilder()
.dataSource(DATA_SOURCE)
.intervals(querySegmentSpec(Filtration.eternity()))
.virtualColumns(
new ExpressionVirtualColumn(
"v0",
"json_merge('{\\u0022a\\u0022:\\u0022x\\u0022}',json_object('x',\"v1\"))",
ColumnType.NESTED_DATA,
queryFramework().macroTable()
),
new NestedFieldVirtualColumn(
"nest",
"v1",
ColumnType.STRING,
ImmutableList.of(
new NestedPathField("x")
),
false,
null,
false
)
)
.columns("v0")
.resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST)
.legacy(false)
.build()
),
ImmutableList.of(
new Object[]{"{\"a\":\"x\",\"x\":\"100\"}"},
new Object[]{"{\"a\":\"x\",\"x\":null}"},
new Object[]{"{\"a\":\"x\",\"x\":\"200\"}"},
new Object[]{"{\"a\":\"x\",\"x\":null}"},
new Object[]{"{\"a\":\"x\",\"x\":null}"},
new Object[]{"{\"a\":\"x\",\"x\":\"100\"}"},
new Object[]{"{\"a\":\"x\",\"x\":null}"}
),
RowSignature.builder()
.add("EXPR$0", ColumnType.NESTED_DATA)
.build()
);
}

@Test
public void testCompositionTyping()
{
Expand Down
1 change: 1 addition & 0 deletions website/.spelling
Original file line number Diff line number Diff line change
Expand Up @@ -368,6 +368,7 @@ json_paths
json_query
json_query_array
json_value
json_merge
karlkfi
kerberos
keystore
Expand Down