Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions docs/querying/math-expr.md
Original file line number Diff line number Diff line change
Expand Up @@ -237,6 +237,7 @@ JSON functions provide facilities to extract, transform, and create `COMPLEX<jso
|---|---|
| json_value(expr, path[, type]) | Extract a Druid literal (`STRING`, `LONG`, `DOUBLE`, `ARRAY<STRING>`, `ARRAY<LONG>`, or `ARRAY<DOUBLE>`) value from `expr` using JSONPath syntax of `path`. The optional `type` argument can be set to `'LONG'`, `'DOUBLE'`, `'STRING'`, `'ARRAY<LONG>'`, `'ARRAY<DOUBLE>'`, or `'ARRAY<STRING>'` to cast values to that type. |
| json_query(expr, path) | Extract a `COMPLEX<json>` value from `expr` using JSONPath syntax of `path` |
| json_query_array(expr, path) | Extract an `ARRAY<COMPLEX<json>>` value from `expr` using JSONPath syntax of `path`. If the value is not an `ARRAY`, it is wrapped in a single-element `ARRAY` containing the value at `path`. |
| json_object(expr1, expr2[, expr3, expr4 ...]) | Construct a `COMPLEX<json>` with alternating 'key' and 'value' arguments|
| parse_json(expr) | Deserialize a JSON `STRING` into a `COMPLEX<json>`. If the input is not a `STRING` or it is invalid JSON, this function will result in an error.|
| try_parse_json(expr) | Deserialize a JSON `STRING` into a `COMPLEX<json>`. If the input is not a `STRING` or it is invalid JSON, this function will result in a `NULL` value. |
Expand Down
1 change: 1 addition & 0 deletions docs/querying/sql-json-functions.md
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@ You can use the following JSON functions to extract, transform, and create `COMP
|`JSON_OBJECT(KEY expr1 VALUE expr2[, KEY expr3 VALUE expr4, ...])` | Constructs a new `COMPLEX<json>` object. The `KEY` expressions must evaluate to string types. The `VALUE` expressions can be composed of any input type, including other `COMPLEX<json>` values. `JSON_OBJECT` can accept colon-separated key-value pairs. The following syntax is equivalent: `JSON_OBJECT(expr1:expr2[, expr3:expr4, ...])`.|
|`JSON_PATHS(expr)`| Returns an array of all paths which refer to literal values in `expr` in JSONPath format. |
|`JSON_QUERY(expr, path)`| Extracts a `COMPLEX<json>` value from `expr`, at the specified `path`. |
|`JSON_QUERY_ARRAY(expr, path)`| Extracts an `ARRAY<COMPLEX<json>>` value from `expr`, at the specified `path`. If the value is not an `ARRAY`, it is wrapped in a single-element `ARRAY` containing the value at `path`.|
|`JSON_VALUE(expr, path [RETURNING sqlType])`| Extracts a literal value from `expr` at the specified `path`. If you specify `RETURNING` and an SQL type name (such as `VARCHAR`, `BIGINT`, `DOUBLE`, etc) the function plans the query using the suggested type. Otherwise, it attempts to infer the type based on the context. If it can't infer the type, it defaults to `VARCHAR`.|
|`PARSE_JSON(expr)`|Parses `expr` into a `COMPLEX<json>` object. This operator deserializes JSON values when processing them, translating stringified JSON into a nested structure. If the input is not a `VARCHAR` or it is invalid JSON, this function will result in an error.|
|`TRY_PARSE_JSON(expr)`|Parses `expr` into a `COMPLEX<json>` object. This operator deserializes JSON values when processing them, translating stringified JSON into a nested structure. If the input is not a `VARCHAR` or it is invalid JSON, this function will result in a `NULL` value.|
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -83,6 +83,7 @@ public class ExpressionModule implements Module
.add(NestedDataExpressions.JsonPathsExprMacro.class)
.add(NestedDataExpressions.JsonValueExprMacro.class)
.add(NestedDataExpressions.JsonQueryExprMacro.class)
.add(NestedDataExpressions.JsonQueryArrayExprMacro.class)
.add(NestedDataExpressions.ToJsonStringExprMacro.class)
.add(NestedDataExpressions.ParseJsonExprMacro.class)
.add(NestedDataExpressions.TryParseJsonExprMacro.class)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@
import org.apache.druid.math.expr.ExprMacroTable;
import org.apache.druid.math.expr.ExprType;
import org.apache.druid.math.expr.ExpressionType;
import org.apache.druid.math.expr.ExpressionTypeFactory;
import org.apache.druid.math.expr.NamedFunction;
import org.apache.druid.segment.nested.NestedPathFinder;
import org.apache.druid.segment.nested.NestedPathPart;
Expand All @@ -44,6 +45,8 @@

public class NestedDataExpressions
{
// Expression type for ARRAY<COMPLEX<json>>: an array whose elements are NESTED_DATA.
// Declared final because it is a shared constant that must never be reassigned.
private static final ExpressionType JSON_ARRAY = ExpressionTypeFactory.getInstance().ofArray(ExpressionType.NESTED_DATA);

public static class JsonObjectExprMacro implements ExprMacroTable.ExprMacro
{
public static final String NAME = "json_object";
Expand Down Expand Up @@ -591,6 +594,120 @@ public ExpressionType getOutputType(InputBindingInspector inspector)
}
}

public static class JsonQueryArrayExprMacro implements ExprMacroTable.ExprMacro
{
public static final String NAME = "json_query_array";

@Override
public String name()
{
return NAME;
}

@Override
public Expr apply(List<Expr> args)
{
if (args.get(1).isLiteral()) {
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

we should add some validation on args count.

return new JsonQueryArrayExpr(args);
} else {
return new JsonQueryArrayDynamicExpr(args);
}
}

final class JsonQueryArrayExpr extends ExprMacroTable.BaseScalarMacroFunctionExpr
{
private final List<NestedPathPart> parts;

public JsonQueryArrayExpr(List<Expr> args)
{
super(name(), args);
this.parts = getJsonPathPartsFromLiteral(JsonQueryArrayExprMacro.this, args.get(1));
}

@Override
public ExprEval eval(ObjectBinding bindings)
{
ExprEval input = args.get(0).eval(bindings);
final Object value = NestedPathFinder.find(unwrap(input), parts);
if (value instanceof List) {
return ExprEval.ofArray(
JSON_ARRAY,
ExprEval.bestEffortArray((List) value).asArray()
);
}
return ExprEval.ofArray(
JSON_ARRAY,
ExprEval.bestEffortOf(value).asArray()
);
}

@Override
public Expr visit(Shuttle shuttle)
{
List<Expr> newArgs = args.stream().map(x -> x.visit(shuttle)).collect(Collectors.toList());
if (newArgs.get(1).isLiteral()) {
return shuttle.visit(new JsonQueryArrayExpr(newArgs));
} else {
return shuttle.visit(new JsonQueryArrayDynamicExpr(newArgs));
}
}

@Nullable
@Override
public ExpressionType getOutputType(InputBindingInspector inspector)
{
// call all the output JSON typed
return ExpressionType.NESTED_DATA;
}
}

final class JsonQueryArrayDynamicExpr extends ExprMacroTable.BaseScalarMacroFunctionExpr
{
public JsonQueryArrayDynamicExpr(List<Expr> args)
{
super(name(), args);
}

@Override
public ExprEval eval(ObjectBinding bindings)
{
ExprEval input = args.get(0).eval(bindings);
ExprEval path = args.get(1).eval(bindings);
final List<NestedPathPart> parts = NestedPathFinder.parseJsonPath(path.asString());
final Object value = NestedPathFinder.find(unwrap(input), parts);
if (value instanceof List) {
return ExprEval.ofArray(
JSON_ARRAY,
ExprEval.bestEffortArray((List) value).asArray()
);
}
return ExprEval.ofArray(
JSON_ARRAY,
ExprEval.bestEffortOf(value).asArray()
);
}

@Override
public Expr visit(Shuttle shuttle)
{
List<Expr> newArgs = args.stream().map(x -> x.visit(shuttle)).collect(Collectors.toList());
if (newArgs.get(1).isLiteral()) {
return shuttle.visit(new JsonQueryArrayExpr(newArgs));
} else {
return shuttle.visit(new JsonQueryArrayDynamicExpr(newArgs));
}
}

@Nullable
@Override
public ExpressionType getOutputType(InputBindingInspector inspector)
{
// call all the output ARRAY<COMPLEX<json>> typed
return JSON_ARRAY;
}
}
}

public static class JsonPathsExprMacro implements ExprMacroTable.ExprMacro
{
public static final String NAME = "json_paths";
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,7 @@ public static ColumnType ofType(TypeSignature<ValueType> type)
case STRING:
return ColumnType.STRING_ARRAY;
default:
throw new ISE("Unsupported expression type[%s]", type.asTypeString());
return ColumnType.ofArray(ofType(type.getElementType()));
}
case COMPLEX:
return INTERNER.intern(new ColumnType(ValueType.COMPLEX, type.getComplexTypeName(), null));
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1228,6 +1228,13 @@ public ColumnCapabilities capabilities(String columnName)
public ColumnCapabilities capabilities(ColumnInspector inspector, String columnName)
{
if (processFromRaw) {
if (expectedType != null && expectedType.isArray() && ColumnType.NESTED_DATA.equals(expectedType.getElementType())) {
// arrays of objects!
return ColumnCapabilitiesImpl.createDefault()
.setType(ColumnType.ofArray(ColumnType.NESTED_DATA))
.setHasMultipleValues(false)
.setHasNulls(true);
}
// JSON_QUERY always returns a StructuredData
return ColumnCapabilitiesImpl.createDefault()
.setType(ColumnType.NESTED_DATA)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@
import org.apache.druid.math.expr.ExprMacroTable;
import org.apache.druid.math.expr.ExpressionProcessingException;
import org.apache.druid.math.expr.ExpressionType;
import org.apache.druid.math.expr.ExpressionTypeFactory;
import org.apache.druid.math.expr.InputBindings;
import org.apache.druid.math.expr.Parser;
import org.apache.druid.segment.nested.StructuredData;
Expand All @@ -37,6 +38,7 @@
import org.junit.Test;

import java.util.Arrays;
import java.util.List;
import java.util.Map;

public class NestedDataExpressionsTest extends InitializedNullHandlingTest
Expand All @@ -49,6 +51,7 @@ public class NestedDataExpressionsTest extends InitializedNullHandlingTest
new NestedDataExpressions.JsonObjectExprMacro(),
new NestedDataExpressions.JsonValueExprMacro(),
new NestedDataExpressions.JsonQueryExprMacro(),
new NestedDataExpressions.JsonQueryArrayExprMacro(),
new NestedDataExpressions.ToJsonStringExprMacro(JSON_MAPPER),
new NestedDataExpressions.ParseJsonExprMacro(JSON_MAPPER),
new NestedDataExpressions.TryParseJsonExprMacro(JSON_MAPPER)
Expand Down Expand Up @@ -329,6 +332,37 @@ public void testJsonQueryExpression()
Assert.assertEquals(ExpressionType.NESTED_DATA, eval.type());
}

/**
 * Exercises json_query_array against literal and dynamic paths, checking both the produced
 * array contents and that the result is typed as an array of nested data.
 */
@Test
public void testJsonQueryArrayExpression()
{
  final ExpressionType expectedArrayType = ExpressionTypeFactory.getInstance().ofArray(ExpressionType.NESTED_DATA);

  // a non-array value at the path comes back as a single element array
  final Expr scalarExpr = Parser.parse("json_query_array(nest, '$.x')", MACRO_TABLE);
  final ExprEval scalarEval = scalarExpr.eval(inputBindings);
  Assert.assertArrayEquals(new Object[]{100L}, (Object[]) scalarEval.value());
  Assert.assertEquals(expectedArrayType, scalarEval.type());

  // a list at a literal path is returned element for element
  final Expr literalPathExpr = Parser.parse("json_query_array(nester, '$.x')", MACRO_TABLE);
  final ExprEval literalPathEval = literalPathExpr.eval(inputBindings);
  Assert.assertArrayEquals(((List) NESTER.get("x")).toArray(), (Object[]) literalPathEval.value());
  Assert.assertEquals(expectedArrayType, literalPathEval.type());

  // same lookup, but the path is computed by another expression instead of being a literal
  final Expr dynamicPathExpr = Parser.parse(
      "json_query_array(nester, array_offset(json_paths(nester), 0))",
      MACRO_TABLE
  );
  final ExprEval dynamicPathEval = dynamicPathExpr.eval(inputBindings);
  Assert.assertArrayEquals(((List) NESTER.get("x")).toArray(), (Object[]) dynamicPathEval.value());
  Assert.assertEquals(expectedArrayType, dynamicPathEval.type());

  // list stored under '$.y' of the nesterer input
  final Expr nestererExpr = Parser.parse("json_query_array(nesterer, '$.y')", MACRO_TABLE);
  final ExprEval nestererEval = nestererExpr.eval(inputBindings);
  Assert.assertArrayEquals(((List) NESTERER.get("y")).toArray(), (Object[]) nestererEval.value());
  Assert.assertEquals(expectedArrayType, nestererEval.type());

  // the result can be consumed by other array functions
  final Expr lengthExpr = Parser.parse("array_length(json_query_array(nesterer, '$.y'))", MACRO_TABLE);
  final ExprEval lengthEval = lengthExpr.eval(inputBindings);
  Assert.assertEquals(3L, lengthEval.value());
  Assert.assertEquals(ExpressionType.LONG, lengthEval.type());
}

@Test
public void testParseJsonTryParseJson() throws JsonProcessingException
{
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -48,11 +48,13 @@
import org.apache.druid.java.util.common.StringUtils;
import org.apache.druid.math.expr.Expr;
import org.apache.druid.math.expr.InputBindings;
import org.apache.druid.query.expression.NestedDataExpressions;
import org.apache.druid.segment.column.ColumnType;
import org.apache.druid.segment.column.RowSignature;
import org.apache.druid.segment.nested.NestedPathFinder;
import org.apache.druid.segment.nested.NestedPathPart;
import org.apache.druid.segment.virtual.NestedFieldVirtualColumn;
import org.apache.druid.sql.calcite.expression.DirectOperatorConversion;
import org.apache.druid.sql.calcite.expression.DruidExpression;
import org.apache.druid.sql.calcite.expression.Expressions;
import org.apache.druid.sql.calcite.expression.OperatorConversions;
Expand All @@ -78,6 +80,16 @@ public class NestedDataOperatorConversions
true
);

/**
 * Return type inference that yields an SQL array type whose element type is the complex type
 * built by {@link RowSignatures#makeComplexType} for {@link ColumnType#NESTED_DATA}.
 */
public static final SqlReturnTypeInference NESTED_ARRAY_RETURN_TYPE_INFERENCE = opBinding -> {
  // -1 is the maxCardinality argument of Calcite's createArrayType, i.e. no cardinality limit
  return opBinding.getTypeFactory().createArrayType(
      RowSignatures.makeComplexType(opBinding.getTypeFactory(), ColumnType.NESTED_DATA, true),
      -1
  );
};

public static class JsonPathsOperatorConversion implements SqlOperatorConversion
{
private static final SqlFunction SQL_FUNCTION = OperatorConversions
Expand Down Expand Up @@ -231,6 +243,26 @@ public DruidExpression toDruidExpression(
}
}

/**
 * SQL operator conversion for the function named by
 * {@link NestedDataExpressions.JsonQueryArrayExprMacro#NAME} (upper-cased for SQL). It takes two
 * operands, the first of any type and the second a character type, and its return type comes from
 * {@link #NESTED_ARRAY_RETURN_TYPE_INFERENCE}.
 */
public static class JsonQueryArrayOperatorConversion extends DirectOperatorConversion
{
  private static final SqlFunction SQL_FUNCTION = buildSqlFunction();

  public JsonQueryArrayOperatorConversion()
  {
    super(SQL_FUNCTION, NestedDataExpressions.JsonQueryArrayExprMacro.NAME);
  }

  /**
   * Assembles the SQL function definition used by this conversion.
   */
  private static SqlFunction buildSqlFunction()
  {
    final String sqlName = StringUtils.toUpperCase(NestedDataExpressions.JsonQueryArrayExprMacro.NAME);
    return OperatorConversions
        .operatorBuilder(sqlName)
        .operandTypeChecker(OperandTypes.family(SqlTypeFamily.ANY, SqlTypeFamily.CHARACTER))
        .returnTypeInference(NESTED_ARRAY_RETURN_TYPE_INFERENCE)
        .functionCategory(SqlFunctionCategory.SYSTEM)
        .build();
  }
}

/**
* The {@link org.apache.calcite.sql2rel.StandardConvertletTable} converts json_value(.. RETURNING type) into
* cast(json_value_any(..), type).
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -333,6 +333,7 @@ public class DruidOperatorTable implements SqlOperatorTable
.add(new NestedDataOperatorConversions.JsonKeysOperatorConversion())
.add(new NestedDataOperatorConversions.JsonPathsOperatorConversion())
.add(new NestedDataOperatorConversions.JsonQueryOperatorConversion())
.add(new NestedDataOperatorConversions.JsonQueryArrayOperatorConversion())
.add(new NestedDataOperatorConversions.JsonValueAnyOperatorConversion())
.add(new NestedDataOperatorConversions.JsonValueBigintOperatorConversion())
.add(new NestedDataOperatorConversions.JsonValueDoubleOperatorConversion())
Expand Down
Loading