From ef3726c713b3408d26c03722156242eef7332964 Mon Sep 17 00:00:00 2001 From: amory Date: Tue, 11 Feb 2025 21:09:28 +0800 Subject: [PATCH] [improve](functon) improve json_object with complex type (#47627) This PR extends the json_object function so that value arguments may be complex types (array, map, struct) or JSON; such values are embedded in the result as nested JSON instead of escaped strings. The behavior follows PostgreSQL's json_object, documented at https://www.postgresql.org/docs/current/functions-json.html ``` json_object ( [ { key_expression { VALUE | ':' } value_expression [ FORMAT JSON [ ENCODING UTF8 ] ] }[, ...] ] [ { NULL | ABSENT } ON NULL ] [ { WITH | WITHOUT } UNIQUE [ KEYS ] ] [ RETURNING data_type [ FORMAT JSON [ ENCODING UTF8 ] ] ]) Constructs a JSON object of all the key/value pairs given, or an empty object if none are given. key_expression is a scalar expression defining the JSON key, which is converted to the text type. It cannot be NULL nor can it belong to a type that has a cast to the json type. If WITH UNIQUE KEYS is specified, there must not be any duplicate key_expression. Any pair for which the value_expression evaluates to NULL is omitted from the output if ABSENT ON NULL is specified; if NULL ON NULL is specified or the clause omitted, the key is included with value NULL. 
``` --- be/src/vec/functions/function_json.cpp | 17 ++++- .../org/apache/doris/analysis/CastExpr.java | 4 ++ .../doris/analysis/FunctionCallExpr.java | 3 + .../functions/scalar/JsonObject.java | 36 ++++++---- .../cast/test_complextype_to_json.out | 28 ++++---- .../json_function/test_query_json_object.out | 67 +++++++++++++++++++ .../test_query_json_object.groovy | 56 ++++++++++++++++ 7 files changed, 181 insertions(+), 30 deletions(-) diff --git a/be/src/vec/functions/function_json.cpp b/be/src/vec/functions/function_json.cpp index 0d863783bb9e4c..923e269904fde9 100644 --- a/be/src/vec/functions/function_json.cpp +++ b/be/src/vec/functions/function_json.cpp @@ -567,7 +567,7 @@ struct JsonParser<'1'> { StringRef data, rapidjson::Document::AllocatorType& allocator) { DCHECK(data.size == 1 || strncmp(data.data, "true", 4) == 0 || strncmp(data.data, "false", 5) == 0); - value.SetBool((*data.data == '1' || *data.data == 't') ? true : false); + value.SetBool(*data.data == '1' || *data.data == 't'); } }; @@ -608,6 +608,18 @@ struct JsonParser<'5'> { } }; +template <> +struct JsonParser<'7'> { + // json string + static void update_value(StringParser::ParseResult& result, rapidjson::Value& value, + StringRef data, rapidjson::Document::AllocatorType& allocator) { + rapidjson::Document document; + JsonbValue* json_val = JsonbDocument::createValue(data.data, data.size); + convert_jsonb_to_rapidjson(*json_val, document, allocator); + value.CopyFrom(document, allocator); + } +}; + template struct ExecuteReducer { template @@ -669,7 +681,8 @@ struct FunctionJsonObjectImpl { } for (int i = 0; i + 1 < data_columns.size() - 1; i += 2) { - constexpr_int_match<'0', '6', Reducer>::run(type_flags[i + 1], objects, allocator, + // last is for old type definition + constexpr_int_match<'0', '7', Reducer>::run(type_flags[i + 1], objects, allocator, data_columns[i], data_columns[i + 1], nullmaps[i + 1]); } diff --git a/fe/fe-core/src/main/java/org/apache/doris/analysis/CastExpr.java 
b/fe/fe-core/src/main/java/org/apache/doris/analysis/CastExpr.java index de257991ca6ba4..75bc129b523d0a 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/analysis/CastExpr.java +++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/CastExpr.java @@ -335,6 +335,10 @@ public void analyze() throws AnalysisException { if ((type.isMapType() || type.isStructType()) && childType.isStringType()) { return; } + // same with Type.canCastTo() can be cast to jsonb + if (childType.isComplexType() && type.isJsonbType()) { + return; + } if (childType.isNull() && Type.canCastTo(childType, type)) { return; } else { diff --git a/fe/fe-core/src/main/java/org/apache/doris/analysis/FunctionCallExpr.java b/fe/fe-core/src/main/java/org/apache/doris/analysis/FunctionCallExpr.java index 92eb1f5172a7e4..40249ae5486aab 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/analysis/FunctionCallExpr.java +++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/FunctionCallExpr.java @@ -490,7 +490,10 @@ public static int computeJsonDataType(Type type) { return 3; } else if (type.isTime()) { return 4; + } else if (type.isComplexType() || type.isJsonbType()) { + return 7; } else { + // default is string for BE execution return 6; } } diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/JsonObject.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/JsonObject.java index 6d3d1536eb821a..fabe8acc65fc21 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/JsonObject.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/JsonObject.java @@ -21,24 +21,23 @@ import org.apache.doris.nereids.exceptions.AnalysisException; import org.apache.doris.nereids.trees.expressions.Expression; import org.apache.doris.nereids.trees.expressions.functions.AlwaysNotNullable; -import 
org.apache.doris.nereids.trees.expressions.functions.ExplicitlyCastableSignature; +import org.apache.doris.nereids.trees.expressions.functions.CustomSignature; import org.apache.doris.nereids.trees.expressions.visitor.ExpressionVisitor; +import org.apache.doris.nereids.types.DataType; +import org.apache.doris.nereids.types.JsonType; import org.apache.doris.nereids.types.VarcharType; import org.apache.doris.nereids.util.ExpressionUtils; -import com.google.common.collect.ImmutableList; - +import java.util.ArrayList; import java.util.List; /** * ScalarFunction 'json_object'. This class is generated by GenerateFunction. + * Builds a JSON object out of a variadic argument list. + * By convention, the argument list consists of alternating keys and values. + * Key arguments are coerced to text; value arguments are converted as per to_json or to_jsonb. */ -public class JsonObject extends ScalarFunction - implements ExplicitlyCastableSignature, AlwaysNotNullable { - - public static final List SIGNATURES = ImmutableList.of( - FunctionSignature.ret(VarcharType.SYSTEM_DEFAULT).varArgs(VarcharType.SYSTEM_DEFAULT) - ); +public class JsonObject extends ScalarFunction implements CustomSignature, AlwaysNotNullable { /** * constructor with 0 or more arguments. @@ -47,6 +46,20 @@ public JsonObject(Expression... 
varArgs) { super("json_object", ExpressionUtils.mergeArguments(varArgs)); } + @Override + public FunctionSignature customSignature() { + List arguments = new ArrayList<>(); + for (int i = 0; i < arity(); i++) { + if ((i & 1) == 1 && (getArgumentType(i).isComplexType() || getArgumentType(i).isJsonType())) { + // keep origin type for BE Serialization + arguments.add(JsonType.INSTANCE); + } else { + arguments.add(VarcharType.SYSTEM_DEFAULT); + } + } + return FunctionSignature.of(VarcharType.SYSTEM_DEFAULT, arguments); + } + @Override public void checkLegalityBeforeTypeCoercion() { if ((arity() & 1) == 1) { @@ -67,11 +80,6 @@ public JsonObject withChildren(List children) { return new JsonObject(children.toArray(new Expression[0])); } - @Override - public List getSignatures() { - return SIGNATURES; - } - @Override public R accept(ExpressionVisitor visitor, C context) { return visitor.visitJsonObject(this, context); diff --git a/regression-test/data/query_p0/cast/test_complextype_to_json.out b/regression-test/data/query_p0/cast/test_complextype_to_json.out index 7412966f76729a..c209c387923a8b 100644 --- a/regression-test/data/query_p0/cast/test_complextype_to_json.out +++ b/regression-test/data/query_p0/cast/test_complextype_to_json.out @@ -39,13 +39,13 @@ 7 [null,"LC","LC","LC","LC"] ["V7_3",null,"V7_3",null,"V7_3"] -- !sql_arr_agg_cast_json_object -- -{"id":1,"label":"[\\"LC\\",\\"LB\\",\\"alex\\"]","field":"[\\"V1_3\\",\\"V1_2\\",null]"} -{"id":2,"label":"[\\"LC\\",\\"LB\\",\\"LA\\"]","field":"[\\"V2_3\\",\\"V2_2\\",\\"V2_1\\"]"} -{"id":3,"label":"[\\"LC\\",null,\\"LA\\"]","field":"[\\"V3_3\\",null,\\"V3_1\\"]"} -{"id":4,"label":"[\\"LC\\",\\"LB\\",\\"LA\\"]","field":"[\\"V4_3\\",\\"V4_2\\",\\"V4_1\\"]"} -{"id":5,"label":"[null,\\"LC\\",\\"LB\\",\\"LA\\"]","field":"[\\"V5_3\\",\\"V5_3\\",\\"V5_2\\",\\"V5_1\\"]"} -{"id":6,"label":"[null,\\"LC\\",\\"LC\\",\\"LC\\",\\"LC\\"]","field":"[\\"V6_3\\",null,\\"V6_3\\",null,\\"V6_3\\"]"} 
-{"id":7,"label":"[null,\\"LC\\",\\"LC\\",\\"LC\\",\\"LC\\"]","field":"[\\"V7_3\\",null,\\"V7_3\\",null,\\"V7_3\\"]"} +{"id":1,"label":["LC","LB","alex"],"field":["V1_3","V1_2",null]} +{"id":2,"label":["LC","LB","LA"],"field":["V2_3","V2_2","V2_1"]} +{"id":3,"label":["LC",null,"LA"],"field":["V3_3",null,"V3_1"]} +{"id":4,"label":["LC","LB","LA"],"field":["V4_3","V4_2","V4_1"]} +{"id":5,"label":[null,"LC","LB","LA"],"field":["V5_3","V5_3","V5_2","V5_1"]} +{"id":6,"label":[null,"LC","LC","LC","LC"],"field":["V6_3",null,"V6_3",null,"V6_3"]} +{"id":7,"label":[null,"LC","LC","LC","LC"],"field":["V7_3",null,"V7_3",null,"V7_3"]} -- !sql_map_agg_cast -- 1 {"LC":"V1_3","LB":"V1_2","alex":null} @@ -57,11 +57,11 @@ 7 {"LC":null} -- !sql_map_agg_cast_json_object -- -{"id":1,"map_label":"{\\"LC\\":\\"V1_3\\",\\"LB\\":\\"V1_2\\",\\"alex\\":null}"} -{"id":2,"map_label":"{\\"LC\\":\\"V2_3\\",\\"LB\\":\\"V2_2\\",\\"LA\\":\\"V2_1\\"}"} -{"id":3,"map_label":"{\\"LC\\":\\"V3_3\\",\\"LA\\":\\"V3_1\\"}"} -{"id":4,"map_label":"{\\"LC\\":\\"V4_3\\",\\"LB\\":\\"V4_2\\",\\"LA\\":\\"V4_1\\"}"} -{"id":5,"map_label":"{\\"LC\\":\\"V5_3\\",\\"LB\\":\\"V5_2\\",\\"LA\\":\\"V5_1\\"}"} -{"id":6,"map_label":"{\\"LC\\":null}"} -{"id":7,"map_label":"{\\"LC\\":null}"} +{"id":1,"map_label":{"LC":"V1_3","LB":"V1_2","alex":null}} +{"id":2,"map_label":{"LC":"V2_3","LB":"V2_2","LA":"V2_1"}} +{"id":3,"map_label":{"LC":"V3_3","LA":"V3_1"}} +{"id":4,"map_label":{"LC":"V4_3","LB":"V4_2","LA":"V4_1"}} +{"id":5,"map_label":{"LC":"V5_3","LB":"V5_2","LA":"V5_1"}} +{"id":6,"map_label":{"LC":null}} +{"id":7,"map_label":{"LC":null}} diff --git a/regression-test/data/query_p0/sql_functions/json_function/test_query_json_object.out b/regression-test/data/query_p0/sql_functions/json_function/test_query_json_object.out index 13d30f6e75eaea..f4f51b3d69bf7a 100644 --- a/regression-test/data/query_p0/sql_functions/json_function/test_query_json_object.out +++ 
b/regression-test/data/query_p0/sql_functions/json_function/test_query_json_object.out @@ -10,3 +10,70 @@ {"k0":"k00"} {"k1":"k11"} +-- !sql_array -- +{"id":1,"level":["\\"aaa\\"","\\"bbb\\""]} + +-- !sql_array -- +{"id":1,"level":["aaa","bbb"]} + +-- !sql_array -- +{"id":1,"level":[1,2]} + +-- !sql_array -- +{"id":1,"level":[1.1,2.2]} + +-- !sql_array -- +{"id":1,"level":[1.1,2.0]} + +-- !sql_array -- +{"id":1,"level":[1.0,1.2]} + +-- !sql_map -- +{"id":1,"level":{"a":"b","c":"d"}} + +-- !sql_map -- +{"id":1,"level":{"a":1,"c":2}} + +-- !sql_map -- +{"id":1,"level":{"a":1.1,"c":2.2}} + +-- !sql_map -- +{"id":1,"level":{"a":1.1,"c":2.0}} + +-- !sql_map -- +{"id":1,"level":{"a":1.0,"c":1.2}} + +-- !sql_struct -- +{"id":1,"level":{"name":"a","age":1}} + +-- !sql_struct -- +{"id":1,"level":{"name":"a","age":1.1}} + +-- !sql_struct -- +{"id":1,"level":{"name":"a","age":1}} + +-- !sql_struct -- +{"id":1,"level":{"name":"a","age":1.1}} + +-- !sql_json -- +{"id":1,"level":{"a":"b"}} + +-- !sql_json -- +{"id":1,"level":{"a":1}} + +-- !sql_json -- +{"id":1,"level":{"a":1.1}} + +-- !sql_json -- +{"id":1,"level":{"a":1.1}} + +-- !sql_json -- +{"id":1,"level":{"a":1.1}} + +-- !sql2 -- +{"k0":1,"k1":null,"k2":null,"k3":null,"k4":null} +{"k0":2,"k1":["a","b"],"k2":{"a":"b"},"k3":{"name":"a","age":1},"k4":{"a":"b"}} +{"k0":3,"k1":["\\"a\\"","\\"b\\""],"k2":{"\\"a\\"":"\\"b\\"","\\"c\\"":"\\"d\\""},"k3":{"name":"\\"a\\"","age":1},"k4":{"c":"d"}} +{"k0":4,"k1":["1","2"],"k2":{"1":"2"},"k3":{"name":"2","age":1},"k4":{"a":"b"}} +{"k0":5,"k1":["1","2","3","3"],"k2":{"1":"2","3":"4"},"k3":{"name":"a","age":1},"k4":{"a":"b"}} + diff --git a/regression-test/suites/query_p0/sql_functions/json_function/test_query_json_object.groovy b/regression-test/suites/query_p0/sql_functions/json_function/test_query_json_object.groovy index 2ee0c64276c6ad..60c08780c7c2a7 100644 --- a/regression-test/suites/query_p0/sql_functions/json_function/test_query_json_object.groovy +++ 
b/regression-test/suites/query_p0/sql_functions/json_function/test_query_json_object.groovy @@ -48,4 +48,60 @@ suite("test_query_json_object", "query") { qt_sql2 """select json_object ( CONCAT('k',t.number%30926%3000 + 0),CONCAT('k',t.number%30926%3000 + 0,t.number%1000000) ) from numbers("number" = "2") t order by 1;""" sql "DROP TABLE ${tableName};" + + // test json_object with complex type + // literal cases + // array + qt_sql_array """ SELECT json_object('id', 1, 'level', array('"aaa"','"bbb"')); """ + qt_sql_array """ SELECT json_object('id', 1, 'level', array('aaa','bbb')); """ + qt_sql_array """ SELECT json_object('id', 1, 'level', array(1,2)); """ + qt_sql_array """ SELECT json_object('id', 1, 'level', array(1.1,2.2)); """ + qt_sql_array """ SELECT json_object('id', 1, 'level', array(1.1,2)); """ + qt_sql_array """ SELECT json_object('id', 1, 'level', array(cast(1 as decimal), cast(1.2 as decimal))); """ + // map + qt_sql_map """ SELECT json_object('id', 1, 'level', map('a', 'b', 'c', 'd')); """ + qt_sql_map """ SELECT json_object('id', 1, 'level', map('a', 1, 'c', 2)); """ + qt_sql_map """ SELECT json_object('id', 1, 'level', map('a', 1.1, 'c', 2.2)); """ + qt_sql_map """ SELECT json_object('id', 1, 'level', map('a', 1.1, 'c', 2)); """ + qt_sql_map """ SELECT json_object('id', 1, 'level', map('a', cast(1 as decimal), 'c', cast(1.2 as decimal))); """ + // struct + qt_sql_struct """ SELECT json_object('id', 1, 'level', named_struct('name', 'a', 'age', 1)); """ + qt_sql_struct """ SELECT json_object('id', 1, 'level', named_struct('name', 'a', 'age', 1.1)); """ + qt_sql_struct """ SELECT json_object('id', 1, 'level', named_struct('name', 'a', 'age', 1)); """ + qt_sql_struct """ SELECT json_object('id', 1, 'level', named_struct('name', 'a', 'age', 1.1)); """ + // json + qt_sql_json """ SELECT json_object('id', 1, 'level', cast('{\"a\":\"b\"}' as JSON)); """ + qt_sql_json """ SELECT json_object('id', 1, 'level', cast('{\"a\":1}' as JSON)); """ + qt_sql_json """ 
SELECT json_object('id', 1, 'level', cast('{\"a\":1.1}' as JSON)); """ + qt_sql_json """ SELECT json_object('id', 1, 'level', cast('{\"a\":1.1}' as JSON)); """ + qt_sql_json """ SELECT json_object('id', 1, 'level', cast('{\"a\":1.1}' as JSON)); """ + + + + tableName = "test_query_json_object_complex" + sql "DROP TABLE IF EXISTS ${tableName}" + sql """ + CREATE TABLE test_query_json_object_complex ( + `k0` int(11) not null, + `k1` array NULL, + `k2` map NULL, + `k3` struct NULL, + `k4` json NULL + ) ENGINE=OLAP + DUPLICATE KEY(`k0`) + COMMENT "OLAP" + DISTRIBUTED BY HASH(`k0`) BUCKETS 1 + PROPERTIES ( + "replication_allocation" = "tag.location.default: 1", + "in_memory" = "false", + "storage_format" = "V2" + ); + """ + sql "insert into ${tableName} values(1,null,null,null,null);" + sql "insert into ${tableName} values(2, array('a','b'), map('a','b'), named_struct('name','a','age',1), '{\"a\":\"b\"}');" + sql """insert into ${tableName} values(3, array('"a"', '"b"'), map('"a"', '"b"', '"c"', '"d"'), named_struct('name','"a"','age', 1), '{\"c\":\"d\"}');""" + sql """insert into ${tableName} values(4, array(1,2), map(1,2), named_struct('name', 2, 'age',1), '{\"a\":\"b\"}');""" + sql """insert into ${tableName} values(5, array(1,2,3,3), map(1,2,3,4), named_struct('name',\"a\",'age',1), '{\"a\":\"b\"}');""" + qt_sql2 "select json_object('k0',k0,'k1',k1,'k2',k2,'k3',k3,'k4',k4) from ${tableName} order by k0;" + }