From 842e8e19dec2eaef2cc7d011ebc26af853598df2 Mon Sep 17 00:00:00 2001 From: amorynan Date: Wed, 19 Jun 2024 17:20:37 +0800 Subject: [PATCH 1/4] support arr/map-agg result cast to json --- .../java/org/apache/doris/catalog/Type.java | 4 + .../org/apache/doris/analysis/CastExpr.java | 23 ++++ .../doris/nereids/trees/expressions/Cast.java | 2 +- .../cast/test_complextype_to_json.out | 67 ++++++++++ .../cast/test_complextype_to_json.groovy | 119 ++++++++++++++++++ 5 files changed, 214 insertions(+), 1 deletion(-) create mode 100644 regression-test/data/query_p0/cast/test_complextype_to_json.out create mode 100644 regression-test/suites/query_p0/cast/test_complextype_to_json.groovy diff --git a/fe/fe-common/src/main/java/org/apache/doris/catalog/Type.java b/fe/fe-common/src/main/java/org/apache/doris/catalog/Type.java index ef1ead2a153a84..0b366cf2ce6f04 100644 --- a/fe/fe-common/src/main/java/org/apache/doris/catalog/Type.java +++ b/fe/fe-common/src/main/java/org/apache/doris/catalog/Type.java @@ -834,6 +834,10 @@ public static boolean isImplicitlyCastable(Type t1, Type t2, boolean strict, boo } public static boolean canCastTo(Type sourceType, Type targetType) { + // In BE Code , we make any data type can cast to jsonb + if (targetType.isJsonbType()) { + return true; + } if (sourceType.isVariantType() && (targetType.isScalarType() || targetType.isArrayType())) { // variant could cast to scalar types and array return true; diff --git a/fe/fe-core/src/main/java/org/apache/doris/analysis/CastExpr.java b/fe/fe-core/src/main/java/org/apache/doris/analysis/CastExpr.java index ac871c726084fc..b2cece4f672cae 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/analysis/CastExpr.java +++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/CastExpr.java @@ -20,13 +20,17 @@ package org.apache.doris.analysis; +import org.apache.doris.catalog.ArrayType; import org.apache.doris.catalog.Env; import org.apache.doris.catalog.Function; import 
org.apache.doris.catalog.Function.NullableMode; import org.apache.doris.catalog.FunctionSet; +import org.apache.doris.catalog.MapType; import org.apache.doris.catalog.PrimitiveType; import org.apache.doris.catalog.ScalarFunction; import org.apache.doris.catalog.ScalarType; +import org.apache.doris.catalog.StructField; +import org.apache.doris.catalog.StructType; import org.apache.doris.catalog.Type; import org.apache.doris.catalog.TypeUtils; import org.apache.doris.common.AnalysisException; @@ -115,6 +119,19 @@ public CastExpr(Type targetType, Expr e) { analysisDone(); } + public boolean checkMapKeyIsStringLikeForJson(Type complexType) { + if (complexType.isMapType()) { + return ((MapType) complexType).getKeyType().isStringType(); + } else if (complexType.isArrayType()) { + return checkMapKeyIsStringLikeForJson(((ArrayType) complexType).getItemType()); + } else if (complexType.isStructType()) { + for (StructField f : ((StructType) complexType).getFields()) { + return checkMapKeyIsStringLikeForJson(f.getType()); + } + } + return true; + } + /** * Just use for nereids, put analyze() in finalizeImplForNereids */ @@ -153,6 +170,12 @@ public CastExpr(Type targetType, Expr e, Void v) { Type from = getActualArgTypes(collectChildReturnTypes())[0]; Type to = getActualType(type); NullableMode nullableMode = TYPE_NULLABLE_MODE.get(Pair.of(from, to)); + // for complex type cast to jsonb we make ret is always nullable + if (from.isComplexType() && type.isJsonbType()) { + Preconditions.checkState(checkMapKeyIsStringLikeForJson(from), + "check type " + from + " cast to json failed"); + nullableMode = Function.NullableMode.ALWAYS_NULLABLE; + } Preconditions.checkState(nullableMode != null, "cannot find nullable node for cast from " + from + " to " + to); fn = new Function(new FunctionName(getFnName(type)), Lists.newArrayList(e.type), type, diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/Cast.java 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/Cast.java index 62bd3639b5a244..124ed589d494e7 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/Cast.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/Cast.java @@ -79,7 +79,7 @@ public boolean nullable() { return true; } else if (!childDataType.isTimeLikeType() && targetType.isTimeLikeType()) { return true; - } else if (childDataType.isJsonType()) { + } else if (childDataType.isJsonType() || targetType.isJsonType()) { return true; } else { return child().nullable(); diff --git a/regression-test/data/query_p0/cast/test_complextype_to_json.out b/regression-test/data/query_p0/cast/test_complextype_to_json.out new file mode 100644 index 00000000000000..7412966f76729a --- /dev/null +++ b/regression-test/data/query_p0/cast/test_complextype_to_json.out @@ -0,0 +1,67 @@ +-- This file is automatically generated. You should know what you did if you want to edit this +-- !select -- +[[],[]] + +-- !select -- +{"k1":"v31","k2":"300"} + +-- !select -- +[] + +-- !select -- +[123,456] + +-- !select -- +["abc","def"] + +-- !select -- +[null,"1","0","100","6.18","abc"] + +-- !select -- +[{"k1":"v41","k2":"400"},{"k1":"v41","k2":"400"}] + +-- !select -- +["[['k1', 'k2'], ['v41', '400']]","1","a","3.14"] + +-- !select -- +{"k1":"v31","k2":"300","a1":"['[['k1', 'k2'], ['v41', '400']]', '1', 'a', '3.14']"} + +-- !select -- +{"col1":"a","col2":1,"col3":"doris","col4":"aaaaa","col5":1.32} + +-- !sql_arr_agg_cast -- +1 ["LC","LB","alex"] ["V1_3","V1_2",null] +2 ["LC","LB","LA"] ["V2_3","V2_2","V2_1"] +3 ["LC",null,"LA"] ["V3_3",null,"V3_1"] +4 ["LC","LB","LA"] ["V4_3","V4_2","V4_1"] +5 [null,"LC","LB","LA"] ["V5_3","V5_3","V5_2","V5_1"] +6 [null,"LC","LC","LC","LC"] ["V6_3",null,"V6_3",null,"V6_3"] +7 [null,"LC","LC","LC","LC"] ["V7_3",null,"V7_3",null,"V7_3"] + +-- !sql_arr_agg_cast_json_object -- 
+{"id":1,"label":"[\\"LC\\",\\"LB\\",\\"alex\\"]","field":"[\\"V1_3\\",\\"V1_2\\",null]"} +{"id":2,"label":"[\\"LC\\",\\"LB\\",\\"LA\\"]","field":"[\\"V2_3\\",\\"V2_2\\",\\"V2_1\\"]"} +{"id":3,"label":"[\\"LC\\",null,\\"LA\\"]","field":"[\\"V3_3\\",null,\\"V3_1\\"]"} +{"id":4,"label":"[\\"LC\\",\\"LB\\",\\"LA\\"]","field":"[\\"V4_3\\",\\"V4_2\\",\\"V4_1\\"]"} +{"id":5,"label":"[null,\\"LC\\",\\"LB\\",\\"LA\\"]","field":"[\\"V5_3\\",\\"V5_3\\",\\"V5_2\\",\\"V5_1\\"]"} +{"id":6,"label":"[null,\\"LC\\",\\"LC\\",\\"LC\\",\\"LC\\"]","field":"[\\"V6_3\\",null,\\"V6_3\\",null,\\"V6_3\\"]"} +{"id":7,"label":"[null,\\"LC\\",\\"LC\\",\\"LC\\",\\"LC\\"]","field":"[\\"V7_3\\",null,\\"V7_3\\",null,\\"V7_3\\"]"} + +-- !sql_map_agg_cast -- +1 {"LC":"V1_3","LB":"V1_2","alex":null} +2 {"LC":"V2_3","LB":"V2_2","LA":"V2_1"} +3 {"LC":"V3_3","LA":"V3_1"} +4 {"LC":"V4_3","LB":"V4_2","LA":"V4_1"} +5 {"LC":"V5_3","LB":"V5_2","LA":"V5_1"} +6 {"LC":null} +7 {"LC":null} + +-- !sql_map_agg_cast_json_object -- +{"id":1,"map_label":"{\\"LC\\":\\"V1_3\\",\\"LB\\":\\"V1_2\\",\\"alex\\":null}"} +{"id":2,"map_label":"{\\"LC\\":\\"V2_3\\",\\"LB\\":\\"V2_2\\",\\"LA\\":\\"V2_1\\"}"} +{"id":3,"map_label":"{\\"LC\\":\\"V3_3\\",\\"LA\\":\\"V3_1\\"}"} +{"id":4,"map_label":"{\\"LC\\":\\"V4_3\\",\\"LB\\":\\"V4_2\\",\\"LA\\":\\"V4_1\\"}"} +{"id":5,"map_label":"{\\"LC\\":\\"V5_3\\",\\"LB\\":\\"V5_2\\",\\"LA\\":\\"V5_1\\"}"} +{"id":6,"map_label":"{\\"LC\\":null}"} +{"id":7,"map_label":"{\\"LC\\":null}"} + diff --git a/regression-test/suites/query_p0/cast/test_complextype_to_json.groovy b/regression-test/suites/query_p0/cast/test_complextype_to_json.groovy new file mode 100644 index 00000000000000..18d83d11124bc4 --- /dev/null +++ b/regression-test/suites/query_p0/cast/test_complextype_to_json.groovy @@ -0,0 +1,119 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. 
See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +suite('test_complextype_to_json', "query_p0") { + // do support in nereids + sql """ set experimental_enable_nereids_planner=true""" + sql """ set enable_fallback_to_original_planner=false; """ + + // literal cast + qt_select """SELECT CAST({} AS JSON)""" + qt_select """SELECT CAST({"k1":"v31", "k2": 300} AS JSON)""" + qt_select """SELECT CAST([] AS JSON)""" + qt_select """SELECT CAST([123, 456] AS JSON)""" + qt_select """SELECT CAST(["abc", "def"] AS JSON)""" + qt_select """SELECT CAST([null, true, false, 100, 6.18, "abc"] AS JSON)""" + qt_select """SELECT CAST([{"k1":"v41", "k2": 400}, {"k1":"v41", "k2": 400}] AS JSON)""" + qt_select """SELECT CAST([{"k1":"v41", "k2": 400}, 1, "a", 3.14] AS JSON)""" + qt_select """SELECT CAST({"k1":"v31", "k2": 300, "a1": [{"k1":"v41", "k2": 400}, 1, "a", 3.14]} AS JSON)""" + qt_select """SELECT CAST(struct('a', 1, 'doris', 'aaaaa', 1.32) AS JSON)""" + // invalid map key cast + test { + sql """SELECT CAST(map(1, 'a', 2, 'b') AS JSON)""" + exception "errCode = 2," + } + test { + sql """SELECT CAST([{1:"v41", 2: 400}] AS JSON)""" + exception "errCode = 2," + } + + + sql """ DROP TABLE IF EXISTS test_agg_to_json; """ + sql """ + CREATE TABLE `test_agg_to_json` ( + `id` int(11) NOT NULL, + `label_name` varchar(32) default null, + 
`value_field` string default null + ) ENGINE=OLAP + DUPLICATE KEY(`id`) + COMMENT 'OLAP' + DISTRIBUTED BY HASH(`id`) BUCKETS 1 + PROPERTIES ( + "replication_allocation" = "tag.location.default: 1", + "storage_format" = "V2", + "light_schema_change" = "true", + "disable_auto_compaction" = "false", + "enable_single_replica_compaction" = "false" + ); + """ + + sql """ + insert into `test_agg_to_json` values + (1, "alex",NULL), + (1, "LB", "V1_2"), + (1, "LC", "V1_3"), + (2, "LA", "V2_1"), + (2, "LB", "V2_2"), + (2, "LC", "V2_3"), + (3, "LA", "V3_1"), + (3, NULL, NULL), + (3, "LC", "V3_3"), + (4, "LA", "V4_1"), + (4, "LB", "V4_2"), + (4, "LC", "V4_3"), + (5, "LA", "V5_1"), + (5, "LB", "V5_2"), + (5, "LC", "V5_3"), + (5, NULL, "V5_3"), + (6, "LC", "V6_3"), + (6, "LC", NULL), + (6, "LC", "V6_3"), + (6, "LC", NULL), + (6, NULL, "V6_3"), + (7, "LC", "V7_3"), + (7, "LC", NULL), + (7, "LC", "V7_3"), + (7, "LC", NULL), + (7, NULL, "V7_3"); + """ + + // array_agg result cast to json then combination to json_object + qt_sql_arr_agg_cast """ select t.id, cast(t.label_name as json), cast(t.value_field as json) from (select id, array_agg(label_name) as label_name, array_agg(value_field) as value_field from test_agg_to_json group by id) t order by t.id; """ + qt_sql_arr_agg_cast_json_object """ select json_object("id", t.id, "label", cast(t.label_name as json), "field", cast(t.value_field as json)) from (select id, array_agg(label_name) as label_name, array_agg(value_field) as value_field from test_agg_to_json group by id) t order by t.id; """ + + // map_agg result cast to json then combination to json_object + qt_sql_map_agg_cast """ + WITH `labels` as ( + SELECT `id`, map_agg(`label_name`, `value_field`) m FROM test_agg_to_json GROUP BY `id` + ) + SELECT + id, + cast(m as json) + FROM `labels` + ORDER BY `id`; + """ + qt_sql_map_agg_cast_json_object """ + WITH `labels` as ( + SELECT `id`, map_agg(`label_name`, `value_field`) m FROM test_agg_to_json GROUP BY `id` + ) + SELECT + 
json_object("id", id, "map_label", cast(m as json)) + FROM `labels` + ORDER BY `id`; + """ + +} \ No newline at end of file From 5e4b75d8fc60f19b9cabffa2794dc88f4078510c Mon Sep 17 00:00:00 2001 From: amorynan Date: Wed, 19 Jun 2024 17:27:57 +0800 Subject: [PATCH 2/4] add limit --- fe/fe-common/src/main/java/org/apache/doris/catalog/Type.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fe/fe-common/src/main/java/org/apache/doris/catalog/Type.java b/fe/fe-common/src/main/java/org/apache/doris/catalog/Type.java index 0b366cf2ce6f04..3f6bb3fb647b51 100644 --- a/fe/fe-common/src/main/java/org/apache/doris/catalog/Type.java +++ b/fe/fe-common/src/main/java/org/apache/doris/catalog/Type.java @@ -835,7 +835,7 @@ public static boolean isImplicitlyCastable(Type t1, Type t2, boolean strict, boo public static boolean canCastTo(Type sourceType, Type targetType) { // In BE Code , we make any data type can cast to jsonb - if (targetType.isJsonbType()) { + if (targetType.isJsonbType() && sourceType.isComplexType()) { return true; } if (sourceType.isVariantType() && (targetType.isScalarType() || targetType.isArrayType())) { From 4d16e74be016868feeba4042391a20a71fa0b473 Mon Sep 17 00:00:00 2001 From: amorynan Date: Wed, 19 Jun 2024 18:01:24 +0800 Subject: [PATCH 3/4] fix comment --- .../main/java/org/apache/doris/catalog/Type.java | 1 - .../java/org/apache/doris/analysis/CastExpr.java | 15 --------------- .../rules/expression/check/CheckCast.java | 16 ++++++++++++++++ 3 files changed, 16 insertions(+), 16 deletions(-) diff --git a/fe/fe-common/src/main/java/org/apache/doris/catalog/Type.java b/fe/fe-common/src/main/java/org/apache/doris/catalog/Type.java index 3f6bb3fb647b51..3d3653fe4902b1 100644 --- a/fe/fe-common/src/main/java/org/apache/doris/catalog/Type.java +++ b/fe/fe-common/src/main/java/org/apache/doris/catalog/Type.java @@ -834,7 +834,6 @@ public static boolean isImplicitlyCastable(Type t1, Type t2, boolean strict, boo } public static boolean 
canCastTo(Type sourceType, Type targetType) { - // In BE Code , we make any data type can cast to jsonb if (targetType.isJsonbType() && sourceType.isComplexType()) { return true; } diff --git a/fe/fe-core/src/main/java/org/apache/doris/analysis/CastExpr.java b/fe/fe-core/src/main/java/org/apache/doris/analysis/CastExpr.java index cca7e6fe68b3cf..70f3243f084cef 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/analysis/CastExpr.java +++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/CastExpr.java @@ -119,19 +119,6 @@ public CastExpr(Type targetType, Expr e) { analysisDone(); } - public boolean checkMapKeyIsStringLikeForJson(Type complexType) { - if (complexType.isMapType()) { - return ((MapType) complexType).getKeyType().isStringType(); - } else if (complexType.isArrayType()) { - return checkMapKeyIsStringLikeForJson(((ArrayType) complexType).getItemType()); - } else if (complexType.isStructType()) { - for (StructField f : ((StructType) complexType).getFields()) { - return checkMapKeyIsStringLikeForJson(f.getType()); - } - } - return true; - } - /** * Just use for nereids, put analyze() in finalizeImplForNereids */ @@ -172,8 +159,6 @@ public CastExpr(Type targetType, Expr e, Void v) { NullableMode nullableMode = TYPE_NULLABLE_MODE.get(Pair.of(from, to)); // for complex type cast to jsonb we make ret is always nullable if (from.isComplexType() && type.isJsonbType()) { - Preconditions.checkState(checkMapKeyIsStringLikeForJson(from), - "check type " + from + " cast to json failed"); nullableMode = Function.NullableMode.ALWAYS_NULLABLE; } Preconditions.checkState(nullableMode != null, diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/expression/check/CheckCast.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/expression/check/CheckCast.java index 69a9105d653d81..45acc7d25998b6 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/expression/check/CheckCast.java +++ 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/expression/check/CheckCast.java @@ -57,6 +57,19 @@ private static Expression check(Cast cast) { return cast; } + public static boolean checkMapKeyIsStringLikeForJson(DataType complexType) { + if (complexType.isMapType()) { + return ((MapType) complexType).getKeyType().isStringType(); + } else if (complexType.isArrayType()) { + return checkMapKeyIsStringLikeForJson(((ArrayType) complexType).getItemType()); + } else if (complexType.isStructType()) { + for (StructField f : ((StructType) complexType).getFields()) { + return checkMapKeyIsStringLikeForJson(f.getDataType()); + } + } + return true; + } + private static boolean check(DataType originalType, DataType targetType) { if (originalType.isVariantType() && (targetType instanceof PrimitiveType || targetType.isArrayType())) { // variant could cast to primitive types and array @@ -92,6 +105,9 @@ private static boolean check(DataType originalType, DataType targetType) { } return true; } else if (originalType instanceof JsonType || targetType instanceof JsonType) { + if (originalType.isComplexType() && !checkMapKeyIsStringLikeForJson(originalType)) { + return false; + } return true; } else { return checkPrimitiveType(originalType, targetType); From 65d09a3e2ab7b6e2a92a5fd51e27140570e9fc4f Mon Sep 17 00:00:00 2001 From: amorynan Date: Wed, 19 Jun 2024 21:46:37 +0800 Subject: [PATCH 4/4] fix code --- .../org/apache/doris/analysis/CastExpr.java | 4 --- .../rules/expression/check/CheckCast.java | 32 +++++++++++-------- 2 files changed, 19 insertions(+), 17 deletions(-) diff --git a/fe/fe-core/src/main/java/org/apache/doris/analysis/CastExpr.java b/fe/fe-core/src/main/java/org/apache/doris/analysis/CastExpr.java index 70f3243f084cef..9f6a319224d98d 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/analysis/CastExpr.java +++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/CastExpr.java @@ -20,17 +20,13 @@ package org.apache.doris.analysis; -import 
org.apache.doris.catalog.ArrayType; import org.apache.doris.catalog.Env; import org.apache.doris.catalog.Function; import org.apache.doris.catalog.Function.NullableMode; import org.apache.doris.catalog.FunctionSet; -import org.apache.doris.catalog.MapType; import org.apache.doris.catalog.PrimitiveType; import org.apache.doris.catalog.ScalarFunction; import org.apache.doris.catalog.ScalarType; -import org.apache.doris.catalog.StructField; -import org.apache.doris.catalog.StructType; import org.apache.doris.catalog.Type; import org.apache.doris.catalog.TypeUtils; import org.apache.doris.common.AnalysisException; diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/expression/check/CheckCast.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/expression/check/CheckCast.java index 45acc7d25998b6..e76d7ef344d8ad 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/expression/check/CheckCast.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/expression/check/CheckCast.java @@ -57,19 +57,6 @@ private static Expression check(Cast cast) { return cast; } - public static boolean checkMapKeyIsStringLikeForJson(DataType complexType) { - if (complexType.isMapType()) { - return ((MapType) complexType).getKeyType().isStringType(); - } else if (complexType.isArrayType()) { - return checkMapKeyIsStringLikeForJson(((ArrayType) complexType).getItemType()); - } else if (complexType.isStructType()) { - for (StructField f : ((StructType) complexType).getFields()) { - return checkMapKeyIsStringLikeForJson(f.getDataType()); - } - } - return true; - } - private static boolean check(DataType originalType, DataType targetType) { if (originalType.isVariantType() && (targetType instanceof PrimitiveType || targetType.isArrayType())) { // variant could cast to primitive types and array @@ -143,4 +130,23 @@ private static boolean checkPrimitiveType(DataType originalType, DataType target } return true; } + + /** + * check if complexType 
contains a map at any nesting level; if so, every map key must be string like for json
+     *
+     * @param complexType need to check
+     * @return true if every map nested anywhere in complexType (map values, array items, ALL struct fields) has a string like key
+     */
+    public static boolean checkMapKeyIsStringLikeForJson(DataType complexType) {
+        if (complexType.isMapType()) {
+            return ((MapType) complexType).getKeyType().isStringLikeType()
+                    && checkMapKeyIsStringLikeForJson(((MapType) complexType).getValueType());
+        } else if (complexType.isArrayType()) {
+            return checkMapKeyIsStringLikeForJson(((ArrayType) complexType).getItemType());
+        } else if (complexType.isStructType()) {
+            return ((StructType) complexType).getFields().stream()
+                    .allMatch(f -> checkMapKeyIsStringLikeForJson(f.getDataType()));
+        }
+        return true;
+    }
 }