From ed7c0411ccf969cf74686632df873c29d2c4953f Mon Sep 17 00:00:00 2001 From: amorynan Date: Thu, 4 Jul 2024 15:00:38 +0800 Subject: [PATCH 1/6] change explode-json-array-xx func signature from string to json type --- .../functions/generator/ExplodeJsonArrayDouble.java | 3 ++- .../functions/generator/ExplodeJsonArrayDoubleOuter.java | 3 ++- .../expressions/functions/generator/ExplodeJsonArrayInt.java | 4 ++-- .../functions/generator/ExplodeJsonArrayIntOuter.java | 3 ++- .../expressions/functions/generator/ExplodeJsonArrayJson.java | 3 ++- .../functions/generator/ExplodeJsonArrayJsonOuter.java | 3 ++- .../functions/generator/ExplodeJsonArrayString.java | 3 ++- .../functions/generator/ExplodeJsonArrayStringOuter.java | 3 ++- 8 files changed, 16 insertions(+), 9 deletions(-) diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/generator/ExplodeJsonArrayDouble.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/generator/ExplodeJsonArrayDouble.java index e6072931325f4d..30fbbf5dde6e8d 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/generator/ExplodeJsonArrayDouble.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/generator/ExplodeJsonArrayDouble.java @@ -23,6 +23,7 @@ import org.apache.doris.nereids.trees.expressions.shape.UnaryExpression; import org.apache.doris.nereids.trees.expressions.visitor.ExpressionVisitor; import org.apache.doris.nereids.types.DoubleType; +import org.apache.doris.nereids.types.JsonType; import org.apache.doris.nereids.types.VarcharType; import com.google.common.base.Preconditions; @@ -36,7 +37,7 @@ public class ExplodeJsonArrayDouble extends TableGeneratingFunction implements UnaryExpression, PropagateNullable { public static final List SIGNATURES = ImmutableList.of( - FunctionSignature.ret(DoubleType.INSTANCE).args(VarcharType.SYSTEM_DEFAULT) + 
FunctionSignature.ret(DoubleType.INSTANCE).args(JsonType.INSTANCE) ); /** diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/generator/ExplodeJsonArrayDoubleOuter.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/generator/ExplodeJsonArrayDoubleOuter.java index 1bb8c0383f4353..5ddc66aa327c30 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/generator/ExplodeJsonArrayDoubleOuter.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/generator/ExplodeJsonArrayDoubleOuter.java @@ -23,6 +23,7 @@ import org.apache.doris.nereids.trees.expressions.shape.UnaryExpression; import org.apache.doris.nereids.trees.expressions.visitor.ExpressionVisitor; import org.apache.doris.nereids.types.DoubleType; +import org.apache.doris.nereids.types.JsonType; import org.apache.doris.nereids.types.VarcharType; import com.google.common.base.Preconditions; @@ -36,7 +37,7 @@ public class ExplodeJsonArrayDoubleOuter extends TableGeneratingFunction implements UnaryExpression, AlwaysNullable { public static final List SIGNATURES = ImmutableList.of( - FunctionSignature.ret(DoubleType.INSTANCE).args(VarcharType.SYSTEM_DEFAULT) + FunctionSignature.ret(DoubleType.INSTANCE).args(JsonType.INSTANCE) ); /** diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/generator/ExplodeJsonArrayInt.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/generator/ExplodeJsonArrayInt.java index 17277e1c9672f5..f10440e48dc1f0 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/generator/ExplodeJsonArrayInt.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/generator/ExplodeJsonArrayInt.java @@ -23,7 +23,7 @@ import org.apache.doris.nereids.trees.expressions.shape.UnaryExpression; import 
org.apache.doris.nereids.trees.expressions.visitor.ExpressionVisitor; import org.apache.doris.nereids.types.BigIntType; -import org.apache.doris.nereids.types.VarcharType; +import org.apache.doris.nereids.types.JsonType; import com.google.common.base.Preconditions; import com.google.common.collect.ImmutableList; @@ -36,7 +36,7 @@ public class ExplodeJsonArrayInt extends TableGeneratingFunction implements UnaryExpression, PropagateNullable { public static final List SIGNATURES = ImmutableList.of( - FunctionSignature.ret(BigIntType.INSTANCE).args(VarcharType.SYSTEM_DEFAULT) + FunctionSignature.ret(BigIntType.INSTANCE).args(JsonType.INSTANCE) ); /** diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/generator/ExplodeJsonArrayIntOuter.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/generator/ExplodeJsonArrayIntOuter.java index 2a8820c717c06e..3592ad53260978 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/generator/ExplodeJsonArrayIntOuter.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/generator/ExplodeJsonArrayIntOuter.java @@ -23,6 +23,7 @@ import org.apache.doris.nereids.trees.expressions.shape.UnaryExpression; import org.apache.doris.nereids.trees.expressions.visitor.ExpressionVisitor; import org.apache.doris.nereids.types.BigIntType; +import org.apache.doris.nereids.types.JsonType; import org.apache.doris.nereids.types.VarcharType; import com.google.common.base.Preconditions; @@ -36,7 +37,7 @@ public class ExplodeJsonArrayIntOuter extends TableGeneratingFunction implements UnaryExpression, AlwaysNullable { public static final List SIGNATURES = ImmutableList.of( - FunctionSignature.ret(BigIntType.INSTANCE).args(VarcharType.SYSTEM_DEFAULT) + FunctionSignature.ret(BigIntType.INSTANCE).args(JsonType.INSTANCE) ); /** diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/generator/ExplodeJsonArrayJson.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/generator/ExplodeJsonArrayJson.java index a07e0e5d8fae22..0a0465ea7c44e1 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/generator/ExplodeJsonArrayJson.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/generator/ExplodeJsonArrayJson.java @@ -22,6 +22,7 @@ import org.apache.doris.nereids.trees.expressions.functions.PropagateNullable; import org.apache.doris.nereids.trees.expressions.shape.UnaryExpression; import org.apache.doris.nereids.trees.expressions.visitor.ExpressionVisitor; +import org.apache.doris.nereids.types.JsonType; import org.apache.doris.nereids.types.VarcharType; import com.google.common.base.Preconditions; @@ -35,7 +36,7 @@ */ public class ExplodeJsonArrayJson extends TableGeneratingFunction implements UnaryExpression, PropagateNullable { public static final List SIGNATURES = ImmutableList.of( - FunctionSignature.ret(VarcharType.SYSTEM_DEFAULT).args(VarcharType.SYSTEM_DEFAULT) + FunctionSignature.ret(JsonType.INSTANCE).args(JsonType.INSTANCE) ); /** diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/generator/ExplodeJsonArrayJsonOuter.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/generator/ExplodeJsonArrayJsonOuter.java index bb4a34905a4be0..83405dd801e5a9 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/generator/ExplodeJsonArrayJsonOuter.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/generator/ExplodeJsonArrayJsonOuter.java @@ -22,6 +22,7 @@ import org.apache.doris.nereids.trees.expressions.functions.PropagateNullable; import org.apache.doris.nereids.trees.expressions.shape.UnaryExpression; import 
org.apache.doris.nereids.trees.expressions.visitor.ExpressionVisitor; +import org.apache.doris.nereids.types.JsonType; import org.apache.doris.nereids.types.VarcharType; import com.google.common.base.Preconditions; @@ -35,7 +36,7 @@ */ public class ExplodeJsonArrayJsonOuter extends TableGeneratingFunction implements UnaryExpression, PropagateNullable { public static final List SIGNATURES = ImmutableList.of( - FunctionSignature.ret(VarcharType.SYSTEM_DEFAULT).args(VarcharType.SYSTEM_DEFAULT) + FunctionSignature.ret(JsonType.INSTANCE).args(JsonType.INSTANCE) ); /** diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/generator/ExplodeJsonArrayString.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/generator/ExplodeJsonArrayString.java index 653cb36ca213d0..b7acc8383c2119 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/generator/ExplodeJsonArrayString.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/generator/ExplodeJsonArrayString.java @@ -22,6 +22,7 @@ import org.apache.doris.nereids.trees.expressions.functions.PropagateNullable; import org.apache.doris.nereids.trees.expressions.shape.UnaryExpression; import org.apache.doris.nereids.trees.expressions.visitor.ExpressionVisitor; +import org.apache.doris.nereids.types.JsonType; import org.apache.doris.nereids.types.VarcharType; import com.google.common.base.Preconditions; @@ -35,7 +36,7 @@ public class ExplodeJsonArrayString extends TableGeneratingFunction implements UnaryExpression, PropagateNullable { public static final List SIGNATURES = ImmutableList.of( - FunctionSignature.ret(VarcharType.SYSTEM_DEFAULT).args(VarcharType.SYSTEM_DEFAULT) + FunctionSignature.ret(VarcharType.SYSTEM_DEFAULT).args(JsonType.INSTANCE) ); /** diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/generator/ExplodeJsonArrayStringOuter.java 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/generator/ExplodeJsonArrayStringOuter.java index dfb21ab826f09a..0c97f190e70c1d 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/generator/ExplodeJsonArrayStringOuter.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/generator/ExplodeJsonArrayStringOuter.java @@ -22,6 +22,7 @@ import org.apache.doris.nereids.trees.expressions.functions.AlwaysNullable; import org.apache.doris.nereids.trees.expressions.shape.UnaryExpression; import org.apache.doris.nereids.trees.expressions.visitor.ExpressionVisitor; +import org.apache.doris.nereids.types.JsonType; import org.apache.doris.nereids.types.VarcharType; import com.google.common.base.Preconditions; @@ -35,7 +36,7 @@ public class ExplodeJsonArrayStringOuter extends TableGeneratingFunction implements UnaryExpression, AlwaysNullable { public static final List SIGNATURES = ImmutableList.of( - FunctionSignature.ret(VarcharType.SYSTEM_DEFAULT).args(VarcharType.SYSTEM_DEFAULT) + FunctionSignature.ret(VarcharType.SYSTEM_DEFAULT).args(JsonType.INSTANCE) ); /** From b8e9aa18b742c1b63d978a0eff60a041b88c2054 Mon Sep 17 00:00:00 2001 From: amorynan Date: Thu, 4 Jul 2024 15:11:01 +0800 Subject: [PATCH 2/6] fix unused imports --- .../expressions/functions/generator/ExplodeJsonArrayDouble.java | 1 - .../functions/generator/ExplodeJsonArrayDoubleOuter.java | 1 - .../functions/generator/ExplodeJsonArrayIntOuter.java | 1 - .../expressions/functions/generator/ExplodeJsonArrayJson.java | 1 - .../functions/generator/ExplodeJsonArrayJsonOuter.java | 1 - 5 files changed, 5 deletions(-) diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/generator/ExplodeJsonArrayDouble.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/generator/ExplodeJsonArrayDouble.java index 30fbbf5dde6e8d..9af6eb99fcd49a 100644 ---
a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/generator/ExplodeJsonArrayDouble.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/generator/ExplodeJsonArrayDouble.java @@ -24,7 +24,6 @@ import org.apache.doris.nereids.trees.expressions.visitor.ExpressionVisitor; import org.apache.doris.nereids.types.DoubleType; import org.apache.doris.nereids.types.JsonType; -import org.apache.doris.nereids.types.VarcharType; import com.google.common.base.Preconditions; import com.google.common.collect.ImmutableList; diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/generator/ExplodeJsonArrayDoubleOuter.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/generator/ExplodeJsonArrayDoubleOuter.java index 5ddc66aa327c30..14e202adad40d3 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/generator/ExplodeJsonArrayDoubleOuter.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/generator/ExplodeJsonArrayDoubleOuter.java @@ -24,7 +24,6 @@ import org.apache.doris.nereids.trees.expressions.visitor.ExpressionVisitor; import org.apache.doris.nereids.types.DoubleType; import org.apache.doris.nereids.types.JsonType; -import org.apache.doris.nereids.types.VarcharType; import com.google.common.base.Preconditions; import com.google.common.collect.ImmutableList; diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/generator/ExplodeJsonArrayIntOuter.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/generator/ExplodeJsonArrayIntOuter.java index 3592ad53260978..d3a42b2a2ab0e1 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/generator/ExplodeJsonArrayIntOuter.java +++ 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/generator/ExplodeJsonArrayIntOuter.java @@ -24,7 +24,6 @@ import org.apache.doris.nereids.trees.expressions.visitor.ExpressionVisitor; import org.apache.doris.nereids.types.BigIntType; import org.apache.doris.nereids.types.JsonType; -import org.apache.doris.nereids.types.VarcharType; import com.google.common.base.Preconditions; import com.google.common.collect.ImmutableList; diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/generator/ExplodeJsonArrayJson.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/generator/ExplodeJsonArrayJson.java index 0a0465ea7c44e1..2f8d27d2e4aeee 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/generator/ExplodeJsonArrayJson.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/generator/ExplodeJsonArrayJson.java @@ -23,7 +23,6 @@ import org.apache.doris.nereids.trees.expressions.shape.UnaryExpression; import org.apache.doris.nereids.trees.expressions.visitor.ExpressionVisitor; import org.apache.doris.nereids.types.JsonType; -import org.apache.doris.nereids.types.VarcharType; import com.google.common.base.Preconditions; import com.google.common.collect.ImmutableList; diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/generator/ExplodeJsonArrayJsonOuter.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/generator/ExplodeJsonArrayJsonOuter.java index 83405dd801e5a9..acfc3209963cb3 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/generator/ExplodeJsonArrayJsonOuter.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/generator/ExplodeJsonArrayJsonOuter.java @@ -23,7 +23,6 @@ import org.apache.doris.nereids.trees.expressions.shape.UnaryExpression; import 
org.apache.doris.nereids.trees.expressions.visitor.ExpressionVisitor; import org.apache.doris.nereids.types.JsonType; -import org.apache.doris.nereids.types.VarcharType; import com.google.common.base.Preconditions; import com.google.common.collect.ImmutableList; From d9fe3d5b156c2dd9ee9980bac0bb0be7d0c777ac Mon Sep 17 00:00:00 2001 From: amorynan Date: Tue, 6 Aug 2024 19:01:57 +0800 Subject: [PATCH 3/6] change be from rapid json to jsonb --- .../table_function/vexplode_json_array.cpp | 10 +- .../table_function/vexplode_json_array.h | 136 ++++++++++++++++++ be/src/vec/functions/function_fake.cpp | 3 +- 3 files changed, 143 insertions(+), 6 deletions(-) diff --git a/be/src/vec/exprs/table_function/vexplode_json_array.cpp b/be/src/vec/exprs/table_function/vexplode_json_array.cpp index 00c4d92a359e4f..75ab71dbf6068a 100644 --- a/be/src/vec/exprs/table_function/vexplode_json_array.cpp +++ b/be/src/vec/exprs/table_function/vexplode_json_array.cpp @@ -26,6 +26,8 @@ #include #include "common/status.h" +#include "util/jsonb_parser.h" +#include "util/jsonb_utils.h" #include "vec/columns/column.h" #include "vec/columns/column_nullable.h" #include "vec/columns/columns_number.h" @@ -59,10 +61,10 @@ void VExplodeJsonArrayTableFunction::process_row(size_t row_idx) { StringRef text = _text_column->get_data_at(row_idx); if (text.data != nullptr) { - rapidjson::Document document; - document.Parse(text.data, text.size); - if (!document.HasParseError() && document.IsArray() && document.GetArray().Size()) { - _cur_size = _parsed_data.set_output(document, document.GetArray().Size()); + JsonbDocument* doc = JsonbDocument::createDocument(text.data, text.size); + if (doc->getValue()->isArray() && doc->getValue()->size() > 0) { + auto* a = (ArrayVal*)doc->getValue(); + _cur_size = _parsed_data.set_output(*a, a->numElem()); } } } diff --git a/be/src/vec/exprs/table_function/vexplode_json_array.h b/be/src/vec/exprs/table_function/vexplode_json_array.h index 968fa5e91a402d..4ef17cf2bbc07c 
100644 --- a/be/src/vec/exprs/table_function/vexplode_json_array.h +++ b/be/src/vec/exprs/table_function/vexplode_json_array.h @@ -32,6 +32,7 @@ #include "vec/core/types.h" #include "vec/data_types/data_type.h" #include "vec/exprs/table_function/table_function.h" +#include "vec/functions/function_string.h" namespace doris::vectorized { @@ -44,6 +45,7 @@ struct ParsedData { _values_null_flag.clear(); } virtual int set_output(rapidjson::Document& document, int value_size) = 0; + virtual int set_output(ArrayVal& array_doc, int value_size) = 0; virtual void insert_result_from_parsed_data(MutableColumnPtr& column, int64_t cur_offset, int max_step) = 0; virtual void insert_many_same_value_from_parsed_data(MutableColumnPtr& column, @@ -90,6 +92,36 @@ struct ParsedDataInt : public ParsedData { } return value_size; } + int set_output(ArrayVal& array_doc, int value_size) override { + _values_null_flag.resize(value_size, 0); + _backup_data.resize(value_size); + int i = 0; + for (auto& val : array_doc) { + if (val.isInt8()) { + _backup_data[i] = static_cast(val).val(); + } else if (val.isInt16()) { + _backup_data[i] = static_cast(val).val(); + } else if (val.isInt32()) { + _backup_data[i] = static_cast(val).val(); + } else if (val.isInt64()) { + _backup_data[i] = static_cast(val).val(); + } else if (val.isDouble()) { + auto value = static_cast(val).val(); + if (value > MAX_VALUE) { + _backup_data[i] = MAX_VALUE; + } else if (value < MIN_VALUE) { + _backup_data[i] = MIN_VALUE; + } else { + _backup_data[i] = long(value); + } + } else { + _values_null_flag[i] = 1; + _backup_data[i] = 0; + } + ++i; + } + return value_size; + } void insert_result_from_parsed_data(MutableColumnPtr& column, int64_t cur_offset, int max_step) override { @@ -121,6 +153,22 @@ struct ParsedDataDouble : public ParsedData { return value_size; } + int set_output(ArrayVal& array_doc, int value_size) override { + _values_null_flag.resize(value_size, 0); + _backup_data.resize(value_size); + int i = 0; + for 
(auto& val : array_doc) { + if (val.isDouble()) { + _backup_data[i] = static_cast(val).val(); + } else { + _backup_data[i] = 0; + _values_null_flag[i] = 1; + } + ++i; + } + return value_size; + } + void insert_result_from_parsed_data(MutableColumnPtr& column, int64_t cur_offset, int max_step) override { assert_cast(column.get()) @@ -220,6 +268,69 @@ struct ParsedDataString : public ParsedDataStringBase { } return value_size; } + + int set_output(ArrayVal& array_doc, int value_size) override { + _data_string_ref.clear(); + _backup_data.clear(); + _values_null_flag.clear(); + int32_t wbytes = 0; + for (auto& val : array_doc) { + switch (val.type()) { + case JsonbType::T_String: { + _backup_data.emplace_back(static_cast(val).getBlob(), + static_cast(val).getBlobLen()); + _values_null_flag.emplace_back(false); + break; + // do not set _data_string here. + // Because the address of the string stored in `_backup_data` may + // change each time `emplace_back()` is called. + } + case JsonbType::T_Int64: { + wbytes = snprintf(tmp_buf, sizeof(tmp_buf), "%" PRId64, + static_cast(val).val()); + _backup_data.emplace_back(tmp_buf, wbytes); + _values_null_flag.emplace_back(false); + break; + } + case JsonbType::T_Double: { + wbytes = snprintf(tmp_buf, sizeof(tmp_buf), "%f", + static_cast(val).val()); + _backup_data.emplace_back(tmp_buf, wbytes); + _values_null_flag.emplace_back(false); + break; + } + case JsonbType::T_Int32: { + wbytes = snprintf(tmp_buf, sizeof(tmp_buf), "%d", + static_cast(val).val()); + _backup_data.emplace_back(tmp_buf, wbytes); + _values_null_flag.emplace_back(false); + break; + } + case JsonbType::T_True: + _backup_data.emplace_back(TRUE_VALUE); + _values_null_flag.emplace_back(false); + break; + case JsonbType::T_False: + _backup_data.emplace_back(FALSE_VALUE); + _values_null_flag.emplace_back(false); + break; + case JsonbType::T_Null: + _backup_data.emplace_back(); + _values_null_flag.emplace_back(true); + break; + default: + _backup_data.emplace_back(); 
+ _values_null_flag.emplace_back(true); + break; + } + } + // Must set _data_string at the end, so that we can + // save the real addr of string in `_backup_data` to `_data_string`. + for (auto& str : _backup_data) { + _data_string_ref.emplace_back(str.data(), str.length()); + } + return value_size; + } }; struct ParsedDataJSON : public ParsedDataStringBase { @@ -246,6 +357,31 @@ struct ParsedDataJSON : public ParsedDataStringBase { } return value_size; } + + int set_output(ArrayVal& array_doc, int value_size) override { + _data_string_ref.clear(); + _backup_data.clear(); + _values_null_flag.clear(); + auto writer = std::make_unique(); + for (auto& v : array_doc) { + if (v.isObject()) { + writer->reset(); + writer->writeValue(&v); + _backup_data.emplace_back(writer->getOutput()->getBuffer(), + writer->getOutput()->getSize()); + _values_null_flag.emplace_back(false); + } else { + _backup_data.emplace_back(); + _values_null_flag.emplace_back(true); + } + } + // Must set _data_string at the end, so that we can + // save the real addr of string in `_backup_data` to `_data_string`. 
+ for (auto& str : _backup_data) { + _data_string_ref.emplace_back(str); + } + return value_size; + } }; template diff --git a/be/src/vec/functions/function_fake.cpp b/be/src/vec/functions/function_fake.cpp index 0353b3a2a7cfc5..62d5fe4e893bc0 100644 --- a/be/src/vec/functions/function_fake.cpp +++ b/be/src/vec/functions/function_fake.cpp @@ -132,8 +132,7 @@ void register_function_fake(SimpleFunctionFactory& factory) { register_table_function_expand_outer_default(factory, "explode_json_array_int"); register_table_function_expand_outer_default(factory, "explode_json_array_string"); - register_table_function_expand_outer_default(factory, - "explode_json_array_json"); + register_table_function_expand_outer_default(factory, "explode_json_array_json"); register_table_function_expand_outer_default(factory, "explode_json_array_double"); register_table_function_expand_outer_default(factory, "explode_bitmap"); From 7c005fb50fff8e434041131b14170de3e89f8845 Mon Sep 17 00:00:00 2001 From: amorynan Date: Tue, 6 Aug 2024 22:47:16 +0800 Subject: [PATCH 4/6] fix be --- .../exprs/table_function/vexplode_json_array.cpp | 6 ++++-- .../vec/exprs/table_function/vexplode_json_array.h | 14 ++++++++++++++ .../table_function/explode_json_array.out | 4 ++-- .../table_function/explode_json_array.groovy | 2 +- 4 files changed, 21 insertions(+), 5 deletions(-) diff --git a/be/src/vec/exprs/table_function/vexplode_json_array.cpp b/be/src/vec/exprs/table_function/vexplode_json_array.cpp index 75ab71dbf6068a..3f8bf9154bfe0b 100644 --- a/be/src/vec/exprs/table_function/vexplode_json_array.cpp +++ b/be/src/vec/exprs/table_function/vexplode_json_array.cpp @@ -62,9 +62,11 @@ void VExplodeJsonArrayTableFunction::process_row(size_t row_idx) { StringRef text = _text_column->get_data_at(row_idx); if (text.data != nullptr) { JsonbDocument* doc = JsonbDocument::createDocument(text.data, text.size); - if (doc->getValue()->isArray() && doc->getValue()->size() > 0) { + if (doc && doc->getValue() && 
doc->getValue()->isArray()) { auto* a = (ArrayVal*)doc->getValue(); - _cur_size = _parsed_data.set_output(*a, a->numElem()); + if (a->numElem() > 0) { + _cur_size = _parsed_data.set_output(*a, a->numElem()); + } } } } diff --git a/be/src/vec/exprs/table_function/vexplode_json_array.h b/be/src/vec/exprs/table_function/vexplode_json_array.h index 4ef17cf2bbc07c..3b251b2d3da84f 100644 --- a/be/src/vec/exprs/table_function/vexplode_json_array.h +++ b/be/src/vec/exprs/table_function/vexplode_json_array.h @@ -285,6 +285,20 @@ struct ParsedDataString : public ParsedDataStringBase { // Because the address of the string stored in `_backup_data` may // change each time `emplace_back()` is called. } + case JsonbType::T_Int8: { + wbytes = snprintf(tmp_buf, sizeof(tmp_buf), "%d", + static_cast(val).val()); + _backup_data.emplace_back(tmp_buf, wbytes); + _values_null_flag.emplace_back(false); + break; + } + case JsonbType::T_Int16: { + wbytes = snprintf(tmp_buf, sizeof(tmp_buf), "%d", + static_cast(val).val()); + _backup_data.emplace_back(tmp_buf, wbytes); + _values_null_flag.emplace_back(false); + break; + } case JsonbType::T_Int64: { wbytes = snprintf(tmp_buf, sizeof(tmp_buf), "%" PRId64, static_cast(val).val()); diff --git a/regression-test/data/query_p0/sql_functions/table_function/explode_json_array.out b/regression-test/data/query_p0/sql_functions/table_function/explode_json_array.out index ccc012e1121861..0f64e4f61c7d77 100644 --- a/regression-test/data/query_p0/sql_functions/table_function/explode_json_array.out +++ b/regression-test/data/query_p0/sql_functions/table_function/explode_json_array.out @@ -94,10 +94,10 @@ \N 80 {"id":3,"name":"Bob"} -- !explode_json_array12 -- -9223372036854775807 8 +0 4 +9223372036854775807 4 -- !explode_json_array13 -- --9223372036854775808 8 -- !explode_json_array14 -- 100 John 30 1 Street 1 1.23 -1273982982312333 diff --git a/regression-test/suites/query_p0/sql_functions/table_function/explode_json_array.groovy 
b/regression-test/suites/query_p0/sql_functions/table_function/explode_json_array.groovy index e6ed1b62a24060..e4b13c96dd558d 100644 --- a/regression-test/suites/query_p0/sql_functions/table_function/explode_json_array.groovy +++ b/regression-test/suites/query_p0/sql_functions/table_function/explode_json_array.groovy @@ -62,7 +62,7 @@ suite("explode_json_array") { TMP AS e1) AS T ORDER BY age, e1""" qt_outer_join_explode_json_array11 """SELECT id, age, e1 FROM (SELECT id, age, e1 FROM (SELECT b.id, a.age FROM person a LEFT JOIN person b ON a.id=b.age)T LATERAL VIEW EXPLODE_JSON_ARRAY_JSON('[{"id":1,"name":"John"},{"id":2,"name":"Mary"},{"id":3,"name":"Bob"}]') - TMP AS e1) AS T ORDER BY age, e1""" + TMP AS e1) AS T ORDER BY age, cast(e1 as string)""" qt_explode_json_array12 """ SELECT c_age, COUNT(1) FROM person LATERAL VIEW EXPLODE_JSON_ARRAY_INT('[9223372036854775807,9223372036854775808]') t1 as c_age From 32532f9701b622176e0a873412bc3f2aadc16d05 Mon Sep 17 00:00:00 2001 From: amorynan Date: Wed, 7 Aug 2024 20:17:53 +0800 Subject: [PATCH 5/6] fixed --- .../table_function/vexplode_json_array.cpp | 20 ++++++++++++++----- .../table_function/vexplode_json_array.h | 1 + .../generator/ExplodeJsonArrayDouble.java | 4 +++- .../ExplodeJsonArrayDoubleOuter.java | 4 +++- .../generator/ExplodeJsonArrayInt.java | 4 +++- .../generator/ExplodeJsonArrayIntOuter.java | 4 +++- .../generator/ExplodeJsonArrayString.java | 3 ++- .../ExplodeJsonArrayStringOuter.java | 3 ++- .../table_function/explode_json_array.out | 4 ++-- 9 files changed, 34 insertions(+), 13 deletions(-) diff --git a/be/src/vec/exprs/table_function/vexplode_json_array.cpp b/be/src/vec/exprs/table_function/vexplode_json_array.cpp index 3f8bf9154bfe0b..f72c8ec25aee02 100644 --- a/be/src/vec/exprs/table_function/vexplode_json_array.cpp +++ b/be/src/vec/exprs/table_function/vexplode_json_array.cpp @@ -52,6 +52,7 @@ Status VExplodeJsonArrayTableFunction::process_init(Block* block, Runt 
RETURN_IF_ERROR(_expr_context->root()->children()[0]->execute(_expr_context.get(), block, &text_column_idx)); _text_column = block->get_by_position(text_column_idx).column; + _text_datatype = block->get_by_position(text_column_idx).type; return Status::OK(); } @@ -61,11 +62,19 @@ void VExplodeJsonArrayTableFunction::process_row(size_t row_idx) { StringRef text = _text_column->get_data_at(row_idx); if (text.data != nullptr) { - JsonbDocument* doc = JsonbDocument::createDocument(text.data, text.size); - if (doc && doc->getValue() && doc->getValue()->isArray()) { - auto* a = (ArrayVal*)doc->getValue(); - if (a->numElem() > 0) { - _cur_size = _parsed_data.set_output(*a, a->numElem()); + if (WhichDataType(_text_datatype).is_json()) { + JsonbDocument* doc = JsonbDocument::createDocument(text.data, text.size); + if (doc && doc->getValue() && doc->getValue()->isArray()) { + auto* a = (ArrayVal*)doc->getValue(); + if (a->numElem() > 0) { + _cur_size = _parsed_data.set_output(*a, a->numElem()); + } + } + } else { + rapidjson::Document document; + document.Parse(text.data, text.size); + if (!document.HasParseError() && document.IsArray() && document.GetArray().Size()) { + _cur_size = _parsed_data.set_output(document, document.GetArray().Size()); } } } @@ -74,6 +83,7 @@ void VExplodeJsonArrayTableFunction::process_row(size_t row_idx) { template void VExplodeJsonArrayTableFunction::process_close() { _text_column = nullptr; + _text_datatype = nullptr; _parsed_data.reset(); } diff --git a/be/src/vec/exprs/table_function/vexplode_json_array.h b/be/src/vec/exprs/table_function/vexplode_json_array.h index 3b251b2d3da84f..28428b8f89f475 100644 --- a/be/src/vec/exprs/table_function/vexplode_json_array.h +++ b/be/src/vec/exprs/table_function/vexplode_json_array.h @@ -417,6 +417,7 @@ class VExplodeJsonArrayTableFunction final : public TableFunction { void _insert_values_into_column(MutableColumnPtr& column, int max_step); DataImpl _parsed_data; ColumnPtr _text_column; + DataTypePtr 
_text_datatype; }; } // namespace doris::vectorized \ No newline at end of file diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/generator/ExplodeJsonArrayDouble.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/generator/ExplodeJsonArrayDouble.java index 9af6eb99fcd49a..c24477c20d199c 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/generator/ExplodeJsonArrayDouble.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/generator/ExplodeJsonArrayDouble.java @@ -24,6 +24,7 @@ import org.apache.doris.nereids.trees.expressions.visitor.ExpressionVisitor; import org.apache.doris.nereids.types.DoubleType; import org.apache.doris.nereids.types.JsonType; +import org.apache.doris.nereids.types.VarcharType; import com.google.common.base.Preconditions; import com.google.common.collect.ImmutableList; @@ -36,7 +37,8 @@ public class ExplodeJsonArrayDouble extends TableGeneratingFunction implements UnaryExpression, PropagateNullable { public static final List SIGNATURES = ImmutableList.of( - FunctionSignature.ret(DoubleType.INSTANCE).args(JsonType.INSTANCE) + FunctionSignature.ret(DoubleType.INSTANCE).args(JsonType.INSTANCE), + FunctionSignature.ret(DoubleType.INSTANCE).args(VarcharType.SYSTEM_DEFAULT) ); /** diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/generator/ExplodeJsonArrayDoubleOuter.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/generator/ExplodeJsonArrayDoubleOuter.java index 14e202adad40d3..a2e3609c48cbf5 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/generator/ExplodeJsonArrayDoubleOuter.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/generator/ExplodeJsonArrayDoubleOuter.java @@ -24,6 +24,7 @@ import 
org.apache.doris.nereids.trees.expressions.visitor.ExpressionVisitor; import org.apache.doris.nereids.types.DoubleType; import org.apache.doris.nereids.types.JsonType; +import org.apache.doris.nereids.types.VarcharType; import com.google.common.base.Preconditions; import com.google.common.collect.ImmutableList; @@ -36,7 +37,8 @@ public class ExplodeJsonArrayDoubleOuter extends TableGeneratingFunction implements UnaryExpression, AlwaysNullable { public static final List SIGNATURES = ImmutableList.of( - FunctionSignature.ret(DoubleType.INSTANCE).args(JsonType.INSTANCE) + FunctionSignature.ret(DoubleType.INSTANCE).args(JsonType.INSTANCE), + FunctionSignature.ret(DoubleType.INSTANCE).args(VarcharType.SYSTEM_DEFAULT) ); /** diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/generator/ExplodeJsonArrayInt.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/generator/ExplodeJsonArrayInt.java index f10440e48dc1f0..86db75733755f3 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/generator/ExplodeJsonArrayInt.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/generator/ExplodeJsonArrayInt.java @@ -24,6 +24,7 @@ import org.apache.doris.nereids.trees.expressions.visitor.ExpressionVisitor; import org.apache.doris.nereids.types.BigIntType; import org.apache.doris.nereids.types.JsonType; +import org.apache.doris.nereids.types.VarcharType; import com.google.common.base.Preconditions; import com.google.common.collect.ImmutableList; @@ -36,7 +37,8 @@ public class ExplodeJsonArrayInt extends TableGeneratingFunction implements UnaryExpression, PropagateNullable { public static final List SIGNATURES = ImmutableList.of( - FunctionSignature.ret(BigIntType.INSTANCE).args(JsonType.INSTANCE) + FunctionSignature.ret(BigIntType.INSTANCE).args(JsonType.INSTANCE), + FunctionSignature.ret(BigIntType.INSTANCE).args(VarcharType.SYSTEM_DEFAULT) ); 
/** diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/generator/ExplodeJsonArrayIntOuter.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/generator/ExplodeJsonArrayIntOuter.java index d3a42b2a2ab0e1..a0eb24e7453ac7 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/generator/ExplodeJsonArrayIntOuter.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/generator/ExplodeJsonArrayIntOuter.java @@ -24,6 +24,7 @@ import org.apache.doris.nereids.trees.expressions.visitor.ExpressionVisitor; import org.apache.doris.nereids.types.BigIntType; import org.apache.doris.nereids.types.JsonType; +import org.apache.doris.nereids.types.VarcharType; import com.google.common.base.Preconditions; import com.google.common.collect.ImmutableList; @@ -36,7 +37,8 @@ public class ExplodeJsonArrayIntOuter extends TableGeneratingFunction implements UnaryExpression, AlwaysNullable { public static final List SIGNATURES = ImmutableList.of( - FunctionSignature.ret(BigIntType.INSTANCE).args(JsonType.INSTANCE) + FunctionSignature.ret(BigIntType.INSTANCE).args(JsonType.INSTANCE), + FunctionSignature.ret(VarcharType.SYSTEM_DEFAULT).args(VarcharType.SYSTEM_DEFAULT) ); /** diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/generator/ExplodeJsonArrayString.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/generator/ExplodeJsonArrayString.java index b7acc8383c2119..04717cd5c09b85 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/generator/ExplodeJsonArrayString.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/generator/ExplodeJsonArrayString.java @@ -36,7 +36,8 @@ public class ExplodeJsonArrayString extends TableGeneratingFunction implements UnaryExpression, PropagateNullable { public static final List 
SIGNATURES = ImmutableList.of( - FunctionSignature.ret(VarcharType.SYSTEM_DEFAULT).args(JsonType.INSTANCE) + FunctionSignature.ret(VarcharType.SYSTEM_DEFAULT).args(JsonType.INSTANCE), + FunctionSignature.ret(VarcharType.SYSTEM_DEFAULT).args(VarcharType.SYSTEM_DEFAULT) ); /** diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/generator/ExplodeJsonArrayStringOuter.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/generator/ExplodeJsonArrayStringOuter.java index 0c97f190e70c1d..03507aa9799424 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/generator/ExplodeJsonArrayStringOuter.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/generator/ExplodeJsonArrayStringOuter.java @@ -36,7 +36,8 @@ public class ExplodeJsonArrayStringOuter extends TableGeneratingFunction implements UnaryExpression, AlwaysNullable { public static final List SIGNATURES = ImmutableList.of( - FunctionSignature.ret(VarcharType.SYSTEM_DEFAULT).args(JsonType.INSTANCE) + FunctionSignature.ret(VarcharType.SYSTEM_DEFAULT).args(JsonType.INSTANCE), + FunctionSignature.ret(VarcharType.SYSTEM_DEFAULT).args(VarcharType.SYSTEM_DEFAULT) ); /** diff --git a/regression-test/data/query_p0/sql_functions/table_function/explode_json_array.out b/regression-test/data/query_p0/sql_functions/table_function/explode_json_array.out index 0f64e4f61c7d77..ccc012e1121861 100644 --- a/regression-test/data/query_p0/sql_functions/table_function/explode_json_array.out +++ b/regression-test/data/query_p0/sql_functions/table_function/explode_json_array.out @@ -94,10 +94,10 @@ \N 80 {"id":3,"name":"Bob"} -- !explode_json_array12 -- -0 4 -9223372036854775807 4 +9223372036854775807 8 -- !explode_json_array13 -- +-9223372036854775808 8 -- !explode_json_array14 -- 100 John 30 1 Street 1 1.23 -1273982982312333 From 90cfcd4fd3334347544f6c9cf5a23e66c0a574a9 Mon Sep 17 00:00:00 2001 From: 
amorynan Date: Thu, 8 Aug 2024 18:18:35 +0800 Subject: [PATCH 6/6] fix intOuter --- .../functions/generator/ExplodeJsonArrayIntOuter.java | 2 +- .../suites/nereids_function_p0/gen_function/gen.groovy | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/generator/ExplodeJsonArrayIntOuter.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/generator/ExplodeJsonArrayIntOuter.java index a0eb24e7453ac7..fb7360959a4a47 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/generator/ExplodeJsonArrayIntOuter.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/generator/ExplodeJsonArrayIntOuter.java @@ -38,7 +38,7 @@ public class ExplodeJsonArrayIntOuter extends TableGeneratingFunction implements public static final List SIGNATURES = ImmutableList.of( FunctionSignature.ret(BigIntType.INSTANCE).args(JsonType.INSTANCE), - FunctionSignature.ret(VarcharType.SYSTEM_DEFAULT).args(VarcharType.SYSTEM_DEFAULT) + FunctionSignature.ret(BigIntType.INSTANCE).args(VarcharType.SYSTEM_DEFAULT) ); /** diff --git a/regression-test/suites/nereids_function_p0/gen_function/gen.groovy b/regression-test/suites/nereids_function_p0/gen_function/gen.groovy index 547ee40a220d4f..7fa0ea5c681583 100644 --- a/regression-test/suites/nereids_function_p0/gen_function/gen.groovy +++ b/regression-test/suites/nereids_function_p0/gen_function/gen.groovy @@ -60,7 +60,7 @@ suite("nereids_gen_fn") { select id, e from fn_test lateral view explode_json_array_string('["1", "2", "3"]') lv as e order by id, e''' qt_sql_explode_json_array_json_Varchar ''' - select id, e from fn_test lateral view explode_json_array_json('[{"id":1,"name":"John"},{"id":2,"name":"Mary"},{"id":3,"name":"Bob"}]') lv as e order by id, e''' + select id, e from fn_test lateral view 
explode_json_array_json('[{"id":1,"name":"John"},{"id":2,"name":"Mary"},{"id":3,"name":"Bob"}]') lv as e order by id, cast(e as string)''' // explode order_qt_sql_explode_Double "select id, e from fn_test lateral view explode(kadbl) lv as e order by id, e"