From 4286df2a32dcc8645230a03863ee659e1c4fdf0d Mon Sep 17 00:00:00 2001 From: zhangstar333 <2561612514@qq.com> Date: Fri, 28 Jun 2024 17:44:07 +0800 Subject: [PATCH 1/3] [bug](function)fix json_replace check return type error --- be/src/vec/functions/function_json.cpp | 72 +++++++++++-------- .../json_function/test_query_json_replace.out | 5 ++ .../test_query_json_replace.groovy | 23 ++++++ 3 files changed, 71 insertions(+), 29 deletions(-) diff --git a/be/src/vec/functions/function_json.cpp b/be/src/vec/functions/function_json.cpp index e7c2fc1781dfc5..6c8c76ab3e82d4 100644 --- a/be/src/vec/functions/function_json.cpp +++ b/be/src/vec/functions/function_json.cpp @@ -1275,11 +1275,13 @@ class FunctionJsonModifyImpl : public IFunction { Status get_parsed_path_columns(std::vector>>& json_paths, const std::vector& data_columns, - size_t input_rows_count) const { + size_t input_rows_count, + std::vector& column_is_consts) const { for (auto col = 1; col + 1 < data_columns.size() - 1; col += 2) { json_paths.emplace_back(std::vector>()); for (auto row = 0; row < input_rows_count; row++) { - const auto path = data_columns[col]->get_data_at(row); + const auto& path = data_columns[col]->get_data_at( + index_check_const(row, column_is_consts[col])); std::string_view path_string(path.data, path.size); std::vector parsed_paths; @@ -1313,7 +1315,7 @@ class FunctionJsonModifyImpl : public IFunction { DataTypePtr get_return_type_impl(const DataTypes& arguments) const override { bool is_nullable = false; // arguments: (json_str, path, val[, path, val...], type_flag) - for (auto col = 2; col < arguments.size() - 1; col += 2) { + for (auto col = 0; col < arguments.size() - 1; col += 1) { if (arguments[col]->is_nullable()) { is_nullable = true; break; @@ -1329,34 +1331,38 @@ class FunctionJsonModifyImpl : public IFunction { bool is_nullable = false; auto ret_null_map = ColumnUInt8::create(0, 0); - std::vector column_ptrs; // prevent converted column destruct std::vector data_columns; std::vector nullmaps; + std::vector column_is_consts; for (int i = 0; i < arguments.size(); i++) { - auto column = block.get_by_position(arguments[i]).column; - column_ptrs.push_back(column->convert_to_full_column_if_const()); - const ColumnNullable* col_nullable = - check_and_get_column(column_ptrs.back().get()); + ColumnPtr arg_col; + bool arg_const; + std::tie(arg_col, arg_const) = + unpack_if_const(block.get_by_position(arguments[i]).column); + const auto* col_nullable = check_and_get_column(arg_col.get()); + column_is_consts.push_back(arg_const); if (col_nullable) { if (!is_nullable) { is_nullable = true; - ret_null_map = ColumnUInt8::create(input_rows_count, 0); } - const ColumnUInt8* col_nullmap = check_and_get_column( + const ColumnUInt8* col_nullmap = assert_cast( col_nullable->get_null_map_column_ptr().get()); nullmaps.push_back(col_nullmap); - const ColumnString* col = check_and_get_column( + const ColumnString* col = assert_cast( col_nullable->get_nested_column_ptr().get()); data_columns.push_back(col); } else { nullmaps.push_back(nullptr); - data_columns.push_back(assert_cast(column_ptrs.back().get())); + data_columns.push_back(assert_cast(arg_col.get())); } } - - RETURN_IF_ERROR(execute_process( - data_columns, *assert_cast(result_column.get()), input_rows_count, - nullmaps, is_nullable, *assert_cast(ret_null_map.get()))); + if (is_nullable) { + ret_null_map = ColumnUInt8::create(input_rows_count, 0); + } + RETURN_IF_ERROR( + execute_process(data_columns, *assert_cast(result_column.get()), + input_rows_count, nullmaps, is_nullable, + *assert_cast(ret_null_map.get()), column_is_consts)); if (is_nullable) { block.replace_by_position(result, ColumnNullable::create(std::move(result_column), @@ -1370,13 +1376,14 @@ class FunctionJsonModifyImpl : public IFunction { Status execute_process(const std::vector& data_columns, ColumnString& result_column, size_t input_rows_count, const std::vector nullmaps, bool is_nullable, - ColumnUInt8& ret_null_map) const { + ColumnUInt8& ret_null_map, std::vector& column_is_consts) const { std::string type_flags = data_columns.back()->get_data_at(0).to_string(); std::vector objects; for (auto row = 0; row < input_rows_count; row++) { objects.emplace_back(rapidjson::kNullType); - const auto json_doc = data_columns[0]->get_data_at(row); + const auto json_doc = + data_columns[0]->get_data_at(index_check_const(row, column_is_consts[0])); std::string_view json_str(json_doc.data, json_doc.size); objects[row].Parse(json_str.data(), json_str.size()); if (UNLIKELY(objects[row].HasParseError())) { @@ -1386,9 +1393,10 @@ class FunctionJsonModifyImpl : public IFunction { } std::vector>> json_paths; - RETURN_IF_ERROR(get_parsed_path_columns(json_paths, data_columns, input_rows_count)); + RETURN_IF_ERROR(get_parsed_path_columns(json_paths, data_columns, input_rows_count, + column_is_consts)); - execute_parse(type_flags, data_columns, objects, json_paths, nullmaps); + execute_parse(type_flags, data_columns, objects, json_paths, nullmaps, column_is_consts); rapidjson::StringBuffer buf; rapidjson::Writer writer(buf); @@ -1412,11 +1420,12 @@ class FunctionJsonModifyImpl : public IFunction { const std::vector& data_columns, std::vector& objects, std::vector>>& json_paths, - const std::vector& nullmaps) { + const std::vector& nullmaps, + std::vector& column_is_consts) { for (auto col = 1; col + 1 < data_columns.size() - 1; col += 2) { - constexpr_int_match<'0', '6', Reducer>::run(type_flags[col + 1], objects, - json_paths[col / 2], data_columns[col + 1], - nullmaps[col + 1]); + constexpr_int_match<'0', '6', Reducer>::run( + type_flags[col + 1], objects, json_paths[col / 2], data_columns[col + 1], + nullmaps[col + 1], column_is_consts[col + 1]); } } @@ -1498,18 +1507,23 @@ class FunctionJsonModifyImpl : public IFunction { template static void execute_type(std::vector& objects, std::vector>& paths_column, - const ColumnString* value_column, const ColumnUInt8* nullmap) { + const ColumnString* value_column, const ColumnUInt8* nullmap, + bool column_is_const) { StringParser::ParseResult result; rapidjson::Value value; for (auto row = 0; row < objects.size(); row++) { std::vector* parsed_paths = &paths_column[row]; if (nullmap != nullptr && nullmap->get_data()[row]) { - JsonParser<'0'>::update_value(result, value, value_column->get_data_at(row), - objects[row].GetAllocator()); + JsonParser<'0'>::update_value( + result, value, + value_column->get_data_at(index_check_const(row, column_is_const)), + objects[row].GetAllocator()); } else { - TypeImpl::update_value(result, value, value_column->get_data_at(row), - objects[row].GetAllocator()); + TypeImpl::update_value( + result, value, + value_column->get_data_at(index_check_const(row, column_is_const)), + objects[row].GetAllocator()); } switch (Kind::modify_type) { diff --git a/regression-test/data/query_p0/sql_functions/json_function/test_query_json_replace.out b/regression-test/data/query_p0/sql_functions/json_function/test_query_json_replace.out index 408aa2ecb77fa4..3ba95483878667 100644 --- a/regression-test/data/query_p0/sql_functions/json_function/test_query_json_replace.out +++ b/regression-test/data/query_p0/sql_functions/json_function/test_query_json_replace.out @@ -11,3 +11,8 @@ null {"id":3,"time":null,"a1":[1,9],"a2":[1,2]} {"id":4,"time":null,"a1":[1,null],"a2":[1,2]} +-- !sql2 -- +{"id":1,"time":"2022-01-01 11:45:14","a1":[1,9],"a2":[1,2]} +{"id":2,"time":"2022-01-01 11:45:14","a1":[1,null],"a2":[1,2]} +{"id":3,"time":null,"a1":[1,9],"a2":[1,2]} + diff --git a/regression-test/suites/query_p0/sql_functions/json_function/test_query_json_replace.groovy b/regression-test/suites/query_p0/sql_functions/json_function/test_query_json_replace.groovy index bdf26abecb4131..a870169d962870 100644 --- a/regression-test/suites/query_p0/sql_functions/json_function/test_query_json_replace.groovy +++ b/regression-test/suites/query_p0/sql_functions/json_function/test_query_json_replace.groovy @@ -42,4 +42,27 @@ suite("test_query_json_replace", "query") { sql "insert into ${tableName} values(4,null,null);" qt_sql1 "select json_replace('{\"id\": 0, \"time\": \"1970-01-01 00:00:00\", \"a1\": [1, 2], \"a2\": [1, 2]}', '\$.id', id, '\$.time', time, '\$.a1[1]', k, '\$.a2[3]', k) from ${tableName} order by id;" sql "DROP TABLE ${tableName};" + + sql "DROP TABLE IF EXISTS test_query_json_replace_nullable" + sql """ + CREATE TABLE test_query_json_replace_nullable ( + `id` int(11) null, + `time` datetime, + `k` int(11) + ) ENGINE=OLAP + DUPLICATE KEY(`id`,`time`,`k`) + COMMENT "OLAP" + DISTRIBUTED BY HASH(`id`) BUCKETS 1 + PROPERTIES ( + "replication_allocation" = "tag.location.default: 1", + "in_memory" = "false", + "storage_format" = "V2" + ); + """ + sql "insert into test_query_json_replace_nullable values(1,'2022-01-01 11:45:14',9);" + sql "insert into test_query_json_replace_nullable values(2,'2022-01-01 11:45:14',null);" + sql "insert into test_query_json_replace_nullable values(3,null,9);" + qt_sql2 "select json_replace('{\"id\": 0, \"time\": \"1970-01-01 00:00:00\", \"a1\": [1, 2], \"a2\": [1, 2]}', '\$.id', id, '\$.time', time, '\$.a1[1]', k, '\$.a2[3]', k) from test_query_json_replace_nullable order by id;" + + sql "DROP TABLE test_query_json_replace_nullable;" } From 0ca062bab4511a9bad2a41de8a36e0f66a9e439d Mon Sep 17 00:00:00 2001 From: zhangstar333 <2561612514@qq.com> Date: Mon, 1 Jul 2024 16:36:38 +0800 Subject: [PATCH 2/3] update review2 --- be/src/vec/functions/function_json.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/be/src/vec/functions/function_json.cpp b/be/src/vec/functions/function_json.cpp index 6c8c76ab3e82d4..991354a0cddfa1 100644 --- a/be/src/vec/functions/function_json.cpp +++ b/be/src/vec/functions/function_json.cpp @@ -1280,7 +1280,7 @@ class FunctionJsonModifyImpl : public IFunction { for (auto col = 1; col + 1 < data_columns.size() - 1; col += 2) { json_paths.emplace_back(std::vector>()); for (auto row = 0; row < input_rows_count; row++) { - const auto& path = data_columns[col]->get_data_at( + const auto path = data_columns[col]->get_data_at( index_check_const(row, column_is_consts[col])); std::string_view path_string(path.data, path.size); std::vector parsed_paths; @@ -1329,7 +1329,7 @@ class FunctionJsonModifyImpl : public IFunction { size_t result, size_t input_rows_count) const override { auto result_column = ColumnString::create(); bool is_nullable = false; - auto ret_null_map = ColumnUInt8::create(0, 0); + MutableColumnPtr ret_null_map = nullptr; std::vector data_columns; std::vector nullmaps; From c9e6208c3d8ccf0ce1622323ddc9bea32c8b8c5b Mon Sep 17 00:00:00 2001 From: zhangstar333 <2561612514@qq.com> Date: Mon, 1 Jul 2024 21:46:55 +0800 Subject: [PATCH 3/3] update2 --- be/src/vec/functions/function_json.cpp | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/be/src/vec/functions/function_json.cpp b/be/src/vec/functions/function_json.cpp index 991354a0cddfa1..c25d4d6329f386 100644 --- a/be/src/vec/functions/function_json.cpp +++ b/be/src/vec/functions/function_json.cpp @@ -1329,7 +1329,8 @@ class FunctionJsonModifyImpl : public IFunction { size_t result, size_t input_rows_count) const override { auto result_column = ColumnString::create(); bool is_nullable = false; - MutableColumnPtr ret_null_map = nullptr; + ColumnUInt8::MutablePtr ret_null_map = nullptr; + ColumnUInt8::Container* ret_null_map_data = nullptr; std::vector data_columns; std::vector nullmaps; @@ -1356,13 +1357,14 @@ class FunctionJsonModifyImpl : public IFunction { data_columns.push_back(assert_cast(arg_col.get())); } } + //*assert_cast(ret_null_map.get()) if (is_nullable) { ret_null_map = ColumnUInt8::create(input_rows_count, 0); + ret_null_map_data = &(ret_null_map->get_data()); } - RETURN_IF_ERROR( - execute_process(data_columns, *assert_cast(result_column.get()), - input_rows_count, nullmaps, is_nullable, - *assert_cast(ret_null_map.get()), column_is_consts)); + RETURN_IF_ERROR(execute_process( + data_columns, *assert_cast(result_column.get()), input_rows_count, + nullmaps, is_nullable, ret_null_map_data, column_is_consts)); if (is_nullable) { block.replace_by_position(result, ColumnNullable::create(std::move(result_column), @@ -1376,7 +1378,8 @@ class FunctionJsonModifyImpl : public IFunction { Status execute_process(const std::vector& data_columns, ColumnString& result_column, size_t input_rows_count, const std::vector nullmaps, bool is_nullable, - ColumnUInt8& ret_null_map, std::vector& column_is_consts) const { + ColumnUInt8::Container* ret_null_map_data, + std::vector& column_is_consts) const { std::string type_flags = data_columns.back()->get_data_at(0).to_string(); std::vector objects; @@ -1406,7 +1409,7 @@ class FunctionJsonModifyImpl : public IFunction { writer.Reset(buf); objects[i].Accept(writer); if (is_nullable && objects[i].IsNull()) { - ret_null_map.get_data()[i] = 1; + (*ret_null_map_data)[i] = 1; } result_column.insert_data(buf.GetString(), buf.GetSize()); }