From 4bfc3771009b274fc3c255f0c03f4feaf764de11 Mon Sep 17 00:00:00 2001 From: zhangstar333 <2561612514@qq.com> Date: Tue, 24 Oct 2023 15:35:51 +0800 Subject: [PATCH 1/5] [improvement](function) improve date_trunc function performance when timeunit is const --- be/src/vec/functions/function_timestamp.cpp | 105 +++++++++++--------- 1 file changed, 56 insertions(+), 49 deletions(-) diff --git a/be/src/vec/functions/function_timestamp.cpp b/be/src/vec/functions/function_timestamp.cpp index fb2340ffaaa8f0..3ae8814772aa2a 100644 --- a/be/src/vec/functions/function_timestamp.cpp +++ b/be/src/vec/functions/function_timestamp.cpp @@ -412,36 +412,19 @@ struct DateTrunc { auto datetime_column = static_cast(argument_columns[0].get()); auto str_column = static_cast(argument_columns[1].get()); - auto& rdata = str_column->get_chars(); - auto& roffsets = str_column->get_offsets(); ColumnPtr res = ColumnType::create(); - if (col_const[1]) { - execute_impl_right_const( - datetime_column->get_data(), str_column->get_data_at(0), - static_cast(res->assume_mutable().get())->get_data(), - null_map->get_data(), input_rows_count); - } else { - execute_impl(datetime_column->get_data(), rdata, roffsets, - static_cast(res->assume_mutable().get())->get_data(), - null_map->get_data(), input_rows_count); - } + DCHECK(col_const[1]) + << "the argument[1] must be const string literal, have check function in FE."; + execute_impl_right_const(datetime_column->get_data(), str_column->get_data_at(0), + static_cast(res->assume_mutable().get())->get_data(), + null_map->get_data(), input_rows_count); block.get_by_position(result).column = ColumnNullable::create(res, std::move(null_map)); return Status::OK(); } private: - static void execute_impl(const PaddedPODArray& ldata, const ColumnString::Chars& rdata, - const ColumnString::Offsets& roffsets, PaddedPODArray& res, - NullMap& null_map, size_t input_rows_count) { - res.resize(input_rows_count); - for (size_t i = 0; i < input_rows_count; ++i) { - auto dt = binary_cast(ldata[i]); - const char* str_data = reinterpret_cast(&rdata[roffsets[i - 1]]); - _execute_inner_loop(dt, str_data, res, null_map, i); - } - } static void execute_impl_right_const(const PaddedPODArray& ldata, const StringRef& rdata, PaddedPODArray& res, NullMap& null_map, size_t input_rows_count) { @@ -449,34 +432,58 @@ struct DateTrunc { std::string lower_str(rdata.data, rdata.size); std::transform(lower_str.begin(), lower_str.end(), lower_str.begin(), [](unsigned char c) { return std::tolower(c); }); - for (size_t i = 0; i < input_rows_count; ++i) { - auto dt = binary_cast(ldata[i]); - _execute_inner_loop(dt, lower_str.data(), res, null_map, i); - } - } - template - static void _execute_inner_loop(T& dt, const char* str_data, PaddedPODArray& res, - NullMap& null_map, size_t index) { - if (std::strncmp("year", str_data, 4) == 0) { - null_map[index] = !dt.template datetime_trunc(); - } else if (std::strncmp("quarter", str_data, 7) == 0) { - null_map[index] = !dt.template datetime_trunc(); - } else if (std::strncmp("month", str_data, 5) == 0) { - null_map[index] = !dt.template datetime_trunc(); - } else if (std::strncmp("week", str_data, 4) == 0) { - null_map[index] = !dt.template datetime_trunc(); - } else if (std::strncmp("day", str_data, 3) == 0) { - null_map[index] = !dt.template datetime_trunc(); - } else if (std::strncmp("hour", str_data, 4) == 0) { - null_map[index] = !dt.template datetime_trunc(); - } else if (std::strncmp("minute", str_data, 6) == 0) { - null_map[index] = !dt.template datetime_trunc(); - } else if (std::strncmp("second", str_data, 6) == 0) { - null_map[index] = !dt.template datetime_trunc(); - } else { - null_map[index] = 1; + + auto _execute_inner_loop = [&]() { + for (size_t i = 0; i < input_rows_count; ++i) { + auto dt = binary_cast(ldata[i]); + null_map[i] = !dt.template datetime_trunc(); + res[i] = binary_cast(dt); + } + }; + + auto execute_impl = [&](const TimeUnit& UNIT) { + if (TimeUnit::YEAR == UNIT) { + _execute_inner_loop.template operator()(); + } else if (TimeUnit::QUARTER == UNIT) { + _execute_inner_loop.template operator()(); + } else if (TimeUnit::MONTH == UNIT) { + _execute_inner_loop.template operator()(); + } else if (TimeUnit::WEEK == UNIT) { + _execute_inner_loop.template operator()(); + } else if (TimeUnit::DAY == UNIT) { + _execute_inner_loop.template operator()(); + } else if (TimeUnit::HOUR == UNIT) { + _execute_inner_loop.template operator()(); + } else if (TimeUnit::MINUTE == UNIT) { + _execute_inner_loop.template operator()(); + } else if (TimeUnit::SECOND == UNIT) { + _execute_inner_loop.template operator()(); + } + }; + + if (std::strncmp("year", lower_str.data(), 4) == 0) { + execute_impl(TimeUnit::YEAR); + } else if (std::strncmp("quarter", lower_str.data(), 7) == 0) { + execute_impl(TimeUnit::QUARTER); + } else if (std::strncmp("month", lower_str.data(), 5) == 0) { + execute_impl(TimeUnit::MONTH); + } else if (std::strncmp("week", lower_str.data(), 4) == 0) { + execute_impl(TimeUnit::WEEK); + } else if (std::strncmp("day", lower_str.data(), 3) == 0) { + execute_impl(TimeUnit::DAY); + } else if (std::strncmp("hour", lower_str.data(), 4) == 0) { + execute_impl(TimeUnit::HOUR); + } else if (std::strncmp("minute", lower_str.data(), 6) == 0) { + execute_impl(TimeUnit::MINUTE); + } else if (std::strncmp("second", lower_str.data(), 6) == 0) { + execute_impl(TimeUnit::SECOND); + } else { //here maybe unreachable + for (size_t i = 0; i < input_rows_count; ++i) { + null_map[i] = 1; + auto dt = binary_cast(ldata[i]); + res[i] = binary_cast(dt); + } } - res[index] = binary_cast(dt); } }; From 5795cec62d6fff1f455c9f9570f2d17f7cbf0728 Mon Sep 17 00:00:00 2001 From: zhangstar333 <2561612514@qq.com> Date: Tue, 24 Oct 2023 15:47:59 +0800 Subject: [PATCH 2/5] update --- be/src/vec/functions/function_timestamp.cpp | 36 +++++---------------- 1 file changed, 8 insertions(+), 28 deletions(-) diff --git a/be/src/vec/functions/function_timestamp.cpp b/be/src/vec/functions/function_timestamp.cpp index 3ae8814772aa2a..69544626b6f02b 100644 --- a/be/src/vec/functions/function_timestamp.cpp +++ b/be/src/vec/functions/function_timestamp.cpp @@ -441,42 +441,22 @@ struct DateTrunc { } }; - auto execute_impl = [&](const TimeUnit& UNIT) { - if (TimeUnit::YEAR == UNIT) { - _execute_inner_loop.template operator()(); - } else if (TimeUnit::QUARTER == UNIT) { - _execute_inner_loop.template operator()(); - } else if (TimeUnit::MONTH == UNIT) { - _execute_inner_loop.template operator()(); - } else if (TimeUnit::WEEK == UNIT) { - _execute_inner_loop.template operator()(); - } else if (TimeUnit::DAY == UNIT) { - _execute_inner_loop.template operator()(); - } else if (TimeUnit::HOUR == UNIT) { - _execute_inner_loop.template operator()(); - } else if (TimeUnit::MINUTE == UNIT) { - _execute_inner_loop.template operator()(); - } else if (TimeUnit::SECOND == UNIT) { - _execute_inner_loop.template operator()(); - } - }; - if (std::strncmp("year", lower_str.data(), 4) == 0) { - execute_impl(TimeUnit::YEAR); + _execute_inner_loop.template operator()(); } else if (std::strncmp("quarter", lower_str.data(), 7) == 0) { - execute_impl(TimeUnit::QUARTER); + _execute_inner_loop.template operator()(); } else if (std::strncmp("month", lower_str.data(), 5) == 0) { - execute_impl(TimeUnit::MONTH); + _execute_inner_loop.template operator()(); } else if (std::strncmp("week", lower_str.data(), 4) == 0) { - execute_impl(TimeUnit::WEEK); + _execute_inner_loop.template operator()(); } else if (std::strncmp("day", lower_str.data(), 3) == 0) { - execute_impl(TimeUnit::DAY); + _execute_inner_loop.template operator()(); } else if (std::strncmp("hour", lower_str.data(), 4) == 0) { - execute_impl(TimeUnit::HOUR); + _execute_inner_loop.template operator()(); } else if (std::strncmp("minute", lower_str.data(), 6) == 0) { - execute_impl(TimeUnit::MINUTE); + _execute_inner_loop.template operator()(); } else if (std::strncmp("second", lower_str.data(), 6) == 0) { - execute_impl(TimeUnit::SECOND); + _execute_inner_loop.template operator()(); } else { //here maybe unreachable for (size_t i = 0; i < input_rows_count; ++i) { null_map[i] = 1; From f624ad31530a2df94824c95a86ae02aaf0907138 Mon Sep 17 00:00:00 2001 From: zhangstar333 <2561612514@qq.com> Date: Tue, 24 Oct 2023 21:32:53 +0800 Subject: [PATCH 3/5] add open function --- be/src/vec/functions/function_timestamp.cpp | 128 +++++++++++--------- 1 file changed, 72 insertions(+), 56 deletions(-) diff --git a/be/src/vec/functions/function_timestamp.cpp b/be/src/vec/functions/function_timestamp.cpp index 69544626b6f02b..ab261702171c4f 100644 --- a/be/src/vec/functions/function_timestamp.cpp +++ b/be/src/vec/functions/function_timestamp.cpp @@ -24,6 +24,7 @@ #include #include #include +#include #include #include @@ -378,6 +379,12 @@ struct MakeDateImpl { } }; +struct DateTruncState { + using Callback_function = + std::function; + Callback_function callback_function; +}; + template struct DateTrunc { static constexpr auto name = "date_trunc"; @@ -396,73 +403,71 @@ struct DateTrunc { return make_nullable(std::make_shared()); } - static Status execute(FunctionContext* context, Block& block, const ColumnNumbers& arguments, - size_t result, size_t input_rows_count) { - DCHECK_EQ(arguments.size(), 2); - - auto null_map = ColumnUInt8::create(input_rows_count, 0); - const auto& col0 = block.get_by_position(arguments[0]).column; - bool col_const[2] = {is_column_const(*col0)}; - ColumnPtr argument_columns[2] = { - col_const[0] ? static_cast(*col0).convert_to_full_column() - : col0}; - - std::tie(argument_columns[1], col_const[1]) = - unpack_if_const(block.get_by_position(arguments[1]).column); - - auto datetime_column = static_cast(argument_columns[0].get()); - auto str_column = static_cast(argument_columns[1].get()); - - ColumnPtr res = ColumnType::create(); - DCHECK(col_const[1]) - << "the argument[1] must be const string literal, have check function in FE."; - execute_impl_right_const(datetime_column->get_data(), str_column->get_data_at(0), - static_cast(res->assume_mutable().get())->get_data(), - null_map->get_data(), input_rows_count); - - block.get_by_position(result).column = ColumnNullable::create(res, std::move(null_map)); - return Status::OK(); - } - -private: - static void execute_impl_right_const(const PaddedPODArray& ldata, - const StringRef& rdata, PaddedPODArray& res, - NullMap& null_map, size_t input_rows_count) { - res.resize(input_rows_count); - std::string lower_str(rdata.data, rdata.size); + static Status open(FunctionContext* context, FunctionContext::FunctionStateScope scope) { + if (scope != FunctionContext::THREAD_LOCAL) { + return Status::OK(); + } + if (!context->is_col_constant(1)) { + return Status::InvalidArgument( + "date_trunc function of time unit argument must be constant."); + } + const auto& data_str = context->get_constant_col(1)->column_ptr->get_data_at(0); + std::string lower_str(data_str.data, data_str.size); std::transform(lower_str.begin(), lower_str.end(), lower_str.begin(), [](unsigned char c) { return std::tolower(c); }); - auto _execute_inner_loop = [&]() { - for (size_t i = 0; i < input_rows_count; ++i) { - auto dt = binary_cast(ldata[i]); - null_map[i] = !dt.template datetime_trunc(); - res[i] = binary_cast(dt); - } - }; - + std::shared_ptr state = std::make_shared(); if (std::strncmp("year", lower_str.data(), 4) == 0) { - _execute_inner_loop.template operator()(); + state->callback_function = &execute_impl_right_const; } else if (std::strncmp("quarter", lower_str.data(), 7) == 0) { - _execute_inner_loop.template operator()(); + state->callback_function = &execute_impl_right_const; } else if (std::strncmp("month", lower_str.data(), 5) == 0) { - _execute_inner_loop.template operator()(); + state->callback_function = &execute_impl_right_const; } else if (std::strncmp("week", lower_str.data(), 4) == 0) { - _execute_inner_loop.template operator()(); + state->callback_function = &execute_impl_right_const; } else if (std::strncmp("day", lower_str.data(), 3) == 0) { - _execute_inner_loop.template operator()(); + state->callback_function = &execute_impl_right_const; } else if (std::strncmp("hour", lower_str.data(), 4) == 0) { - _execute_inner_loop.template operator()(); + state->callback_function = &execute_impl_right_const; } else if (std::strncmp("minute", lower_str.data(), 6) == 0) { - _execute_inner_loop.template operator()(); + state->callback_function = &execute_impl_right_const; } else if (std::strncmp("second", lower_str.data(), 6) == 0) { - _execute_inner_loop.template operator()(); - } else { //here maybe unreachable - for (size_t i = 0; i < input_rows_count; ++i) { - null_map[i] = 1; - auto dt = binary_cast(ldata[i]); - res[i] = binary_cast(dt); - } + state->callback_function = &execute_impl_right_const; + } else { + return Status::RuntimeError( + "Illegal second argument column of function date_trunc. now only support " + "[second,minute,hour,day,week,month,quarter,year]"); + } + context->set_function_state(scope, state); + return Status::OK(); + } + + static Status execute(FunctionContext* context, Block& block, const ColumnNumbers& arguments, + size_t result, size_t input_rows_count) { + DCHECK_EQ(arguments.size(), 2); + + auto null_map = ColumnUInt8::create(input_rows_count, 0); + const auto& datetime_column = + block.get_by_position(arguments[0]).column->convert_to_full_column_if_const(); + ColumnPtr res = ColumnType::create(input_rows_count); + auto* state = reinterpret_cast( + context->get_function_state(FunctionContext::THREAD_LOCAL)); + DCHECK(state != nullptr); + state->callback_function(datetime_column, res, null_map->get_data(), input_rows_count); + block.get_by_position(result).column = ColumnNullable::create(res, std::move(null_map)); + return Status::OK(); + } + +private: + template + static void execute_impl_right_const(const ColumnPtr& datetime_column, ColumnPtr& result_column, + NullMap& null_map, size_t input_rows_count) { + auto& data = static_cast(datetime_column.get())->get_data(); + auto& res = static_cast(result_column->assume_mutable().get())->get_data(); + for (size_t i = 0; i < input_rows_count; ++i) { + auto dt = binary_cast(data[i]); + null_map[i] = !dt.template datetime_trunc(); + res[i] = binary_cast(dt); } } }; @@ -1250,6 +1255,17 @@ class FunctionOtherTypesToDateType : public IFunction { return Impl::get_return_type_impl(arguments); } + Status open(FunctionContext* context, FunctionContext::FunctionStateScope scope) override { + if constexpr (std::is_same_v> || + std::is_same_v> || + std::is_same_v> || + std::is_same_v>) { + return Impl::open(context, scope); + } else { + return Status::OK(); + } + } + //TODO: add function below when we fixed be-ut. //ColumnNumbers get_arguments_that_are_always_constant() const override { return {1}; } From 8268412d29dc9f186f7a63383c8eff28af47f7f3 Mon Sep 17 00:00:00 2001 From: zhangstar333 <2561612514@qq.com> Date: Wed, 25 Oct 2023 11:04:16 +0800 Subject: [PATCH 4/5] partition expr add open --- be/src/vec/sink/writer/vtablet_writer.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/be/src/vec/sink/writer/vtablet_writer.cpp b/be/src/vec/sink/writer/vtablet_writer.cpp index 319c6dd8e2f29b..9b1d0c83662268 100644 --- a/be/src/vec/sink/writer/vtablet_writer.cpp +++ b/be/src/vec/sink/writer/vtablet_writer.cpp @@ -1233,7 +1233,8 @@ Status VTabletWriter::_init(RuntimeState* state, RuntimeProfile* profile) { // prepare for auto partition functions if (_vpartition->is_auto_partition()) { auto [part_ctx, part_func] = _get_partition_function(); - RETURN_IF_ERROR(part_func->prepare(_state, *_output_row_desc, part_ctx.get())); + RETURN_IF_ERROR(part_ctx->prepare(_state, *_output_row_desc)); + RETURN_IF_ERROR(part_ctx->open(_state)); } if (_group_commit) { RETURN_IF_ERROR(_state->exec_env()->wal_mgr()->add_wal_path(_db_id, _tb_id, _wal_id, From 79b22e9aa959985314c2a2f71f674c2a1c76b6fe Mon Sep 17 00:00:00 2001 From: zhangstar333 <2561612514@qq.com> Date: Wed, 25 Oct 2023 15:21:38 +0800 Subject: [PATCH 5/5] update be test --- be/test/vec/function/function_time_test.cpp | 96 +++++++++++++++------ 1 file changed, 69 insertions(+), 27 deletions(-) diff --git a/be/test/vec/function/function_time_test.cpp b/be/test/vec/function/function_time_test.cpp index 22566f3f638912..5dfc2f2554a306 100644 --- a/be/test/vec/function/function_time_test.cpp +++ b/be/test/vec/function/function_time_test.cpp @@ -1475,40 +1475,82 @@ TEST(VTimestampFunctionsTest, dayname_test) { TEST(VTimestampFunctionsTest, datetrunc_test) { std::string func_name = "date_trunc"; { - InputTypeSet input_types = {TypeIndex::DateTime, TypeIndex::String}; - + InputTypeSet input_types = {TypeIndex::DateTime, Consted {TypeIndex::String}}; DataSet data_set = {{{std::string("2022-10-08 11:44:23"), std::string("second")}, - str_to_date_time("2022-10-08 11:44:23")}, - {{std::string("2022-10-08 11:44:23"), std::string("minute")}, - str_to_date_time("2022-10-08 11:44:00")}, - {{std::string("2022-10-08 11:44:23"), std::string("hour")}, - str_to_date_time("2022-10-08 11:00:00")}, - {{std::string("2022-10-08 11:44:23"), std::string("day")}, - str_to_date_time("2022-10-08 00:00:00")}, - {{std::string("2022-10-08 11:44:23"), std::string("month")}, - str_to_date_time("2022-10-01 00:00:00")}, - {{std::string("2022-10-08 11:44:23"), std::string("year")}, + str_to_date_time("2022-10-08 11:44:23")}}; + static_cast(check_function(func_name, input_types, data_set)); + } + { + InputTypeSet input_types = {TypeIndex::DateTime, Consted {TypeIndex::String}}; + DataSet data_set = {{{std::string("2022-10-08 11:44:23"), std::string("minute")}, + str_to_date_time("2022-10-08 11:44:00")}}; + static_cast(check_function(func_name, input_types, data_set)); + } + { + InputTypeSet input_types = {TypeIndex::DateTime, Consted {TypeIndex::String}}; + DataSet data_set = {{{std::string("2022-10-08 11:44:23"), std::string("hour")}, + str_to_date_time("2022-10-08 11:00:00")}}; + static_cast(check_function(func_name, input_types, data_set)); + } + { + InputTypeSet input_types = {TypeIndex::DateTime, Consted {TypeIndex::String}}; + DataSet data_set = {{{std::string("2022-10-08 11:44:23"), std::string("day")}, + str_to_date_time("2022-10-08 00:00:00")}}; + static_cast(check_function(func_name, input_types, data_set)); + } + { + InputTypeSet input_types = {TypeIndex::DateTime, Consted {TypeIndex::String}}; + DataSet data_set = {{{std::string("2022-10-08 11:44:23"), std::string("month")}, + str_to_date_time("2022-10-01 00:00:00")}}; + static_cast(check_function(func_name, input_types, data_set)); + } + { + InputTypeSet input_types = {TypeIndex::DateTime, Consted {TypeIndex::String}}; + DataSet data_set = {{{std::string("2022-10-08 11:44:23"), std::string("year")}, str_to_date_time("2022-01-01 00:00:00")}}; - static_cast(check_function(func_name, input_types, data_set)); } { - InputTypeSet input_types = {TypeIndex::DateTimeV2, TypeIndex::String}; - - DataSet data_set = {{{std::string("2022-10-08 11:44:23.123"), std::string("second")}, - str_to_datetime_v2("2022-10-08 11:44:23.000", "%Y-%m-%d %H:%i:%s.%f")}, - {{std::string("2022-10-08 11:44:23"), std::string("minute")}, - str_to_datetime_v2("2022-10-08 11:44:00", "%Y-%m-%d %H:%i:%s")}, - {{std::string("2022-10-08 11:44:23"), std::string("hour")}, - str_to_datetime_v2("2022-10-08 11:00:00", "%Y-%m-%d %H:%i:%s")}, - {{std::string("2022-10-08 11:44:23"), std::string("day")}, - str_to_datetime_v2("2022-10-08 00:00:00", "%Y-%m-%d %H:%i:%s")}, - {{std::string("2022-10-08 11:44:23"), std::string("month")}, - str_to_datetime_v2("2022-10-01 00:00:00", "%Y-%m-%d %H:%i:%s")}, - {{std::string("2022-10-08 11:44:23"), std::string("year")}, + InputTypeSet input_types = {TypeIndex::DateTimeV2, Consted {TypeIndex::String}}; + DataSet data_set = { + {{std::string("2022-10-08 11:44:23.123"), std::string("second")}, + str_to_datetime_v2("2022-10-08 11:44:23.000", "%Y-%m-%d %H:%i:%s.%f")}}; + static_cast( + check_function(func_name, input_types, data_set)); + } + { + InputTypeSet input_types = {TypeIndex::DateTimeV2, Consted {TypeIndex::String}}; + DataSet data_set = {{{std::string("2022-10-08 11:44:23"), std::string("minute")}, + str_to_datetime_v2("2022-10-08 11:44:00", "%Y-%m-%d %H:%i:%s")}}; + static_cast( + check_function(func_name, input_types, data_set)); + } + { + InputTypeSet input_types = {TypeIndex::DateTimeV2, Consted {TypeIndex::String}}; + DataSet data_set = {{{std::string("2022-10-08 11:44:23"), std::string("hour")}, + str_to_datetime_v2("2022-10-08 11:00:00", "%Y-%m-%d %H:%i:%s")}}; + static_cast( + check_function(func_name, input_types, data_set)); + } + { + InputTypeSet input_types = {TypeIndex::DateTimeV2, Consted {TypeIndex::String}}; + DataSet data_set = {{{std::string("2022-10-08 11:44:23"), std::string("day")}, + str_to_datetime_v2("2022-10-08 00:00:00", "%Y-%m-%d %H:%i:%s")}}; + static_cast( + check_function(func_name, input_types, data_set)); + } + { + InputTypeSet input_types = {TypeIndex::DateTimeV2, Consted {TypeIndex::String}}; + DataSet data_set = {{{std::string("2022-10-08 11:44:23"), std::string("month")}, + str_to_datetime_v2("2022-10-01 00:00:00", "%Y-%m-%d %H:%i:%s")}}; + static_cast( + check_function(func_name, input_types, data_set)); + } + { + InputTypeSet input_types = {TypeIndex::DateTimeV2, Consted {TypeIndex::String}}; + DataSet data_set = {{{std::string("2022-10-08 11:44:23"), std::string("year")}, str_to_datetime_v2("2022-01-01 00:00:00", "%Y-%m-%d %H:%i:%s")}}; - static_cast( check_function(func_name, input_types, data_set)); }