diff --git a/be/src/vec/functions/function_timestamp.cpp b/be/src/vec/functions/function_timestamp.cpp index fb2340ffaaa8f0..ab261702171c4f 100644 --- a/be/src/vec/functions/function_timestamp.cpp +++ b/be/src/vec/functions/function_timestamp.cpp @@ -24,6 +24,7 @@ #include #include #include +#include #include #include @@ -378,6 +379,12 @@ struct MakeDateImpl { } }; +struct DateTruncState { + using Callback_function = + std::function; + Callback_function callback_function; +}; + template struct DateTrunc { static constexpr auto name = "date_trunc"; @@ -396,88 +403,73 @@ struct DateTrunc { return make_nullable(std::make_shared()); } + static Status open(FunctionContext* context, FunctionContext::FunctionStateScope scope) { + if (scope != FunctionContext::THREAD_LOCAL) { + return Status::OK(); + } + if (!context->is_col_constant(1)) { + return Status::InvalidArgument( + "date_trunc function of time unit argument must be constant."); + } + const auto& data_str = context->get_constant_col(1)->column_ptr->get_data_at(0); + std::string lower_str(data_str.data, data_str.size); + std::transform(lower_str.begin(), lower_str.end(), lower_str.begin(), + [](unsigned char c) { return std::tolower(c); }); + + std::shared_ptr state = std::make_shared(); + if (std::strncmp("year", lower_str.data(), 4) == 0) { + state->callback_function = &execute_impl_right_const; + } else if (std::strncmp("quarter", lower_str.data(), 7) == 0) { + state->callback_function = &execute_impl_right_const; + } else if (std::strncmp("month", lower_str.data(), 5) == 0) { + state->callback_function = &execute_impl_right_const; + } else if (std::strncmp("week", lower_str.data(), 4) == 0) { + state->callback_function = &execute_impl_right_const; + } else if (std::strncmp("day", lower_str.data(), 3) == 0) { + state->callback_function = &execute_impl_right_const; + } else if (std::strncmp("hour", lower_str.data(), 4) == 0) { + state->callback_function = &execute_impl_right_const; + } else if (std::strncmp("minute", lower_str.data(), 6) == 0) { + state->callback_function = &execute_impl_right_const; + } else if (std::strncmp("second", lower_str.data(), 6) == 0) { + state->callback_function = &execute_impl_right_const; + } else { + return Status::RuntimeError( + "Illegal second argument column of function date_trunc. now only support " + "[second,minute,hour,day,week,month,quarter,year]"); + } + context->set_function_state(scope, state); + return Status::OK(); + } + static Status execute(FunctionContext* context, Block& block, const ColumnNumbers& arguments, size_t result, size_t input_rows_count) { DCHECK_EQ(arguments.size(), 2); auto null_map = ColumnUInt8::create(input_rows_count, 0); - const auto& col0 = block.get_by_position(arguments[0]).column; - bool col_const[2] = {is_column_const(*col0)}; - ColumnPtr argument_columns[2] = { - col_const[0] ? static_cast(*col0).convert_to_full_column() - : col0}; - - std::tie(argument_columns[1], col_const[1]) = - unpack_if_const(block.get_by_position(arguments[1]).column); - - auto datetime_column = static_cast(argument_columns[0].get()); - auto str_column = static_cast(argument_columns[1].get()); - auto& rdata = str_column->get_chars(); - auto& roffsets = str_column->get_offsets(); - - ColumnPtr res = ColumnType::create(); - if (col_const[1]) { - execute_impl_right_const( - datetime_column->get_data(), str_column->get_data_at(0), - static_cast(res->assume_mutable().get())->get_data(), - null_map->get_data(), input_rows_count); - } else { - execute_impl(datetime_column->get_data(), rdata, roffsets, - static_cast(res->assume_mutable().get())->get_data(), - null_map->get_data(), input_rows_count); - } - + const auto& datetime_column = + block.get_by_position(arguments[0]).column->convert_to_full_column_if_const(); + ColumnPtr res = ColumnType::create(input_rows_count); + auto* state = reinterpret_cast( + context->get_function_state(FunctionContext::THREAD_LOCAL)); + DCHECK(state != nullptr); + state->callback_function(datetime_column, res, null_map->get_data(), input_rows_count); block.get_by_position(result).column = ColumnNullable::create(res, std::move(null_map)); return Status::OK(); } private: - static void execute_impl(const PaddedPODArray& ldata, const ColumnString::Chars& rdata, - const ColumnString::Offsets& roffsets, PaddedPODArray& res, - NullMap& null_map, size_t input_rows_count) { - res.resize(input_rows_count); - for (size_t i = 0; i < input_rows_count; ++i) { - auto dt = binary_cast(ldata[i]); - const char* str_data = reinterpret_cast(&rdata[roffsets[i - 1]]); - _execute_inner_loop(dt, str_data, res, null_map, i); - } - } - static void execute_impl_right_const(const PaddedPODArray& ldata, - const StringRef& rdata, PaddedPODArray& res, + template + static void execute_impl_right_const(const ColumnPtr& datetime_column, ColumnPtr& result_column, NullMap& null_map, size_t input_rows_count) { - res.resize(input_rows_count); - std::string lower_str(rdata.data, rdata.size); - std::transform(lower_str.begin(), lower_str.end(), lower_str.begin(), - [](unsigned char c) { return std::tolower(c); }); + auto& data = static_cast(datetime_column.get())->get_data(); + auto& res = static_cast(result_column->assume_mutable().get())->get_data(); for (size_t i = 0; i < input_rows_count; ++i) { - auto dt = binary_cast(ldata[i]); - _execute_inner_loop(dt, lower_str.data(), res, null_map, i); + auto dt = binary_cast(data[i]); + null_map[i] = !dt.template datetime_trunc(); + res[i] = binary_cast(dt); } } - template - static void _execute_inner_loop(T& dt, const char* str_data, PaddedPODArray& res, - NullMap& null_map, size_t index) { - if (std::strncmp("year", str_data, 4) == 0) { - null_map[index] = !dt.template datetime_trunc(); - } else if (std::strncmp("quarter", str_data, 7) == 0) { - null_map[index] = !dt.template datetime_trunc(); - } else if (std::strncmp("month", str_data, 5) == 0) { - null_map[index] = !dt.template datetime_trunc(); - } else if (std::strncmp("week", str_data, 4) == 0) { - null_map[index] = !dt.template datetime_trunc(); - } else if (std::strncmp("day", str_data, 3) == 0) { - null_map[index] = !dt.template datetime_trunc(); - } else if (std::strncmp("hour", str_data, 4) == 0) { - null_map[index] = !dt.template datetime_trunc(); - } else if (std::strncmp("minute", str_data, 6) == 0) { - null_map[index] = !dt.template datetime_trunc(); - } else if (std::strncmp("second", str_data, 6) == 0) { - null_map[index] = !dt.template datetime_trunc(); - } else { - null_map[index] = 1; - } - res[index] = binary_cast(dt); - } }; class FromDays : public IFunction { @@ -1263,6 +1255,17 @@ class FunctionOtherTypesToDateType : public IFunction { return Impl::get_return_type_impl(arguments); } + Status open(FunctionContext* context, FunctionContext::FunctionStateScope scope) override { + if constexpr (std::is_same_v> || + std::is_same_v> || + std::is_same_v> || + std::is_same_v>) { + return Impl::open(context, scope); + } else { + return Status::OK(); + } + } + //TODO: add function below when we fixed be-ut. //ColumnNumbers get_arguments_that_are_always_constant() const override { return {1}; } diff --git a/be/src/vec/sink/writer/vtablet_writer.cpp b/be/src/vec/sink/writer/vtablet_writer.cpp index 319c6dd8e2f29b..9b1d0c83662268 100644 --- a/be/src/vec/sink/writer/vtablet_writer.cpp +++ b/be/src/vec/sink/writer/vtablet_writer.cpp @@ -1233,7 +1233,8 @@ Status VTabletWriter::_init(RuntimeState* state, RuntimeProfile* profile) { // prepare for auto partition functions if (_vpartition->is_auto_partition()) { auto [part_ctx, part_func] = _get_partition_function(); - RETURN_IF_ERROR(part_func->prepare(_state, *_output_row_desc, part_ctx.get())); + RETURN_IF_ERROR(part_ctx->prepare(_state, *_output_row_desc)); + RETURN_IF_ERROR(part_ctx->open(_state)); } if (_group_commit) { RETURN_IF_ERROR(_state->exec_env()->wal_mgr()->add_wal_path(_db_id, _tb_id, _wal_id, diff --git a/be/test/vec/function/function_time_test.cpp b/be/test/vec/function/function_time_test.cpp index 22566f3f638912..5dfc2f2554a306 100644 --- a/be/test/vec/function/function_time_test.cpp +++ b/be/test/vec/function/function_time_test.cpp @@ -1475,40 +1475,82 @@ TEST(VTimestampFunctionsTest, dayname_test) { TEST(VTimestampFunctionsTest, datetrunc_test) { std::string func_name = "date_trunc"; { - InputTypeSet input_types = {TypeIndex::DateTime, TypeIndex::String}; - + InputTypeSet input_types = {TypeIndex::DateTime, Consted {TypeIndex::String}}; DataSet data_set = {{{std::string("2022-10-08 11:44:23"), std::string("second")}, - str_to_date_time("2022-10-08 11:44:23")}, - {{std::string("2022-10-08 11:44:23"), std::string("minute")}, - str_to_date_time("2022-10-08 11:44:00")}, - {{std::string("2022-10-08 11:44:23"), std::string("hour")}, - str_to_date_time("2022-10-08 11:00:00")}, - {{std::string("2022-10-08 11:44:23"), std::string("day")}, - str_to_date_time("2022-10-08 00:00:00")}, - {{std::string("2022-10-08 11:44:23"), std::string("month")}, - str_to_date_time("2022-10-01 00:00:00")}, - {{std::string("2022-10-08 11:44:23"), std::string("year")}, + str_to_date_time("2022-10-08 11:44:23")}}; + static_cast(check_function(func_name, input_types, data_set)); + } + { + InputTypeSet input_types = {TypeIndex::DateTime, Consted {TypeIndex::String}}; + DataSet data_set = {{{std::string("2022-10-08 11:44:23"), std::string("minute")}, + str_to_date_time("2022-10-08 11:44:00")}}; + static_cast(check_function(func_name, input_types, data_set)); + } + { + InputTypeSet input_types = {TypeIndex::DateTime, Consted {TypeIndex::String}}; + DataSet data_set = {{{std::string("2022-10-08 11:44:23"), std::string("hour")}, + str_to_date_time("2022-10-08 11:00:00")}}; + static_cast(check_function(func_name, input_types, data_set)); + } + { + InputTypeSet input_types = {TypeIndex::DateTime, Consted {TypeIndex::String}}; + DataSet data_set = {{{std::string("2022-10-08 11:44:23"), std::string("day")}, + str_to_date_time("2022-10-08 00:00:00")}}; + static_cast(check_function(func_name, input_types, data_set)); + } + { + InputTypeSet input_types = {TypeIndex::DateTime, Consted {TypeIndex::String}}; + DataSet data_set = {{{std::string("2022-10-08 11:44:23"), std::string("month")}, + str_to_date_time("2022-10-01 00:00:00")}}; + static_cast(check_function(func_name, input_types, data_set)); + } + { + InputTypeSet input_types = {TypeIndex::DateTime, Consted {TypeIndex::String}}; + DataSet data_set = {{{std::string("2022-10-08 11:44:23"), std::string("year")}, str_to_date_time("2022-01-01 00:00:00")}}; - static_cast(check_function(func_name, input_types, data_set)); } { - InputTypeSet input_types = {TypeIndex::DateTimeV2, TypeIndex::String}; - - DataSet data_set = {{{std::string("2022-10-08 11:44:23.123"), std::string("second")}, - str_to_datetime_v2("2022-10-08 11:44:23.000", "%Y-%m-%d %H:%i:%s.%f")}, - {{std::string("2022-10-08 11:44:23"), std::string("minute")}, - str_to_datetime_v2("2022-10-08 11:44:00", "%Y-%m-%d %H:%i:%s")}, - {{std::string("2022-10-08 11:44:23"), std::string("hour")}, - str_to_datetime_v2("2022-10-08 11:00:00", "%Y-%m-%d %H:%i:%s")}, - {{std::string("2022-10-08 11:44:23"), std::string("day")}, - str_to_datetime_v2("2022-10-08 00:00:00", "%Y-%m-%d %H:%i:%s")}, - {{std::string("2022-10-08 11:44:23"), std::string("month")}, - str_to_datetime_v2("2022-10-01 00:00:00", "%Y-%m-%d %H:%i:%s")}, - {{std::string("2022-10-08 11:44:23"), std::string("year")}, + InputTypeSet input_types = {TypeIndex::DateTimeV2, Consted {TypeIndex::String}}; + DataSet data_set = { + {{std::string("2022-10-08 11:44:23.123"), std::string("second")}, + str_to_datetime_v2("2022-10-08 11:44:23.000", "%Y-%m-%d %H:%i:%s.%f")}}; + static_cast( + check_function(func_name, input_types, data_set)); + } + { + InputTypeSet input_types = {TypeIndex::DateTimeV2, Consted {TypeIndex::String}}; + DataSet data_set = {{{std::string("2022-10-08 11:44:23"), std::string("minute")}, + str_to_datetime_v2("2022-10-08 11:44:00", "%Y-%m-%d %H:%i:%s")}}; + static_cast( + check_function(func_name, input_types, data_set)); + } + { + InputTypeSet input_types = {TypeIndex::DateTimeV2, Consted {TypeIndex::String}}; + DataSet data_set = {{{std::string("2022-10-08 11:44:23"), std::string("hour")}, + str_to_datetime_v2("2022-10-08 11:00:00", "%Y-%m-%d %H:%i:%s")}}; + static_cast( + check_function(func_name, input_types, data_set)); + } + { + InputTypeSet input_types = {TypeIndex::DateTimeV2, Consted {TypeIndex::String}}; + DataSet data_set = {{{std::string("2022-10-08 11:44:23"), std::string("day")}, + str_to_datetime_v2("2022-10-08 00:00:00", "%Y-%m-%d %H:%i:%s")}}; + static_cast( + check_function(func_name, input_types, data_set)); + } + { + InputTypeSet input_types = {TypeIndex::DateTimeV2, Consted {TypeIndex::String}}; + DataSet data_set = {{{std::string("2022-10-08 11:44:23"), std::string("month")}, + str_to_datetime_v2("2022-10-01 00:00:00", "%Y-%m-%d %H:%i:%s")}}; + static_cast( + check_function(func_name, input_types, data_set)); + } + { + InputTypeSet input_types = {TypeIndex::DateTimeV2, Consted {TypeIndex::String}}; + DataSet data_set = {{{std::string("2022-10-08 11:44:23"), std::string("year")}, str_to_datetime_v2("2022-01-01 00:00:00", "%Y-%m-%d %H:%i:%s")}}; - static_cast( check_function(func_name, input_types, data_set)); }