From dec23c9c0de1065eb9217455a8b6601ea143f25c Mon Sep 17 00:00:00 2001 From: Mryange <2319153948@qq.com> Date: Fri, 13 Sep 2024 07:57:20 +0800 Subject: [PATCH 1/5] upd --- be/src/vec/functions/date_format_type.h | 160 ++++++++++++++++++ be/src/vec/functions/date_time_transforms.h | 45 ++++- .../function_datetime_string_to_string.h | 159 +++++++++++++---- 3 files changed, 328 insertions(+), 36 deletions(-) create mode 100644 be/src/vec/functions/date_format_type.h diff --git a/be/src/vec/functions/date_format_type.h b/be/src/vec/functions/date_format_type.h new file mode 100644 index 00000000000000..639b2f3f0bc5fe --- /dev/null +++ b/be/src/vec/functions/date_format_type.h @@ -0,0 +1,160 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#pragma once + +#include "vec/common/string_ref.h" +namespace doris::vectorized { + +namespace time_format_type { + +inline StringRef rewrite_specific_format(const char* raw_str, size_t str_size) { + const static std::string specific_format_strs[3] = {"%Y%m%d", "%Y-%m-%d", "%Y-%m-%d %H:%i:%s"}; + const static std::string specific_format_rewrite[3] = {"yyyyMMdd", "yyyy-MM-dd", + "yyyy-MM-dd HH:mm:ss"}; + for (int i = 0; i < 3; i++) { + const StringRef specific_format {specific_format_strs[i].data(), + specific_format_strs[i].size()}; + if (specific_format == StringRef {raw_str, str_size}) { + return {specific_format_rewrite[i].data(), specific_format_rewrite[i].size()}; + } + } + return {raw_str, str_size}; +} + +enum class TimeFormatType { + None = 0, + yyyyMMdd, + yyyy_MM_dd, + yyyy_MM_dd_HH_mm_ss, + yyyy_MM, + yyyyMM, + yyyy +}; + +inline TimeFormatType string_to_type(const std::string& format) { + if (format == "yyyyMMdd") { + return TimeFormatType::yyyyMMdd; + } else if (format == "yyyy-MM-dd") { + return TimeFormatType::yyyy_MM_dd; + } else if (format == "yyyy-MM-dd HH:mm:ss") { + return TimeFormatType::yyyy_MM_dd_HH_mm_ss; + } else if (format == "yyyy-MM") { + return TimeFormatType::yyyy_MM; + } else if (format == "yyyyMM") { + return TimeFormatType::yyyyMM; + } else if (format == "yyyy") { + return TimeFormatType::yyyy; + } else { + return TimeFormatType::None; + } +} + +template +void inline put_year(T y, char* buf, int& i) { + int t = y / 100; + buf[i++] = t / 10 + '0'; + buf[i++] = t % 10 + '0'; + + t = y % 100; + buf[i++] = t / 10 + '0'; + buf[i++] = t % 10 + '0'; +} + +template +void inline put_other(T m, char* buf, int& i) { + buf[i++] = m / 10 + '0'; + buf[i++] = m % 10 + '0'; +} + +struct NoneImpl {}; + +struct yyyyMMddImpl { + template + size_t static apply(const DateType& date_value, char* buf) { + int i = 0; + put_year(date_value.year(), buf, i); + put_other(date_value.month(), buf, i); + put_other(date_value.day(), buf, i); + return i; + } +}; + +struct yyyy_MM_ddImpl { + template + size_t static apply(const DateType& date_value, char* buf) { + int i = 0; + put_year(date_value.year(), buf, i); + buf[i++] = '-'; + put_other(date_value.month(), buf, i); + buf[i++] = '-'; + put_other(date_value.day(), buf, i); + return i; + } +}; + +struct yyyy_MM_dd_HH_mm_ssImpl { + template + size_t static apply(const DateType& date_value, char* buf) { + int i = 0; + put_year(date_value.year(), buf, i); + buf[i++] = '-'; + put_other(date_value.month(), buf, i); + buf[i++] = '-'; + put_other(date_value.day(), buf, i); + buf[i++] = ' '; + put_other(date_value.hour(), buf, i); + buf[i++] = ':'; + put_other(date_value.minute(), buf, i); + buf[i++] = ':'; + put_other(date_value.second(), buf, i); + return i; + } +}; + +struct yyyy_MMImpl { + template + size_t static apply(const DateType& date_value, char* buf) { + int i = 0; + put_year(date_value.year(), buf, i); + buf[i++] = '-'; + put_other(date_value.month(), buf, i); + return i; + } +}; +struct yyyyMMImpl { + template + size_t static apply(const DateType& date_value, char* buf) { + int i = 0; + put_year(date_value.year(), buf, i); + put_other(date_value.month(), buf, i); + return i; + } +}; + +struct yyyyImpl { + template + size_t static apply(const DateType& date_value, char* buf) { + int i = 0; + put_year(date_value.year(), buf, i); + return i; + } +}; + +} // namespace time_format_type + +} // namespace doris::vectorized \ No newline at end of file diff --git a/be/src/vec/functions/date_time_transforms.h b/be/src/vec/functions/date_time_transforms.h index 266c9b5d272d38..a91c6e61d16ded 100644 --- a/be/src/vec/functions/date_time_transforms.h +++ b/be/src/vec/functions/date_time_transforms.h @@ -33,6 +33,7 @@ #include "vec/core/types.h" #include "vec/data_types/data_type_date_time.h" #include "vec/data_types/data_type_string.h" +#include "vec/functions/date_format_type.h" #include "vec/runtime/vdatetime_value.h" #include "vec/utils/util.hpp" @@ -184,8 +185,24 @@ struct DateFormatImpl { static constexpr auto name = "date_format"; + template static inline auto execute(const FromType& t, StringRef format, ColumnString::Chars& res_data, - size_t& offset) { + size_t& offset, const cctz::time_zone& time_zone) { + const auto& dt = (DateType&)t; + if (!dt.is_valid_date()) { + return std::pair {offset, true}; + } + auto len = Impl::apply(dt, (char*)res_data.data()); + offset += len; + res_data.resize(offset); + return std::pair {offset, false}; + } + + template <> + static inline auto execute(const FromType& t, StringRef format, + ColumnString::Chars& res_data, + size_t& offset, + const cctz::time_zone& time_zone) { const auto& dt = (DateType&)t; if (format.size > 128) { return std::pair {offset, true}; @@ -203,15 +220,11 @@ struct DateFormatImpl { } static DataTypes get_variadic_argument_types() { - return std::vector { - std::dynamic_pointer_cast( - std::make_shared::DateType>()), - std::dynamic_pointer_cast( - std::make_shared())}; + return std::vector {std::make_shared::DateType>(), + std::make_shared()}; } }; -// TODO: This function should be depend on arguments not always nullable template struct FromUnixTimeImpl { using FromType = Int64; @@ -220,9 +233,27 @@ struct FromUnixTimeImpl { static const int64_t TIMESTAMP_VALID_MAX = 32536771199; static constexpr auto name = "from_unixtime"; + template static inline auto execute(FromType val, StringRef format, ColumnString::Chars& res_data, size_t& offset, const cctz::time_zone& time_zone) { DateType dt; + if (val < 0 || val > TIMESTAMP_VALID_MAX) { + return std::pair {offset, true}; + } + dt.from_unixtime(val, time_zone); + + auto len = Impl::apply(dt, (char*)res_data.data()); + offset += len; + res_data.resize(offset); + return std::pair {offset, false}; + } + + template <> + static inline auto execute(FromType val, StringRef format, + ColumnString::Chars& res_data, + size_t& offset, + const cctz::time_zone& time_zone) { + DateType dt; if (format.size > 128 || val < 0 || val > TIMESTAMP_VALID_MAX) { return std::pair {offset, true}; } diff --git a/be/src/vec/functions/function_datetime_string_to_string.h b/be/src/vec/functions/function_datetime_string_to_string.h index 41eba51301c015..dddcbc9652871a 100644 --- a/be/src/vec/functions/function_datetime_string_to_string.h +++ b/be/src/vec/functions/function_datetime_string_to_string.h @@ -29,6 +29,7 @@ #include "vec/columns/column_string.h" #include "vec/columns/column_vector.h" #include "vec/columns/columns_number.h" +#include "vec/common/assert_cast.h" #include "vec/common/string_ref.h" #include "vec/core/block.h" #include "vec/core/column_numbers.h" @@ -38,6 +39,7 @@ #include "vec/data_types/data_type.h" #include "vec/data_types/data_type_nullable.h" #include "vec/data_types/data_type_string.h" +#include "vec/functions/date_format_type.h" #include "vec/functions/date_time_transforms.h" #include "vec/functions/function.h" #include "vec/runtime/vdatetime_value.h" @@ -66,6 +68,50 @@ class FunctionDateTimeStringToString : public IFunction { return {}; } + struct FormatState { + std::string format_str; + // Check if the format string is null or exceeds the length limit. + bool is_valid = true; + time_format_type::TimeFormatType format_type = time_format_type::TimeFormatType::None; + }; + + Status open(FunctionContext* context, FunctionContext::FunctionStateScope scope) override { + if (scope == FunctionContext::THREAD_LOCAL) { + return Status::OK(); + } + std::shared_ptr state = std::make_shared(); + DCHECK((context->get_num_args() == 1) || (context->get_num_args() == 2)); + context->set_function_state(scope, state); + if (context->get_num_args() == 1) { + // default argument + state->format_str = "yyyy-MM-dd HH:mm:ss"; + state->format_type = time_format_type::TimeFormatType::yyyy_MM_dd_HH_mm_ss; + return IFunction::open(context, scope); + } + + const auto* column_string = context->get_constant_col(1); + if (column_string == nullptr) { + // func(col , null); + state->is_valid = false; + return IFunction::open(context, scope); + } + + auto string_vale = column_string->column_ptr->get_data_at(0).trim(); + auto format_str = + time_format_type::rewrite_specific_format(string_vale.data, string_vale.size); + if (format_str.size > 128) { + // exceeds the length limit. + state->is_valid = false; + return IFunction::open(context, scope); + } + + // Preprocess special format strings. + state->format_str = format_str; + state->format_type = time_format_type::string_to_type(state->format_str); + + return IFunction::open(context, scope); + } + DataTypePtr get_return_type_impl(const ColumnsWithTypeAndName& arguments) const override { return make_nullable(std::make_shared()); } @@ -78,41 +124,96 @@ class FunctionDateTimeStringToString : public IFunction { const ColumnPtr source_col = block.get_by_position(arguments[0]).column; const auto* nullable_column = check_and_get_column(source_col.get()); - const auto* sources = check_and_get_column>( + const auto* sources = assert_cast*>( nullable_column ? nullable_column->get_nested_column_ptr().get() : source_col.get()); - if (sources) { - auto col_res = ColumnString::create(); - ColumnUInt8::MutablePtr col_null_map_to; - col_null_map_to = ColumnUInt8::create(); - auto& vec_null_map_to = col_null_map_to->get_data(); - - if (arguments.size() == 2) { - const IColumn& source_col1 = *block.get_by_position(arguments[1]).column; - StringRef formatter = - source_col1.get_data_at(0); // for both ColumnString or ColumnConst. - TransformerToStringTwoArgument::vector_constant( - context, sources->get_data(), formatter, col_res->get_chars(), - col_res->get_offsets(), vec_null_map_to); - } else { //default argument - TransformerToStringTwoArgument::vector_constant( - context, sources->get_data(), StringRef("%Y-%m-%d %H:%i:%s"), - col_res->get_chars(), col_res->get_offsets(), vec_null_map_to); + auto col_res = ColumnString::create(); + ColumnUInt8::MutablePtr col_null_map_to; + col_null_map_to = ColumnUInt8::create(); + auto& vec_null_map_to = col_null_map_to->get_data(); + + RETURN_IF_ERROR(vector_constant(context, sources->get_data(), col_res->get_chars(), + col_res->get_offsets(), vec_null_map_to)); + + if (nullable_column) { + const auto& origin_null_map = nullable_column->get_null_map_column().get_data(); + for (int i = 0; i < origin_null_map.size(); ++i) { + vec_null_map_to[i] |= origin_null_map[i]; } + } + + block.get_by_position(result).column = + ColumnNullable::create(std::move(col_res), std::move(col_null_map_to)); + + return Status::OK(); + } + + Status vector_constant(FunctionContext* context, + const PaddedPODArray& ts, + ColumnString::Chars& res_data, ColumnString::Offsets& res_offsets, + PaddedPODArray& null_map) const { + auto* format_state = reinterpret_cast( + context->get_function_state(FunctionContext::FRAGMENT_LOCAL)); + if (!format_state) { + return Status::RuntimeError("funciton context for function '{}' must have FormatState;", + get_name()); + } + + StringRef format(format_state->format_str); + + auto len = ts.size(); + res_offsets.resize(len); + res_data.reserve(len * format.size + len); + null_map.resize_fill(len, false); + + if (!format_state->is_valid) { + return Status::OK(); + } - if (nullable_column) { - const auto& origin_null_map = nullable_column->get_null_map_column().get_data(); - for (int i = 0; i < origin_null_map.size(); ++i) { - vec_null_map_to[i] |= origin_null_map[i]; - } + auto execute_for_format_type = [&]() { + size_t offset = 0; + for (int i = 0; i < len; ++i) { + const auto& t = ts[i]; + size_t new_offset; + bool is_null; + std::tie(new_offset, is_null) = Transform::template execute( + t, format, res_data, offset, context->state()->timezone_obj()); + res_offsets[i] = new_offset; + null_map[i] = is_null; } - block.get_by_position(result).column = - ColumnNullable::create(std::move(col_res), std::move(col_null_map_to)); - } else { - return Status::InternalError("Illegal column {} of first argument of function {}", - block.get_by_position(arguments[0]).column->get_name(), - name); + }; + + switch (format_state->format_type) { + case time_format_type::TimeFormatType::None: { + execute_for_format_type.template operator()(); + break; + } + case time_format_type::TimeFormatType::yyyyMMdd: { + execute_for_format_type.template operator()(); + break; + } + case time_format_type::TimeFormatType::yyyy_MM_dd: { + execute_for_format_type.template operator()(); + break; + } + case time_format_type::TimeFormatType::yyyy_MM_dd_HH_mm_ss: { + execute_for_format_type + .template operator()(); + break; + } + case time_format_type::TimeFormatType::yyyy_MM: { + execute_for_format_type.template operator()(); + break; + } + case time_format_type::TimeFormatType::yyyyMM: { + execute_for_format_type.template operator()(); + break; + } + case time_format_type::TimeFormatType::yyyy: { + execute_for_format_type.template operator()(); + break; + } } return Status::OK(); } From d909faf53af7b7a5264a741bc75448b61626aa59 Mon Sep 17 00:00:00 2001 From: Mryange <2319153948@qq.com> Date: Fri, 13 Sep 2024 15:54:02 +0800 Subject: [PATCH 2/5] fix-and-for-gcc --- be/src/vec/functions/date_time_transforms.h | 120 +++++++++--------- .../function_datetime_string_to_string.h | 11 +- 2 files changed, 66 insertions(+), 65 deletions(-) diff --git a/be/src/vec/functions/date_time_transforms.h b/be/src/vec/functions/date_time_transforms.h index a91c6e61d16ded..45e921e4cbf336 100644 --- a/be/src/vec/functions/date_time_transforms.h +++ b/be/src/vec/functions/date_time_transforms.h @@ -186,37 +186,38 @@ struct DateFormatImpl { static constexpr auto name = "date_format"; template - static inline auto execute(const FromType& t, StringRef format, ColumnString::Chars& res_data, + static inline bool execute(const FromType& t, StringRef format, ColumnString::Chars& res_data, size_t& offset, const cctz::time_zone& time_zone) { - const auto& dt = (DateType&)t; - if (!dt.is_valid_date()) { - return std::pair {offset, true}; - } - auto len = Impl::apply(dt, (char*)res_data.data()); - offset += len; - res_data.resize(offset); - return std::pair {offset, false}; - } + if constexpr (std::is_same_v) { + // Handle non-special formats. + const auto& dt = (DateType&)t; + if (format.size > 128) { + return true; + } + char buf[100 + SAFE_FORMAT_STRING_MARGIN]; + if (!dt.to_format_string_conservative(format.data, format.size, buf, + 100 + SAFE_FORMAT_STRING_MARGIN)) { + return true; + } - template <> - static inline auto execute(const FromType& t, StringRef format, - ColumnString::Chars& res_data, - size_t& offset, - const cctz::time_zone& time_zone) { - const auto& dt = (DateType&)t; - if (format.size > 128) { - return std::pair {offset, true}; - } - char buf[100 + SAFE_FORMAT_STRING_MARGIN]; - if (!dt.to_format_string_conservative(format.data, format.size, buf, - 100 + SAFE_FORMAT_STRING_MARGIN)) { - return std::pair {offset, true}; - } + auto len = strlen(buf); + res_data.insert(buf, buf + len); + offset += len; + return false; + } else { + const auto& dt = (DateType&)t; + + if (!dt.is_valid_date()) { + return true; + } - auto len = strlen(buf); - res_data.insert(buf, buf + len); - offset += len; - return std::pair {offset, false}; + // No buffer is needed here because these specially optimized formats have fixed lengths, + // and sufficient memory has already been reserved. + auto len = Impl::apply(dt, (char*)res_data.data() + offset); + offset += len; + + return false; + } } static DataTypes get_variadic_argument_types() { @@ -234,41 +235,44 @@ struct FromUnixTimeImpl { static constexpr auto name = "from_unixtime"; template - static inline auto execute(FromType val, StringRef format, ColumnString::Chars& res_data, + static inline bool execute(const FromType& val, StringRef format, ColumnString::Chars& res_data, size_t& offset, const cctz::time_zone& time_zone) { - DateType dt; - if (val < 0 || val > TIMESTAMP_VALID_MAX) { - return std::pair {offset, true}; - } - dt.from_unixtime(val, time_zone); + if constexpr (std::is_same_v) { + DateType dt; + if (format.size > 128 || val < 0 || val > TIMESTAMP_VALID_MAX) { + return true; + } + dt.from_unixtime(val, time_zone); - auto len = Impl::apply(dt, (char*)res_data.data()); - offset += len; - res_data.resize(offset); - return std::pair {offset, false}; - } + char buf[100 + SAFE_FORMAT_STRING_MARGIN]; + if (!dt.to_format_string_conservative(format.data, format.size, buf, + 100 + SAFE_FORMAT_STRING_MARGIN)) { + return true; + } - template <> - static inline auto execute(FromType val, StringRef format, - ColumnString::Chars& res_data, - size_t& offset, - const cctz::time_zone& time_zone) { - DateType dt; - if (format.size > 128 || val < 0 || val > TIMESTAMP_VALID_MAX) { - return std::pair {offset, true}; - } - dt.from_unixtime(val, time_zone); + auto len = strlen(buf); + res_data.insert(buf, buf + len); + offset += len; + return false; - char buf[100 + SAFE_FORMAT_STRING_MARGIN]; - if (!dt.to_format_string_conservative(format.data, format.size, buf, - 100 + SAFE_FORMAT_STRING_MARGIN)) { - return std::pair {offset, true}; - } + } else { + DateType dt; + if (val < 0 || val > TIMESTAMP_VALID_MAX) { + return true; + } + dt.from_unixtime(val, time_zone); + + if (!dt.is_valid_date()) { + return true; + } - auto len = strlen(buf); - res_data.insert(buf, buf + len); - offset += len; - return std::pair {offset, false}; + // No buffer is needed here because these specially optimized formats have fixed lengths, + // and sufficient memory has already been reserved. + auto len = Impl::apply(dt, (char*)res_data.data() + offset); + offset += len; + + return false; + } } }; diff --git a/be/src/vec/functions/function_datetime_string_to_string.h b/be/src/vec/functions/function_datetime_string_to_string.h index dddcbc9652871a..767e6395b7f184 100644 --- a/be/src/vec/functions/function_datetime_string_to_string.h +++ b/be/src/vec/functions/function_datetime_string_to_string.h @@ -174,14 +174,11 @@ class FunctionDateTimeStringToString : public IFunction { auto execute_for_format_type = [&]() { size_t offset = 0; for (int i = 0; i < len; ++i) { - const auto& t = ts[i]; - size_t new_offset; - bool is_null; - std::tie(new_offset, is_null) = Transform::template execute( - t, format, res_data, offset, context->state()->timezone_obj()); - res_offsets[i] = new_offset; - null_map[i] = is_null; + null_map[i] = Transform::template execute(ts[i], format, res_data, offset, + context->state()->timezone_obj()); + res_offsets[i] = offset; } + res_data.resize(offset); }; switch (format_state->format_type) { From f8110c95ce6d622fb20dcd1b9aa45ab8dd56f808 Mon Sep 17 00:00:00 2001 From: Mryange <2319153948@qq.com> Date: Fri, 13 Sep 2024 19:51:34 +0800 Subject: [PATCH 3/5] use variant --- be/src/vec/functions/date_format_type.h | 23 +++++++ .../function_datetime_string_to_string.h | 60 +++++-------------- 2 files changed, 39 insertions(+), 44 deletions(-) diff --git a/be/src/vec/functions/date_format_type.h b/be/src/vec/functions/date_format_type.h index 639b2f3f0bc5fe..40e9df94a5408a 100644 --- a/be/src/vec/functions/date_format_type.h +++ b/be/src/vec/functions/date_format_type.h @@ -17,6 +17,8 @@ #pragma once +#include + #include "vec/common/string_ref.h" namespace doris::vectorized { @@ -155,6 +157,27 @@ struct yyyyImpl { } }; +using FormatImplVariant = std::variant; + +inline FormatImplVariant string_to_impl(const std::string& format) { + if (format == "yyyyMMdd") { + return yyyyMMddImpl {}; + } else if (format == "yyyy-MM-dd") { + return yyyy_MM_ddImpl {}; + } else if (format == "yyyy-MM-dd HH:mm:ss") { + return yyyy_MM_dd_HH_mm_ssImpl {}; + } else if (format == "yyyy-MM") { + return yyyy_MMImpl {}; + } else if (format == "yyyyMM") { + return yyyyMMImpl {}; + } else if (format == "yyyy") { + return yyyyImpl {}; + } else { + return NoneImpl {}; + } +} + } // namespace time_format_type } // namespace doris::vectorized \ No newline at end of file diff --git a/be/src/vec/functions/function_datetime_string_to_string.h b/be/src/vec/functions/function_datetime_string_to_string.h index 767e6395b7f184..595bb4de30972f 100644 --- a/be/src/vec/functions/function_datetime_string_to_string.h +++ b/be/src/vec/functions/function_datetime_string_to_string.h @@ -21,6 +21,7 @@ #include #include +#include #include "common/status.h" #include "vec/aggregate_functions/aggregate_function.h" @@ -72,7 +73,7 @@ class FunctionDateTimeStringToString : public IFunction { std::string format_str; // Check if the format string is null or exceeds the length limit. bool is_valid = true; - time_format_type::TimeFormatType format_type = time_format_type::TimeFormatType::None; + time_format_type::FormatImplVariant format_type; }; Status open(FunctionContext* context, FunctionContext::FunctionStateScope scope) override { @@ -85,7 +86,7 @@ class FunctionDateTimeStringToString : public IFunction { if (context->get_num_args() == 1) { // default argument state->format_str = "yyyy-MM-dd HH:mm:ss"; - state->format_type = time_format_type::TimeFormatType::yyyy_MM_dd_HH_mm_ss; + state->format_type = time_format_type::yyyy_MM_dd_HH_mm_ssImpl {}; return IFunction::open(context, scope); } @@ -107,7 +108,7 @@ class FunctionDateTimeStringToString : public IFunction { // Preprocess special format strings. state->format_str = format_str; - state->format_type = time_format_type::string_to_type(state->format_str); + state->format_type = time_format_type::string_to_impl(state->format_str); return IFunction::open(context, scope); } @@ -171,47 +172,18 @@ class FunctionDateTimeStringToString : public IFunction { return Status::OK(); } - auto execute_for_format_type = [&]() { - size_t offset = 0; - for (int i = 0; i < len; ++i) { - null_map[i] = Transform::template execute(ts[i], format, res_data, offset, - context->state()->timezone_obj()); - res_offsets[i] = offset; - } - res_data.resize(offset); - }; - - switch (format_state->format_type) { - case time_format_type::TimeFormatType::None: { - execute_for_format_type.template operator()(); - break; - } - case time_format_type::TimeFormatType::yyyyMMdd: { - execute_for_format_type.template operator()(); - break; - } - case time_format_type::TimeFormatType::yyyy_MM_dd: { - execute_for_format_type.template operator()(); - break; - } - case time_format_type::TimeFormatType::yyyy_MM_dd_HH_mm_ss: { - execute_for_format_type - .template operator()(); - break; - } - case time_format_type::TimeFormatType::yyyy_MM: { - execute_for_format_type.template operator()(); - break; - } - case time_format_type::TimeFormatType::yyyyMM: { - execute_for_format_type.template operator()(); - break; - } - case time_format_type::TimeFormatType::yyyy: { - execute_for_format_type.template operator()(); - break; - } - } + std::visit( + [&](auto type) { + using Impl = decltype(type); + size_t offset = 0; + for (int i = 0; i < len; ++i) { + null_map[i] = Transform::template execute( + ts[i], format, res_data, offset, context->state()->timezone_obj()); + res_offsets[i] = offset; + } + res_data.resize(offset); + }, + format_state->format_type); return Status::OK(); } }; From b40e4f69549cde64ebea04e6a07ecc1a0b480d34 Mon Sep 17 00:00:00 2001 From: Mryange <2319153948@qq.com> Date: Sat, 14 Sep 2024 07:48:17 +0800 Subject: [PATCH 4/5] upd --- be/src/vec/functions/date_format_type.h | 62 ++++--------------- be/src/vec/functions/date_time_transforms.h | 4 +- .../function_datetime_string_to_string.h | 8 +-- 3 files changed, 16 insertions(+), 58 deletions(-) diff --git a/be/src/vec/functions/date_format_type.h b/be/src/vec/functions/date_format_type.h index 40e9df94a5408a..bfaf4d5715f6a0 100644 --- a/be/src/vec/functions/date_format_type.h +++ b/be/src/vec/functions/date_format_type.h @@ -24,48 +24,6 @@ namespace doris::vectorized { namespace time_format_type { -inline StringRef rewrite_specific_format(const char* raw_str, size_t str_size) { - const static std::string specific_format_strs[3] = {"%Y%m%d", "%Y-%m-%d", "%Y-%m-%d %H:%i:%s"}; - const static std::string specific_format_rewrite[3] = {"yyyyMMdd", "yyyy-MM-dd", - "yyyy-MM-dd HH:mm:ss"}; - for (int i = 0; i < 3; i++) { - const StringRef specific_format {specific_format_strs[i].data(), - specific_format_strs[i].size()}; - if (specific_format == StringRef {raw_str, str_size}) { - return {specific_format_rewrite[i].data(), specific_format_rewrite[i].size()}; - } - } - return {raw_str, str_size}; -} - -enum class TimeFormatType { - None = 0, - yyyyMMdd, - yyyy_MM_dd, - yyyy_MM_dd_HH_mm_ss, - yyyy_MM, - yyyyMM, - yyyy -}; - -inline TimeFormatType string_to_type(const std::string& format) { - if (format == "yyyyMMdd") { - return TimeFormatType::yyyyMMdd; - } else if (format == "yyyy-MM-dd") { - return TimeFormatType::yyyy_MM_dd; - } else if (format == "yyyy-MM-dd HH:mm:ss") { - return TimeFormatType::yyyy_MM_dd_HH_mm_ss; - } else if (format == "yyyy-MM") { - return TimeFormatType::yyyy_MM; - } else if (format == "yyyyMM") { - return TimeFormatType::yyyyMM; - } else if (format == "yyyy") { - return TimeFormatType::yyyy; - } else { - return TimeFormatType::None; - } -} - template void inline put_year(T y, char* buf, int& i) { int t = y / 100; @@ -87,7 +45,7 @@ struct NoneImpl {}; struct yyyyMMddImpl { template - size_t static apply(const DateType& date_value, char* buf) { + size_t static date_to_str(const DateType& date_value, char* buf) { int i = 0; put_year(date_value.year(), buf, i); put_other(date_value.month(), buf, i); @@ -98,7 +56,7 @@ struct yyyyMMddImpl { struct yyyy_MM_ddImpl { template - size_t static apply(const DateType& date_value, char* buf) { + size_t static date_to_str(const DateType& date_value, char* buf) { int i = 0; put_year(date_value.year(), buf, i); buf[i++] = '-'; @@ -111,7 +69,7 @@ struct yyyy_MM_ddImpl { struct yyyy_MM_dd_HH_mm_ssImpl { template - size_t static apply(const DateType& date_value, char* buf) { + size_t static date_to_str(const DateType& date_value, char* buf) { int i = 0; put_year(date_value.year(), buf, i); buf[i++] = '-'; @@ -130,7 +88,7 @@ struct yyyy_MM_dd_HH_mm_ssImpl { struct yyyy_MMImpl { template - size_t static apply(const DateType& date_value, char* buf) { + size_t static date_to_str(const DateType& date_value, char* buf) { int i = 0; put_year(date_value.year(), buf, i); buf[i++] = '-'; @@ -140,7 +98,7 @@ struct yyyy_MMImpl { }; struct yyyyMMImpl { template - size_t static apply(const DateType& date_value, char* buf) { + size_t static date_to_str(const DateType& date_value, char* buf) { int i = 0; put_year(date_value.year(), buf, i); put_other(date_value.month(), buf, i); @@ -150,7 +108,7 @@ struct yyyyMMImpl { struct yyyyImpl { template - size_t static apply(const DateType& date_value, char* buf) { + size_t static date_to_str(const DateType& date_value, char* buf) { int i = 0; put_year(date_value.year(), buf, i); return i; @@ -160,12 +118,14 @@ struct yyyyImpl { using FormatImplVariant = std::variant; +const static std::string default_format = "yyyy-MM-dd HH:mm:ss"; +const static auto default_impl = yyyy_MM_dd_HH_mm_ssImpl {}; inline FormatImplVariant string_to_impl(const std::string& format) { - if (format == "yyyyMMdd") { + if (format == "yyyyMMdd" || format == "%Y%m%d") { return yyyyMMddImpl {}; - } else if (format == "yyyy-MM-dd") { + } else if (format == "yyyy-MM-dd" || format == "%Y-%m-%d") { return yyyy_MM_ddImpl {}; - } else if (format == "yyyy-MM-dd HH:mm:ss") { + } else if (format == "yyyy-MM-dd HH:mm:ss" || format == "%Y-%m-%d %H:%i:%s") { return yyyy_MM_dd_HH_mm_ssImpl {}; } else if (format == "yyyy-MM") { return yyyy_MMImpl {}; diff --git a/be/src/vec/functions/date_time_transforms.h b/be/src/vec/functions/date_time_transforms.h index 45e921e4cbf336..98c9ee67a3f64c 100644 --- a/be/src/vec/functions/date_time_transforms.h +++ b/be/src/vec/functions/date_time_transforms.h @@ -213,7 +213,7 @@ struct DateFormatImpl { // No buffer is needed here because these specially optimized formats have fixed lengths, // and sufficient memory has already been reserved. - auto len = Impl::apply(dt, (char*)res_data.data() + offset); + auto len = Impl::date_to_str(dt, (char*)res_data.data() + offset); offset += len; return false; @@ -268,7 +268,7 @@ struct FromUnixTimeImpl { // No buffer is needed here because these specially optimized formats have fixed lengths, // and sufficient memory has already been reserved. - auto len = Impl::apply(dt, (char*)res_data.data() + offset); + auto len = Impl::date_to_str(dt, (char*)res_data.data() + offset); offset += len; return false; diff --git a/be/src/vec/functions/function_datetime_string_to_string.h b/be/src/vec/functions/function_datetime_string_to_string.h index 595bb4de30972f..3ba769ee049081 100644 --- a/be/src/vec/functions/function_datetime_string_to_string.h +++ b/be/src/vec/functions/function_datetime_string_to_string.h @@ -85,8 +85,8 @@ class FunctionDateTimeStringToString : public IFunction { context->set_function_state(scope, state); if (context->get_num_args() == 1) { // default argument - state->format_str = "yyyy-MM-dd HH:mm:ss"; - state->format_type = time_format_type::yyyy_MM_dd_HH_mm_ssImpl {}; + state->format_str = time_format_type::default_format; + state->format_type = time_format_type::default_impl; return IFunction::open(context, scope); } @@ -97,9 +97,7 @@ class FunctionDateTimeStringToString : public IFunction { return IFunction::open(context, scope); } - auto string_vale = column_string->column_ptr->get_data_at(0).trim(); - auto format_str = - time_format_type::rewrite_specific_format(string_vale.data, string_vale.size); + auto format_str = column_string->column_ptr->get_data_at(0).trim(); if (format_str.size > 128) { // exceeds the length limit. state->is_valid = false; From 33bdf78800d4e5bef5b417a6f76aea811c61fc0d Mon Sep 17 00:00:00 2001 From: Mryange <2319153948@qq.com> Date: Sat, 14 Sep 2024 15:47:02 +0800 Subject: [PATCH 5/5] fix --- be/src/vec/functions/date_format_type.h | 27 ++++++++++++++----- be/src/vec/functions/date_time_transforms.h | 5 +--- .../function_datetime_string_to_string.h | 22 +++++++++++---- .../datetime_functions/test_date_function.out | 15 +++++++++++ .../test_date_function.groovy | 9 ++++++- 5 files changed, 61 insertions(+), 17 deletions(-) diff --git a/be/src/vec/functions/date_format_type.h b/be/src/vec/functions/date_format_type.h index bfaf4d5715f6a0..071ecf44853e1d 100644 --- a/be/src/vec/functions/date_format_type.h +++ b/be/src/vec/functions/date_format_type.h @@ -20,12 +20,26 @@ #include #include "vec/common/string_ref.h" -namespace doris::vectorized { -namespace time_format_type { +namespace doris::vectorized::time_format_type { +// Used to optimize commonly used date formats. + +inline StringRef rewrite_specific_format(const char* raw_str, size_t str_size) { + const static std::string specific_format_strs[3] = {"%Y%m%d", "%Y-%m-%d", "%Y-%m-%d %H:%i:%s"}; + const static std::string specific_format_rewrite[3] = {"yyyyMMdd", "yyyy-MM-dd", + "yyyy-MM-dd HH:mm:ss"}; + for (int i = 0; i < 3; i++) { + const StringRef specific_format {specific_format_strs[i].data(), + specific_format_strs[i].size()}; + if (specific_format == StringRef {raw_str, str_size}) { + return {specific_format_rewrite[i].data(), specific_format_rewrite[i].size()}; + } + } + return {raw_str, str_size}; +} template -void inline put_year(T y, char* buf, int& i) { +void put_year(T y, char* buf, int& i) { int t = y / 100; buf[i++] = t / 10 + '0'; buf[i++] = t % 10 + '0'; @@ -36,11 +50,12 @@ void inline put_year(T y, char* buf, int& i) { } template -void inline put_other(T m, char* buf, int& i) { +void put_other(T m, char* buf, int& i) { buf[i++] = m / 10 + '0'; buf[i++] = m % 10 + '0'; } +// NoneImpl indicates that no specific optimization has been applied, and the general logic is used for processing. struct NoneImpl {}; struct yyyyMMddImpl { @@ -138,6 +153,4 @@ inline FormatImplVariant string_to_impl(const std::string& format) { } } -} // namespace time_format_type - -} // namespace doris::vectorized \ No newline at end of file +} // namespace doris::vectorized::time_format_type diff --git a/be/src/vec/functions/date_time_transforms.h b/be/src/vec/functions/date_time_transforms.h index 98c9ee67a3f64c..84824d74ff1208 100644 --- a/be/src/vec/functions/date_time_transforms.h +++ b/be/src/vec/functions/date_time_transforms.h @@ -191,9 +191,6 @@ struct DateFormatImpl { if constexpr (std::is_same_v) { // Handle non-special formats. const auto& dt = (DateType&)t; - if (format.size > 128) { - return true; - } char buf[100 + SAFE_FORMAT_STRING_MARGIN]; if (!dt.to_format_string_conservative(format.data, format.size, buf, 100 + SAFE_FORMAT_STRING_MARGIN)) { @@ -239,7 +236,7 @@ struct FromUnixTimeImpl { size_t& offset, const cctz::time_zone& time_zone) { if constexpr (std::is_same_v) { DateType dt; - if (format.size > 128 || val < 0 || val > TIMESTAMP_VALID_MAX) { + if (val < 0 || val > TIMESTAMP_VALID_MAX) { return true; } dt.from_unixtime(val, time_zone); diff --git a/be/src/vec/functions/function_datetime_string_to_string.h b/be/src/vec/functions/function_datetime_string_to_string.h index 3ba769ee049081..80fe6cf1f4174b 100644 --- a/be/src/vec/functions/function_datetime_string_to_string.h +++ b/be/src/vec/functions/function_datetime_string_to_string.h @@ -91,13 +91,22 @@ class FunctionDateTimeStringToString : public IFunction { } const auto* column_string = context->get_constant_col(1); + if (column_string == nullptr) { + return Status::InvalidArgument( + "The second parameter of the function {} must be a constant.", get_name()); + } + + auto string_vale = column_string->column_ptr->get_data_at(0); + if (string_vale.data == nullptr) { // func(col , null); state->is_valid = false; return IFunction::open(context, scope); } - auto format_str = column_string->column_ptr->get_data_at(0).trim(); + string_vale = string_vale.trim(); + auto format_str = + time_format_type::rewrite_specific_format(string_vale.data, string_vale.size); if (format_str.size > 128) { // exceeds the length limit. state->is_valid = false; @@ -136,6 +145,7 @@ class FunctionDateTimeStringToString : public IFunction { col_res->get_offsets(), vec_null_map_to)); if (nullable_column) { + // input column is nullable const auto& origin_null_map = nullable_column->get_null_map_column().get_data(); for (int i = 0; i < origin_null_map.size(); ++i) { vec_null_map_to[i] |= origin_null_map[i]; @@ -161,14 +171,16 @@ class FunctionDateTimeStringToString : public IFunction { StringRef format(format_state->format_str); - auto len = ts.size(); - res_offsets.resize(len); - res_data.reserve(len * format.size + len); - null_map.resize_fill(len, false); + const auto len = ts.size(); if (!format_state->is_valid) { + res_offsets.resize_fill(len, 0); + null_map.resize_fill(len, true); return Status::OK(); } + res_offsets.resize(len); + res_data.reserve(len * format.size + len); + null_map.resize_fill(len, false); std::visit( [&](auto type) { diff --git a/regression-test/data/nereids_p0/sql_functions/datetime_functions/test_date_function.out b/regression-test/data/nereids_p0/sql_functions/datetime_functions/test_date_function.out index 8c256e42d57f0d..2aef8a1257aeb5 100644 --- a/regression-test/data/nereids_p0/sql_functions/datetime_functions/test_date_function.out +++ b/regression-test/data/nereids_p0/sql_functions/datetime_functions/test_date_function.out @@ -147,6 +147,12 @@ -- !sql -- 2009-10-04 +-- !sql_date_format_long -- +\N + +-- !sql_date_format_long -- +\N + -- !sql -- 2008-11-30T23:59:59 @@ -476,6 +482,12 @@ February -- !sql -- 1 2022-08-01 17:00:31 +-- !sql -- +1 \N + +-- !sql -- +1 \N + -- !sql -- true @@ -494,6 +506,9 @@ true -- !sql_date_format_long -- \N +-- !sql_date_format_long -- +\N + -- !sql -- \N diff --git a/regression-test/suites/nereids_p0/sql_functions/datetime_functions/test_date_function.groovy b/regression-test/suites/nereids_p0/sql_functions/datetime_functions/test_date_function.groovy index 0a986f249e563e..ae7489978397c7 100644 --- a/regression-test/suites/nereids_p0/sql_functions/datetime_functions/test_date_function.groovy +++ b/regression-test/suites/nereids_p0/sql_functions/datetime_functions/test_date_function.groovy @@ -248,6 +248,9 @@ suite("test_date_function") { sql """ truncate table ${tableName} """ sql """ insert into ${tableName} values ("2009-10-04 22:23:00") """ qt_sql """ select date_format(test_datetime, 'yyyy-MM-dd') from ${tableName}; """ + qt_sql_date_format_long """ select date_format(test_datetime, '%f %V %f %l %V %I %S %p %w %r %j %f %l %I %D %w %j %D %e %s %V %f %D %M %s %X %U %v %c %u %x %r %j %a %h %s %m %a %v %u %b') from ${tableName};""" + qt_sql_date_format_long """ select date_format(non_nullable(test_datetime), '%f %V %f %l %V %I %S %p %w %r %j %f %l %I %D %w %j %D %e %s %V %f %D %M %s %X %U %v %c %u %x %r %j %a %h %s %m %a %v %u %b') from ${tableName};""" + sql """ truncate table ${tableName} """ sql """ insert into ${tableName} values ("2010-11-30 23:59:59") """ @@ -465,7 +468,9 @@ suite("test_date_function") { qt_sql """ SELECT id,FROM_UNIXTIME(update_time,"%Y-%m-%d %H:%i:%s") FROM ${tableName} WHERE FROM_UNIXTIME(update_time,"%Y-%m-%d %H:%i:%s") <= '2022-08-01 00:00:00' ORDER BY id; """ qt_sql """ SELECT id,FROM_UNIXTIME(update_time,"%Y-%m-%d %H:%i:%s") FROM ${tableName} WHERE FROM_UNIXTIME(update_time,"%Y-%m-%d %H:%i:%s") LIKE '2022-08-01 00:00:00' ORDER BY id; """ qt_sql """ SELECT id,FROM_UNIXTIME(update_time,"%Y-%m-%d %H:%i:%s") FROM ${tableName} WHERE FROM_UNIXTIME(update_time,"%Y-%m-%d %H:%i:%s") = '2022-08-01 17:00:31' ORDER BY id; """ - + qt_sql """ SELECT id,FROM_UNIXTIME(update_time,null) FROM ${tableName} WHERE FROM_UNIXTIME(update_time,"%Y-%m-%d %H:%i:%s") = '2022-08-01 17:00:31' ORDER BY id; """ + qt_sql """ SELECT id,FROM_UNIXTIME(update_time,'%f %V %f %l %V %I %S %p %w %r %j %f %l %I %D %w %j %D %e %s %V %f %D %M %s %X %U %v %c %u %x %r %j %a %h %s %m %a %v %u %b') FROM ${tableName} WHERE FROM_UNIXTIME(update_time,"%Y-%m-%d %H:%i:%s") = '2022-08-01 17:00:31' ORDER BY id; """ + qt_sql """SELECT CURDATE() = CURRENT_DATE();""" qt_sql """SELECT unix_timestamp(CURDATE()) = unix_timestamp(CURRENT_DATE());""" @@ -475,6 +480,8 @@ suite("test_date_function") { qt_sql """ select date_format('2025-01-01', '%X %V'); """ qt_sql """ select date_format('2022-08-04', '%X %V %w'); """ qt_sql_date_format_long """ select date_format(cast('2011-06-24' as DATETIMEV2(0)), '%f %V %f %l %V %I %S %p %w %r %j %f %l %I %D %w %j %D %e %s %V %f %D %M %s %X %U %v %c %u %x %r %j %a %h %s %m %a %v %u %b') """ + qt_sql_date_format_long """ select date_format(null, '%f %V %f %l %V %I %S %p %w %r %j %f %l %I %D %w %j %D %e %s %V %f %D %M %s %X %U %v %c %u %x %r %j %a %h %s %m %a %v %u %b') """ + qt_sql """ select STR_TO_DATE('Tue Jul 12 20:00:45 CST 2022', '%a %b %e %H:%i:%s %Y'); """ qt_sql """ select STR_TO_DATE('Tue Jul 12 20:00:45 CST 2022', '%a %b %e %T CST %Y'); """ qt_sql """ select STR_TO_DATE('2018-4-2 15:3:28','%Y-%m-%d %H:%i:%s'); """