From d514b340a6eff530356c1d7a7d8467f811fbb909 Mon Sep 17 00:00:00 2001 From: linzhenqi Date: Tue, 2 Dec 2025 09:44:20 +0800 Subject: [PATCH 1/3] [Feature](function) Support function TIME_FORMAT --- be/src/vec/functions/date_time_transforms.h | 135 +---------- .../function_datetime_string_to_string.cpp | 6 + be/src/vec/runtime/time_value.h | 229 ++++++++++++++++++ .../doris/catalog/BuiltinScalarFunctions.java | 2 + .../DateTimeExtractAndTransform.java | 42 ++++ .../functions/scalar/TimeFormat.java | 109 +++++++++ .../visitor/ScalarFunctionVisitor.java | 5 + .../nereids/util/DateTimeFormatterUtils.java | 130 ++++++++++ .../sql-functions/doc_date_functions_test.out | 103 ++++++++ .../doc_date_functions_test.groovy | 161 +++++++++++- 10 files changed, 788 insertions(+), 134 deletions(-) create mode 100644 fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/TimeFormat.java diff --git a/be/src/vec/functions/date_time_transforms.h b/be/src/vec/functions/date_time_transforms.h index b0e73b4aa7f9df..83598ba17b1b6a 100644 --- a/be/src/vec/functions/date_time_transforms.h +++ b/be/src/vec/functions/date_time_transforms.h @@ -31,6 +31,7 @@ #include "udf/udf.h" #include "util/binary_cast.hpp" #include "vec/columns/column_decimal.h" +#include "vec/columns/column.h" #include "vec/columns/column_nullable.h" #include "vec/columns/column_string.h" #include "vec/columns/column_vector.h" @@ -43,6 +44,7 @@ #include "vec/data_types/data_type_decimal.h" #include "vec/data_types/data_type_string.h" #include "vec/functions/date_format_type.h" +#include "vec/runtime/time_value.h" #include "vec/runtime/vdatetime_value.h" #include "vec/utils/util.hpp" @@ -431,139 +433,6 @@ struct FromUnixTimeDecimalImpl { } }; -// Base template for optimized time field(HOUR, MINUTE, SECOND, MS) extraction from Unix timestamp -// Uses lookup_offset to avoid expensive civil_second construction -template -class FunctionTimeFieldFromUnixtime : public IFunction { -public: - static 
constexpr auto name = Impl::name; - static FunctionPtr create() { return std::make_shared>(); } - - String get_name() const override { return name; } - - size_t get_number_of_arguments() const override { return 1; } - - DataTypePtr get_return_type_impl(const ColumnsWithTypeAndName& arguments) const override { - // microsecond_from_unixtime returns Int32, others (hour/minute/second) return Int8 - if constexpr (Impl::ArgType == PrimitiveType::TYPE_DECIMAL64) { - return make_nullable(std::make_shared()); - } else { - return make_nullable(std::make_shared()); - } - } - - // (UTC 9999-12-31 23:59:59) - 24 * 3600 - static const int64_t TIMESTAMP_VALID_MAX = 253402243199L; - - Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments, - uint32_t result, size_t input_rows_count) const override { - using ArgColType = PrimitiveTypeTraits::ColumnType; - using ResColType = std::conditional_t; - using ResItemType = typename ResColType::value_type; - auto res = ResColType::create(); - - const auto* ts_col = - assert_cast(block.get_by_position(arguments[0]).column.get()); - if constexpr (Impl::ArgType == PrimitiveType::TYPE_DECIMAL64) { - // microsecond_from_unixtime only - const auto scale = static_cast(ts_col->get_scale()); - - for (int i = 0; i < input_rows_count; ++i) { - const auto seconds = ts_col->get_intergral_part(i); - const auto fraction = ts_col->get_fractional_part(i); - - if (seconds < 0 || seconds > TIMESTAMP_VALID_MAX) { - return Status::InvalidArgument( - "The input value of TimeFiled(from_unixtime()) must between 0 and " - "253402243199L"); - } - - ResItemType value = Impl::extract_field(fraction, scale); - res->insert_value(value); - } - } else { - auto ctz = context->state()->timezone_obj(); - for (int i = 0; i < input_rows_count; ++i) { - auto date = ts_col->get_element(i); - - if (date < 0 || date > TIMESTAMP_VALID_MAX) { - return Status::InvalidArgument( - "The input value of TimeFiled(from_unixtime()) must between 0 and " - 
"253402243199L"); - } - - ResItemType value = Impl::extract_field(date, ctz); - res->insert_value(value); - } - } - block.replace_by_position(result, std::move(res)); - return Status::OK(); - } -}; - -struct HourFromUnixtimeImpl { - static constexpr PrimitiveType ArgType = PrimitiveType::TYPE_BIGINT; - static constexpr auto name = "hour_from_unixtime"; - - static int8_t extract_field(int64_t local_time, const cctz::time_zone& ctz) { - static const auto epoch = std::chrono::time_point_cast( - std::chrono::system_clock::from_time_t(0)); - cctz::time_point t = epoch + cctz::seconds(local_time); - int offset = ctz.lookup_offset(t).offset; - local_time += offset; - - static const libdivide::divider fast_div_3600(3600); - static const libdivide::divider fast_div_86400(86400); - - int64_t remainder; - if (LIKELY(local_time >= 0)) { - remainder = local_time - local_time / fast_div_86400 * 86400; - } else { - remainder = local_time % 86400; - if (remainder < 0) { - remainder += 86400; - } - } - return static_cast(remainder / fast_div_3600); - } -}; - -struct MinuteFromUnixtimeImpl { - static constexpr PrimitiveType ArgType = PrimitiveType::TYPE_BIGINT; - static constexpr auto name = "minute_from_unixtime"; - - static int8_t extract_field(int64_t local_time, const cctz::time_zone& /*ctz*/) { - static const libdivide::divider fast_div_60(60); - static const libdivide::divider fast_div_3600(3600); - - local_time = local_time - local_time / fast_div_3600 * 3600; - - return static_cast(local_time / fast_div_60); - } -}; - -struct SecondFromUnixtimeImpl { - static constexpr PrimitiveType ArgType = PrimitiveType::TYPE_BIGINT; - static constexpr auto name = "second_from_unixtime"; - - static int8_t extract_field(int64_t local_time, const cctz::time_zone& /*ctz*/) { - return static_cast(local_time % 60); - } -}; - -struct MicrosecondFromUnixtimeImpl { - static constexpr PrimitiveType ArgType = PrimitiveType::TYPE_DECIMAL64; - static constexpr auto name = "microsecond_from_unixtime"; 
- - static int32_t extract_field(int64_t fraction, int scale) { - if (scale < 6) { - fraction *= common::exp10_i64(6 - scale); - } - return static_cast(fraction); - } -}; - #include "common/compile_check_end.h" } // namespace doris::vectorized diff --git a/be/src/vec/functions/function_datetime_string_to_string.cpp b/be/src/vec/functions/function_datetime_string_to_string.cpp index 53a472f31795c5..72fc5edd5a77bc 100644 --- a/be/src/vec/functions/function_datetime_string_to_string.cpp +++ b/be/src/vec/functions/function_datetime_string_to_string.cpp @@ -37,6 +37,9 @@ using FunctionFromUnixTimeNewDecimalOneArg = FunctionDateTimeStringToString>; using FunctionFromUnixTimeNewDecimalTwoArg = FunctionDateTimeStringToString>; +using FunctionTimeFormatDate = FunctionTimeFormat; +using FunctionTimeFormatDateTime = FunctionTimeFormat; +using FunctionTimeFormatTime = FunctionTimeFormat; void register_function_date_time_string_to_string(SimpleFunctionFactory& factory) { factory.register_function(); @@ -47,6 +50,9 @@ void register_function_date_time_string_to_string(SimpleFunctionFactory& factory factory.register_function(); factory.register_function(); factory.register_function(); + factory.register_function(); + factory.register_function(); + factory.register_function(); } } // namespace doris::vectorized diff --git a/be/src/vec/runtime/time_value.h b/be/src/vec/runtime/time_value.h index 10c1a12bb03161..ebe20a9e680a85 100644 --- a/be/src/vec/runtime/time_value.h +++ b/be/src/vec/runtime/time_value.h @@ -28,6 +28,7 @@ #include "runtime/define_primitive_type.h" #include "runtime/primitive_type.h" #include "util/date_func.h" +#include "vec/runtime/vdatetime_value.h" namespace doris { #include "common/compile_check_begin.h" @@ -150,6 +151,234 @@ class TimeValue { } static bool valid(double time) { return time <= MAX_TIME && time >= -MAX_TIME; } + + static bool to_format_string_conservative(const char* format, size_t len, char* to, + size_t max_valid_length, TimeType time) { + // 
If time is negative, we here only add a '-' to the begining of res + // This behavior is consistent with MySQL + if (time < 0) { + memcpy(to, "-", 1); + ++to; + time = -time; + } + + int32_t hour = TimeValue::hour(time); + int32_t minute = TimeValue::minute(time); + int32_t second = TimeValue::second(time); + int32_t microsecond = TimeValue::microsecond(time); + + char* const begin = to; + char buf[64]; + char* pos = nullptr; + char* cursor = buf; + const char* ptr = format; + const char* end = format + len; + char ch = '\0'; + + while (ptr < end) { + if (to - begin + SAFE_FORMAT_STRING_MARGIN > max_valid_length) [[unlikely]] { + return false; + } + if (*ptr != '%' || (ptr + 1) == end) { + *to++ = *ptr++; + continue; + } + ptr++; + switch (ch = *ptr++) { + case 'H': + // Hour (00..838 for TIME type, with at least 2 digits) + if (hour < 100) { + to = write_two_digits_to_string(hour, to); + } else { + pos = int_to_str(hour, cursor); + to = append_with_prefix(cursor, static_cast(pos - cursor), '0', 2, to); + } + break; + case 'h': + case 'I': + // Hour (01..12) + to = write_two_digits_to_string((hour % 24 + 11) % 12 + 1, to); + break; + case 'i': + // Minutes, numeric (00..59) + to = write_two_digits_to_string(minute, to); + break; + case 'k': + // Hour (0..23) without leading zero + pos = int_to_str(hour, cursor); + to = append_with_prefix(cursor, static_cast(pos - cursor), '0', 1, to); + break; + case 'l': + // Hour (1..12) without leading zero + pos = int_to_str((hour % 24 + 11) % 12 + 1, cursor); + to = append_with_prefix(cursor, static_cast(pos - cursor), '0', 1, to); + break; + case 's': + case 'S': + // Seconds (00..59) + to = write_two_digits_to_string(second, to); + break; + case 'f': + // Microseconds (000000..999999) + pos = int_to_str(microsecond, cursor); + to = append_with_prefix(cursor, static_cast(pos - cursor), '0', 6, to); + break; + case 'p': { + // AM or PM + if (hour % 24 >= 12) { + to = append_string("PM", to); + } else { + to = 
append_string("AM", to); + } + break; + } + case 'r': { + // Time, 12-hour (hh:mm:ss followed by AM or PM) + int32_t hour_12 = (hour + 11) % 12 + 1; + *to++ = (char)('0' + (hour_12 / 10)); + *to++ = (char)('0' + (hour_12 % 10)); + *to++ = ':'; + *to++ = (char)('0' + (minute / 10)); + *to++ = (char)('0' + (minute % 10)); + *to++ = ':'; + *to++ = (char)('0' + (second / 10)); + *to++ = (char)('0' + (second % 10)); + if (hour % 24 >= 12) { + to = append_string(" PM", to); + } else { + to = append_string(" AM", to); + } + break; + } + case 'T': { + // Time, 24-hour (hh:mm:ss or hhh:mm:ss for TIME type) + if (hour < 100) { + *to++ = (char)('0' + (hour / 10)); + *to++ = (char)('0' + (hour % 10)); + } else { + // For hours >= 100, convert to string with at least 2 digits + pos = int_to_str(hour, cursor); + to = append_with_prefix(cursor, static_cast(pos - cursor), '0', 2, to); + } + *to++ = ':'; + *to++ = (char)('0' + (minute / 10)); + *to++ = (char)('0' + (minute % 10)); + *to++ = ':'; + *to++ = (char)('0' + (second / 10)); + *to++ = (char)('0' + (second % 10)); + break; + } + case '%': + *to++ = '%'; + break; + case 'Y': + // Year, 4 digits - 4 zeros + to = append_string("0000", to); + break; + case 'y': + case 'm': + case 'd': + // Year (2 digits), Month, Day - insert 2 zeros + to = write_two_digits_to_string(0, to); + break; + case 'c': + case 'e': + // Month (0..12) or Day without leading zero - insert 1 zero + to = append_string("0", to); + break; + case 'M': + case 'W': + case 'j': + case 'D': + case 'U': + case 'u': + case 'V': + case 'v': + case 'x': + case 'X': + case 'w': + // These specifiers are not supported for TIME type + return false; + default: + *to++ = ch; + break; + } + } + *to++ = '\0'; + return true; + } + +private: + static constexpr char digits100[201] = + "00010203040506070809" + "10111213141516171819" + "20212223242526272829" + "30313233343536373839" + "40414243444546474849" + "50515253545556575859" + "60616263646566676869" + 
"70717273747576777879" + "80818283848586878889" + "90919293949596979899"; + + static char* int_to_str(uint64_t val, char* to) { + char buf[64]; + char* ptr = buf; + // Use do/while for 0 value + do { + *ptr++ = '0' + (val % 10); + val /= 10; + } while (val); + + while (ptr > buf) { + *to++ = *--ptr; + } + return to; + } + + static char* append_string(const char* from, char* to) { + while (*from) { + *to++ = *from++; + } + return to; + } + + static char* append_with_prefix(const char* str, int str_len, char prefix, int target_len, + char* to) { + // full_len is the lower bound. if less, use prefix to pad. if greater, accept all. + int diff = target_len - str_len; + // use prefix to pad + while (diff-- > 0) { + *to++ = prefix; + } + + memcpy(to, str, str_len); + return to + str_len; + } + + static char* write_two_digits_to_string(int number, char* dst) { + memcpy(dst, &digits100[number * 2], 2); + return dst + 2; + } + + static bool is_date_related_specifier(char spec) { + switch (spec) { + case 'Y': + case 'y': + case 'M': + case 'm': + case 'b': + case 'c': + case 'd': + case 'D': + case 'e': + case 'j': + case 'U': + return true; + default: + return false; + } + } }; } // namespace doris #include "common/compile_check_end.h" diff --git a/fe/fe-core/src/main/java/org/apache/doris/catalog/BuiltinScalarFunctions.java b/fe/fe-core/src/main/java/org/apache/doris/catalog/BuiltinScalarFunctions.java index c8c6bde4f94533..a32b7d19fb3453 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/catalog/BuiltinScalarFunctions.java +++ b/fe/fe-core/src/main/java/org/apache/doris/catalog/BuiltinScalarFunctions.java @@ -489,6 +489,7 @@ import org.apache.doris.nereids.trees.expressions.functions.scalar.Tanh; import org.apache.doris.nereids.trees.expressions.functions.scalar.Time; import org.apache.doris.nereids.trees.expressions.functions.scalar.TimeDiff; +import org.apache.doris.nereids.trees.expressions.functions.scalar.TimeFormat; import 
org.apache.doris.nereids.trees.expressions.functions.scalar.TimeToSec; import org.apache.doris.nereids.trees.expressions.functions.scalar.Timestamp; import org.apache.doris.nereids.trees.expressions.functions.scalar.ToBase64; @@ -1046,6 +1047,7 @@ public class BuiltinScalarFunctions implements FunctionHelper { scalar(Tanh.class, "tanh"), scalar(Time.class, "time"), scalar(TimeDiff.class, "timediff"), + scalar(TimeFormat.class, "time_format"), scalar(TimeToSec.class, "time_to_sec"), scalar(Timestamp.class, "timestamp"), scalar(ToBase64.class, "to_base64"), diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/executable/DateTimeExtractAndTransform.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/executable/DateTimeExtractAndTransform.java index d63b935dedc605..cbeb4c34a3a7ac 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/executable/DateTimeExtractAndTransform.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/executable/DateTimeExtractAndTransform.java @@ -44,6 +44,7 @@ import org.apache.doris.nereids.types.DecimalV3Type; import org.apache.doris.nereids.types.StringType; import org.apache.doris.nereids.types.TimeV2Type; +import org.apache.doris.nereids.util.DateTimeFormatterUtils; import org.apache.doris.nereids.util.DateUtils; import org.apache.doris.qe.ConnectContext; @@ -306,6 +307,47 @@ public static Expression dateFormat(DateTimeV2Literal date, StringLikeLiteral fo ((int) date.getMicroSecond() * 1000)))); } + /** + * time_format constant folding for time literal. 
+ */ + @ExecFunction(name = "time_format") + public static Expression timeFormat(TimeV2Literal time, StringLikeLiteral format) { + if (StringUtils.trim(format.getValue()).length() > 128) { + throw new AnalysisException("The length of format string in time_format() function should not be greater" + + " than 128."); + } + return new VarcharLiteral(DateTimeFormatterUtils.formatTimeLiteral(time, format.getValue())); + } + + /** + * time_format constant folding for datev2 literal. + */ + @ExecFunction(name = "time_format") + public static Expression timeFormat(DateV2Literal date, StringLikeLiteral format) { + if (StringUtils.trim(format.getValue()).length() > 128) { + throw new AnalysisException("The length of format string in time_format() function should not be greater" + + " than 128."); + } + DateTimeV2Literal dateTime = new DateTimeV2Literal(date.getYear(), date.getMonth(), date.getDay(), + 0, 0, 0, 0); + return timeFormat(dateTime, format); + } + + /** + * time_format constant folding for datetimev2 literal. 
+ */ + @ExecFunction(name = "time_format") + public static Expression timeFormat(DateTimeV2Literal dateTime, StringLikeLiteral format) { + if (StringUtils.trim(format.getValue()).length() > 128) { + throw new AnalysisException("The length of format string in time_format() function should not be greater" + + " than 128."); + } + TimeV2Literal time = new TimeV2Literal((int) dateTime.getHour(), (int) dateTime.getMinute(), + (int) dateTime.getSecond(), (int) dateTime.getMicroSecond(), dateTime.getScale(), + false); + return timeFormat(time, format); + } + /** * datetime arithmetic function date */ diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/TimeFormat.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/TimeFormat.java new file mode 100644 index 00000000000000..a2d86d4ee0f2d8 --- /dev/null +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/TimeFormat.java @@ -0,0 +1,109 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
package org.apache.doris.nereids.trees.expressions.functions.scalar;

import org.apache.doris.catalog.FunctionSignature;
import org.apache.doris.nereids.trees.expressions.Expression;
import org.apache.doris.nereids.trees.expressions.functions.AlwaysNullable;
import org.apache.doris.nereids.trees.expressions.functions.ExplicitlyCastableSignature;
import org.apache.doris.nereids.trees.expressions.functions.Monotonic;
import org.apache.doris.nereids.trees.expressions.literal.Literal;
import org.apache.doris.nereids.trees.expressions.shape.BinaryExpression;
import org.apache.doris.nereids.trees.expressions.visitor.ExpressionVisitor;
import org.apache.doris.nereids.types.DateTimeV2Type;
import org.apache.doris.nereids.types.DateV2Type;
import org.apache.doris.nereids.types.TimeV2Type;
import org.apache.doris.nereids.types.VarcharType;

import com.google.common.base.Preconditions;
import com.google.common.collect.ImmutableList;

import java.util.List;

/**
 * ScalarFunction 'time_format'.
 *
 * <p>Takes a TIME, DATETIMEV2 or DATEV2 value plus a varchar pattern and returns a varchar.
 * The result is always nullable.
 */
public class TimeFormat extends ScalarFunction
        implements BinaryExpression, ExplicitlyCastableSignature, AlwaysNullable, Monotonic {

    public static final List<FunctionSignature> SIGNATURES = ImmutableList.of(
            FunctionSignature.ret(VarcharType.SYSTEM_DEFAULT)
                    .args(TimeV2Type.WILDCARD, VarcharType.SYSTEM_DEFAULT),
            FunctionSignature.ret(VarcharType.SYSTEM_DEFAULT)
                    .args(DateTimeV2Type.WILDCARD, VarcharType.SYSTEM_DEFAULT),
            FunctionSignature.ret(VarcharType.SYSTEM_DEFAULT).args(DateV2Type.INSTANCE, VarcharType.SYSTEM_DEFAULT)
    );

    /**
     * constructor with 2 arguments.
     */
    public TimeFormat(Expression arg0, Expression arg1) {
        super("time_format", arg0, arg1);
    }

    /** constructor for withChildren and reuse signature */
    private TimeFormat(ScalarFunctionParams functionParams) {
        super(functionParams);
    }

    /**
     * withChildren.
     */
    @Override
    public TimeFormat withChildren(List<Expression> children) {
        Preconditions.checkArgument(children.size() == 2);
        return new TimeFormat(getFunctionParams(children));
    }

    @Override
    public List<FunctionSignature> getSignatures() {
        return SIGNATURES;
    }

    @Override
    public <R, C> R accept(ExpressionVisitor<R, C> visitor, C context) {
        return visitor.visitTimeFormat(this, context);
    }

    /**
     * Never claims monotonicity.
     *
     * <p>The formatted string does not preserve the order of the input: for DATE/DATETIME
     * arguments the date part is not rendered (date specifiers come out as zeros), negative
     * TIME values only get a leading '-', and hours of 100 or more are printed with three
     * digits, so e.g. "100" sorts before "12". Reporting such a function as monotonic would
     * let range-based reasoning over the first argument draw wrong conclusions.
     */
    @Override
    public boolean isMonotonic(Literal lower, Literal upper) {
        return false;
    }

    @Override
    public boolean isPositive() {
        return true;
    }

    @Override
    public int getMonotonicFunctionChildIndex() {
        return 0;
    }

    @Override
    public Expression withConstantArgs(Expression literal) {
        return new TimeFormat(literal, child(1));
    }
}
org.apache.doris.nereids.trees.expressions.functions.scalar.TimeFormat; import org.apache.doris.nereids.trees.expressions.functions.scalar.Timestamp; import org.apache.doris.nereids.trees.expressions.functions.scalar.ToBase64; import org.apache.doris.nereids.trees.expressions.functions.scalar.ToBase64Binary; @@ -2345,6 +2346,10 @@ default R visitTimeDiff(TimeDiff timeDiff, C context) { return visitScalarFunction(timeDiff, context); } + default R visitTimeFormat(TimeFormat timeFormat, C context) { + return visitScalarFunction(timeFormat, context); + } + default R visitTimestamp(Timestamp timestamp, C context) { return visitScalarFunction(timestamp, context); } diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/util/DateTimeFormatterUtils.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/util/DateTimeFormatterUtils.java index dd342820343a3d..6f39ef7afbf572 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/util/DateTimeFormatterUtils.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/util/DateTimeFormatterUtils.java @@ -17,6 +17,8 @@ package org.apache.doris.nereids.util; +import org.apache.doris.nereids.trees.expressions.literal.TimeV2Literal; + import java.time.format.DateTimeFormatter; import java.time.format.DateTimeFormatterBuilder; import java.time.format.ResolverStyle; @@ -106,4 +108,132 @@ public class DateTimeFormatterUtils { .append(TIME_FORMATTER) .append(ZONE_FORMATTER) .toFormatter().withResolverStyle(ResolverStyle.STRICT); + + /** + * Format TimeV2 literal according to MySQL time_format spec. 
+ */ + public static String formatTimeLiteral(TimeV2Literal time, String pattern) { + double value = (double) time.getValue(); + int hour = Math.abs(time.getHour()); + int minute = Math.abs(time.getMinute()); + int second = Math.abs(time.getSecond()); + int microsecond = Math.abs(time.getMicroSecond()); + + StringBuilder builder = new StringBuilder(pattern.length() + 8); + if (value < 0) { + builder.append('-'); + } + + for (int i = 0; i < pattern.length(); i++) { + char c = pattern.charAt(i); + if (c != '%' || i == pattern.length() - 1) { + builder.append(c); + continue; + } + char spec = pattern.charAt(++i); + switch (spec) { + case 'H': + if (hour < 100) { + appendTwoDigits(builder, hour); + } else { + appendWithPad(builder, hour, 2, '0'); + } + break; + case 'h': + case 'I': { + int hour12 = (hour % 24 + 11) % 12 + 1; + appendTwoDigits(builder, hour12); + break; + } + case 'i': + appendTwoDigits(builder, minute); + break; + case 'k': + appendWithPad(builder, hour, 1, '0'); + break; + case 'l': { + int hour12 = (hour % 24 + 11) % 12 + 1; + appendWithPad(builder, hour12, 1, '0'); + break; + } + case 's': + case 'S': + appendTwoDigits(builder, second); + break; + case 'f': + appendWithPad(builder, microsecond, 6, '0'); + break; + case 'p': + builder.append((hour % 24 >= 12) ? "PM" : "AM"); + break; + case 'r': { + int hour12 = (hour % 24 + 11) % 12 + 1; + appendTwoDigits(builder, hour12); + builder.append(':'); + appendTwoDigits(builder, minute); + builder.append(':'); + appendTwoDigits(builder, second); + builder.append(' '); + builder.append((hour % 24 >= 12) ? 
"PM" : "AM"); + break; + } + case 'T': + if (hour < 100) { + appendTwoDigits(builder, hour); + } else { + appendWithPad(builder, hour, 2, '0'); + } + builder.append(':'); + appendTwoDigits(builder, minute); + builder.append(':'); + appendTwoDigits(builder, second); + break; + case 'Y': + // Year, 4 digits + builder.append("0000"); + break; + case 'y': + case 'm': + case 'd': + // Year (2 digits), Month, Day - insert 2 zeros + builder.append("00"); + break; + case 'c': + case 'e': + // Month (0..12) or Day without leading zero - insert 1 zero + builder.append('0'); + break; + case 'M': + case 'W': + case 'j': + case 'D': + case 'U': + case 'u': + case 'V': + case 'v': + case 'x': + case 'X': + case 'w': + // These specifiers are not supported for TIME type + return null; + default: + builder.append(spec); + break; + } + } + return builder.toString(); + } + + private static void appendTwoDigits(StringBuilder builder, int value) { + builder.append((char) ('0' + (value / 10) % 10)); + builder.append((char) ('0' + (value % 10))); + } + + private static void appendWithPad(StringBuilder builder, int value, int targetLength, char padChar) { + String str = Integer.toString(Math.abs(value)); + for (int i = str.length(); i < targetLength; i++) { + builder.append(padChar); + } + builder.append(str); + } } diff --git a/regression-test/data/doc/sql-manual/sql-functions/doc_date_functions_test.out b/regression-test/data/doc/sql-manual/sql-functions/doc_date_functions_test.out index 25c5b1672aba9b..0c41e96aa35b3f 100644 --- a/regression-test/data/doc/sql-manual/sql-functions/doc_date_functions_test.out +++ b/regression-test/data/doc/sql-manual/sql-functions/doc_date_functions_test.out @@ -2065,6 +2065,109 @@ da fanadur -- !to_seconds_12 -- 63902953845 +-- !time_format_1 -- +1 00:00:00 00 0 12 12 12 00 00 00 000000 AM 12:00:00 AM 00:00:00 00:00:00.000000 0 00 12 12 12 00 000000 00 AM 00:00:00 12:00:00 AM 12:12 12 0 12 00 12 AM 000000 00 00 00:00:00 12:00:00 AM +2 00:00:00.123456 00 
0 12 12 12 00 00 00 123456 AM 12:00:00 AM 00:00:00 00:00:00.123456 0 00 12 12 12 00 123456 00 AM 00:00:00 12:00:00 AM 12:12 12 0 12 00 12 AM 123456 00 00 00:00:00 12:00:00 AM +3 12:34:56 12 12 12 12 12 34 56 56 000000 PM 12:34:56 PM 12:34:56 12:34:56.000000 12 12 12 12 12 56 000000 34 PM 12:34:56 12:34:56 PM 12:12 12 12 12 12 12 PM 000000 56 34 12:34:56 12:34:56 PM +4 12:34:56.789012 12 12 12 12 12 34 56 56 789012 PM 12:34:56 PM 12:34:56 12:34:56.789012 12 12 12 12 12 56 789012 34 PM 12:34:56 12:34:56 PM 12:12 12 12 12 12 12 PM 789012 56 34 12:34:56 12:34:56 PM +5 23:59:59 23 23 11 11 11 59 59 59 000000 PM 11:59:59 PM 23:59:59 23:59:59.000000 23 23 11 11 11 59 000000 59 PM 23:59:59 11:59:59 PM 11:11 11 23 11 23 11 PM 000000 59 59 23:59:59 11:59:59 PM +6 23:59:59.999999 23 23 11 11 11 59 59 59 999999 PM 11:59:59 PM 23:59:59 23:59:59.999999 23 23 11 11 11 59 999999 59 PM 23:59:59 11:59:59 PM 11:11 11 23 11 23 11 PM 999999 59 59 23:59:59 11:59:59 PM +7 08:00:00 08 8 08 08 8 00 00 00 000000 AM 08:00:00 AM 08:00:00 08:00:00.000000 8 08 8 08 08 00 000000 00 AM 08:00:00 08:00:00 AM 08:08 8 8 08 08 08 AM 000000 00 00 08:00:00 08:00:00 AM +8 15:00:00 15 15 03 03 3 00 00 00 000000 PM 03:00:00 PM 15:00:00 15:00:00.000000 15 15 3 03 03 00 000000 00 PM 15:00:00 03:00:00 PM 03:03 3 15 03 15 03 PM 000000 00 00 15:00:00 03:00:00 PM +9 100:00:00 100 100 04 04 4 00 00 00 000000 AM 04:00:00 AM 100:00:00 100:00:00.000000 100 100 4 04 04 00 000000 00 AM 100:00:00 04:00:00 AM 04:04 4 100 04 100 04 AM 000000 00 00 100:00:00 04:00:00 AM +10 123:45:56 123 123 03 03 3 45 56 56 000000 AM 03:45:56 AM 123:45:56 123:45:56.000000 123 123 3 03 03 56 000000 45 AM 123:45:56 03:45:56 AM 03:03 3 123 03 123 03 AM 000000 56 45 123:45:56 03:45:56 AM +11 838:59:59.999999 \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N +12 -00:00:01 -00 -0 -12 -12 -12 -00 -01 -01 -000000 -AM -12:00:01 AM -00:00:01 -00:00:01.000000 -0 00 12 12 12 -01 000000 00 AM -00:00:01 12:00:01 AM 12:12 -12 0 12 00 12 AM -000000 
01 00 00:00:01 12:00:01 AM +13 -12:34:56.000001 -12 -12 -12 -12 -12 -34 -56 -56 -000001 -PM -12:34:56 PM -12:34:56 -12:34:56.000001 -12 12 12 12 12 -56 000001 34 PM -12:34:56 12:34:56 PM 12:12 -12 12 12 12 12 PM -000001 56 34 12:34:56 12:34:56 PM +14 -838:59:59.999999 \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N + +-- !time_format_2 -- +00 0 12 12 12 AM + +-- !time_format_3 -- +00 123456 00 00:00:00 + +-- !time_format_4 -- +12:34:56 PM 12:34:56 12:12 12 + +-- !time_format_5 -- +12 12 12 12 12 PM 56 + +-- !time_format_6 -- +000000 59 59 PM 11:59:59 PM + +-- !time_format_7 -- +23:59:59 11:59:59 PM 23:59:59.999999 + +-- !time_format_8 -- +8 8 08 08 08 AM 000000 + +-- !time_format_9 -- +00 00 000000 15:00:00 03:00:00 PM PM + +-- !time_format_10 -- +\N + +-- !time_format_11 -- +\N + +-- !time_format_12 -- +\N + +-- !time_format_13 -- +00:00:01 00 12:00:01 AM 01 000000 12:12 + +-- !time_format_14 -- +PM 12 12 12 12 12:34:56 000001 + +-- !time_format_15 -- +\N + +-- !time_format_16 -- +0000-00-00 12:34:56 + +-- !time_format_17 -- +00-00-00 + +-- !time_format_18 -- +0000 00 00 + +-- !time_format_19 -- +0-0 + +-- !time_format_20 -- +0000/00/00 15:45:30.123456 + +-- !time_format_21 -- +\N + +-- !time_format_22 -- +\N + +-- !time_format_23 -- +\N + +-- !time_format_24 -- +\N + +-- !time_format_25 -- +\N + +-- !time_format_26 -- +\N + +-- !time_format_27 -- +\N + +-- !time_format_28 -- +\N + +-- !time_format_29 -- +\N + +-- !time_format_30 -- +\N + -- !dateceil -- 2025-10-10T12:34:56 2026-01-01T00:00 2025-11-01T00:00 2025-10-11T00:00 2025-10-10T13:00 2025-10-10T12:35 2025-10-10T12:34:56 2025-01-01T00:00 2025-01-01T00:00 2025-01-01T00:00 2025-01-01T00:00 2025-01-01T00:00 2025-01-01T00:00 2025-01-01T00:00 diff --git a/regression-test/suites/doc/sql-manual/sql-functions/doc_date_functions_test.groovy b/regression-test/suites/doc/sql-manual/sql-functions/doc_date_functions_test.groovy index 41930efff3ba91..5a56ed970e1c4a 100644 --- 
a/regression-test/suites/doc/sql-manual/sql-functions/doc_date_functions_test.groovy +++ b/regression-test/suites/doc/sql-manual/sql-functions/doc_date_functions_test.groovy @@ -1405,6 +1405,86 @@ suite("doc_date_functions_test") { testFoldConst("SELECT YEARWEEK('2023-01-02', 5) AS yearweek_mode5") testFoldConst("SELECT YEARWEEK('2023-12-25', 1) AS date_type_mode1") + //101. TIME_FORMAT function tests + sql """ DROP TABLE IF EXISTS test_time_format; """ + sql """CREATE TABLE test_time_format ( + id INT, + tm VARCHAR(32) + ) DUPLICATE KEY(id) + PROPERTIES ( 'replication_num' = '1' ); + """ + sql """ INSERT INTO test_time_format VALUES + ( 1, '00:00:00'), + ( 2, '00:00:00.123456'), + ( 3, '12:34:56'), + ( 4, '12:34:56.789012'), + ( 5, '23:59:59'), + ( 6, '23:59:59.999999'), + ( 7, '08:00:00'), + ( 8, '15:00:00'), + ( 9, '100:00:00'), + (10, '123:45:56'), + (11, '838:59:59.999999'), + (12, '-00:00:01'), + (13, '-12:34:56.000001'), + (14, '-838:59:59.999999') + """ + qt_time_format_1 """SELECT + id, + tm, + TIME_FORMAT(tm, '%H'), + TIME_FORMAT(tm, '%k'), + TIME_FORMAT(tm, '%h'), + TIME_FORMAT(tm, '%I'), + TIME_FORMAT(tm, '%l'), + TIME_FORMAT(tm, '%i'), + TIME_FORMAT(tm, '%s'), + TIME_FORMAT(tm, '%S'), + TIME_FORMAT(tm, '%f'), + TIME_FORMAT(tm, '%p'), + TIME_FORMAT(tm, '%r'), + TIME_FORMAT(tm, '%T'), + TIME_FORMAT(tm, '%H:%i:%s.%f'), + TIME_FORMAT(tm, '%k %H %l %I %h'), + TIME_FORMAT(tm, '%s %f %i %p'), + TIME_FORMAT(tm, '%T %r %h:%I'), + TIME_FORMAT(tm, '%l %k %I %H %h %p'), + TIME_FORMAT(tm, '%f %s %i %T %r') + FROM test_time_format + ORDER BY id; + """ + qt_time_format_2 """SELECT TIME_FORMAT('2023-01-01 00:00:00', '%H %k %l %I %h %p')""" + qt_time_format_3 """SELECT TIME_FORMAT('2023-01-01 00:00:00.123456', '%s %f %i %T')""" + qt_time_format_4 """SELECT TIME_FORMAT('2023-01-01 12:34:56', '%r %T %h:%I %l')""" + qt_time_format_5 """SELECT TIME_FORMAT('2023-01-01 12:34:56.789012', '%k %H %I %l %h %p %s')""" + qt_time_format_6 """SELECT TIME_FORMAT('2023-01-01 
23:59:59', '%f %s %i %p %r')""" + qt_time_format_7 """SELECT TIME_FORMAT('2023-01-01 23:59:59.999999', '%T %r %H:%i:%s.%f')""" + qt_time_format_8 """SELECT TIME_FORMAT('2023-01-01 08:00:00', '%l %k %h %I %H %p %f')""" + qt_time_format_9 """SELECT TIME_FORMAT('2023-01-01 15:00:00', '%s %i %f %T %r %p')""" + qt_time_format_10 """SELECT TIME_FORMAT('2023-01-01 100:00:00', '%H %l %I %k %h %s %f')""" + qt_time_format_11 """SELECT TIME_FORMAT('2023-01-01 123:45:56', '%p %r %T %i %s %f %H')""" + qt_time_format_12 """SELECT TIME_FORMAT('2023-01-01 838:59:59.999999', '%k %f %s %I %l %H %p')""" + qt_time_format_13 """SELECT TIME_FORMAT('2023-01-01 00:00:01', '%T %i %r %s %f %h:%I')""" + qt_time_format_14 """SELECT TIME_FORMAT('2023-01-01 12:34:56.000001', '%p %H %k %l %I %T %f')""" + qt_time_format_15 """SELECT TIME_FORMAT('2023-01-01 838:59:59.999999', '%s %i %f %r %p %H:%i:%s')""" + + // Time format with date placeholders (Year, Month, Day return zeros or NULL) + qt_time_format_16 """SELECT TIME_FORMAT('2023-01-01 12:34:56.789012', '%Y-%m-%d %H:%i:%s')""" + qt_time_format_17 """SELECT TIME_FORMAT('2023-01-01 01:02:03.456789', '%y-%m-%d')""" + qt_time_format_18 """SELECT TIME_FORMAT('2023-01-01 23:59:59.999999', '%Y %m %d')""" + qt_time_format_19 """SELECT TIME_FORMAT('2023-01-01 00:00:00', '%c-%e')""" + qt_time_format_20 """SELECT TIME_FORMAT('2023-01-01 15:45:30.123456', '%Y/%m/%d %H:%i:%s.%f')""" + qt_time_format_21 """SELECT TIME_FORMAT('2023-01-01 12:34:56', '%M')""" + qt_time_format_22 """SELECT TIME_FORMAT('2023-01-01 12:34:56', '%W')""" + qt_time_format_23 """SELECT TIME_FORMAT('2023-01-01 12:34:56', '%j')""" + qt_time_format_24 """SELECT TIME_FORMAT('2023-01-01 12:34:56', '%D')""" + qt_time_format_25 """SELECT TIME_FORMAT('2023-01-01 12:34:56', '%U')""" + qt_time_format_26 """SELECT TIME_FORMAT('2023-01-01 12:34:56', '%u')""" + qt_time_format_27 """SELECT TIME_FORMAT('2023-01-01 12:34:56', '%V')""" + qt_time_format_28 """SELECT TIME_FORMAT('2023-01-01 12:34:56', 
'%v')""" + qt_time_format_29 """SELECT TIME_FORMAT('2023-01-01 12:34:56', '%x')""" + qt_time_format_30 """SELECT TIME_FORMAT('2023-01-01 12:34:56', '%X %w')""" + // TO_SECONDS function tests qt_to_seconds_1 """select to_seconds('2007-10-07')""" qt_to_seconds_2 """select to_seconds('2007-10-07 10:03:09')""" @@ -1431,7 +1511,7 @@ suite("doc_date_functions_test") { testFoldConst("SELECT to_seconds(20250101)") testFoldConst("SELECT to_seconds(20250101123045)") - // Test constant folding for Group 1 functions (基础日期函数) + // Test constant folding for Group 1 functions // 1. CONVERT_TZ function constant folding tests testFoldConst("SELECT CONVERT_TZ(CAST('2019-08-01 13:21:03' AS DATETIME), 'Asia/Shanghai', 'America/Los_Angeles')") @@ -2058,6 +2138,85 @@ suite("doc_date_functions_test") { testFoldConst("SELECT MAKETIME(123, -4, 40)") testFoldConst("SELECT MAKETIME(7, 8, -23)") + // 100. TIME_FORMAT function constant folding tests + testFoldConst("SELECT TIME_FORMAT('00:00:00', '%H') AS zero_24hour") + testFoldConst("SELECT TIME_FORMAT('00:00:00', '%k') AS zero_24hour_no_pad") + testFoldConst("SELECT TIME_FORMAT('00:00:00', '%h') AS zero_12hour") + testFoldConst("SELECT TIME_FORMAT('00:00:00', '%I') AS zero_12hour_alt") + testFoldConst("SELECT TIME_FORMAT('00:00:00', '%l') AS zero_12hour_no_pad") + testFoldConst("SELECT TIME_FORMAT('838:59:59', '%k:%i:%S') AS max_k_format") + testFoldConst("SELECT TIME_FORMAT('838:59:59', '%H.%i.%s.%f') AS max_with_micro_sep") + testFoldConst("SELECT TIME_FORMAT('838:59:59', '%T') AS max_time_T") + testFoldConst("SELECT TIME_FORMAT('838:59:59', '%r') AS max_time_r") + testFoldConst("SELECT TIME_FORMAT('-838:59:59', '%k %i %S') AS min_k_format") + testFoldConst("SELECT TIME_FORMAT('-838:59:59', '%H%i%S%f') AS min_compact") + testFoldConst("SELECT TIME_FORMAT('839:00:00', '%T') AS beyond_max_T") + testFoldConst("SELECT TIME_FORMAT('-839:00:00', '%r') AS beyond_min_r") + testFoldConst("SELECT TIME_FORMAT('12:34:56.123456', '%f') AS 
only_microseconds") + testFoldConst("SELECT TIME_FORMAT('12:34:56.789012', '%k.%f') AS hour_microsec") + testFoldConst("SELECT TIME_FORMAT('23:59:59.999999', '%T.%f') AS T_format_micro") + testFoldConst("SELECT TIME_FORMAT('00:00:00.000001', '%f only') AS micro_with_text") + testFoldConst("SELECT TIME_FORMAT('13:45:30', '%H vs %k vs %h vs %I vs %l') AS all_hour_formats") + testFoldConst("SELECT TIME_FORMAT('03:07:09', '%H-%k-%h-%I-%l') AS morning_all_formats") + testFoldConst("SELECT TIME_FORMAT('00:30:45', '%H|%k|%h|%I|%l') AS midnight_all_formats") + testFoldConst("SELECT TIME_FORMAT('12:00:00', '%H/%k/%h/%I/%l') AS noon_all_formats") + testFoldConst("SELECT TIME_FORMAT('23:59:59', '%k,%h,%l,%p') AS late_night_formats") + testFoldConst("SELECT TIME_FORMAT('12:34:56', '%S') AS uppercase_S") + testFoldConst("SELECT TIME_FORMAT('12:34:09', '%S vs %s') AS both_seconds") + testFoldConst("SELECT TIME_FORMAT('12:34:05', '%k:%i:%S') AS k_i_S") + testFoldConst("SELECT TIME_FORMAT('15:30:45', '%T') AS T_afternoon") + testFoldConst("SELECT TIME_FORMAT('03:07:22', '%T') AS T_morning") + testFoldConst("SELECT TIME_FORMAT('15:30:45', '%r') AS r_afternoon") + testFoldConst("SELECT TIME_FORMAT('03:07:22', '%r') AS r_morning") + testFoldConst("SELECT TIME_FORMAT('00:00:00', '%T vs %r') AS T_vs_r_midnight") + testFoldConst("SELECT TIME_FORMAT('12:00:00', '%T vs %r') AS T_vs_r_noon") + testFoldConst("SELECT TIME_FORMAT('13:45:30', '%p') AS only_pm") + testFoldConst("SELECT TIME_FORMAT('09:15:20', '%p') AS only_am") + testFoldConst("SELECT TIME_FORMAT('23:59:59', '%p at %l:%i') AS pm_natural") + testFoldConst("SELECT TIME_FORMAT('00:30:45', '%p-%l-%i-%S') AS am_dashes") + testFoldConst("SELECT TIME_FORMAT('15:07:22', '%p%p%p') AS triple_pm") + testFoldConst("SELECT TIME_FORMAT('12:34:56', '%i') AS only_minutes") + testFoldConst("SELECT TIME_FORMAT('12:05:56', '%i') AS minutes_leading_zero") + testFoldConst("SELECT TIME_FORMAT('12:00:00', '%i:%S') AS min_sec_only") + 
testFoldConst("SELECT TIME_FORMAT('23:59:59', '%i%S') AS min_sec_compact") + testFoldConst("SELECT TIME_FORMAT('500:30:45', '%k:%i:%S') AS large_k") + testFoldConst("SELECT TIME_FORMAT('700:00:00', '%H-%k') AS large_H_k") + testFoldConst("SELECT TIME_FORMAT('100:15:30', '%T') AS large_T") + testFoldConst("SELECT TIME_FORMAT('838:00:00', '%k only') AS max_hour_k") + testFoldConst("SELECT TIME_FORMAT('-12:34:56', '%k:%i:%S') AS negative_k") + testFoldConst("SELECT TIME_FORMAT('-100:30:45', '%T') AS negative_T") + testFoldConst("SELECT TIME_FORMAT('-05:07:09', '%r') AS negative_r") + testFoldConst("SELECT TIME_FORMAT('-838:59:59', '%H%k%h%I%l') AS negative_max_all") + testFoldConst("SELECT TIME_FORMAT('12:34:56', '%%H=%%k') AS percent_escaped") + testFoldConst("SELECT TIME_FORMAT('12:34:56', '%% %T %%') AS percent_around_T") + testFoldConst("SELECT TIME_FORMAT('12:34:56', '%H\\:%i\\:%s') AS backslash_colon") + testFoldConst("SELECT TIME_FORMAT('12:34:56', '%k-%i-%S-%f') AS all_with_dashes") + testFoldConst("SELECT TIME_FORMAT('12:34:56', '') AS empty_format") + testFoldConst("SELECT TIME_FORMAT('12:34:56', 'no specifiers at all') AS literal_only") + testFoldConst("SELECT TIME_FORMAT('15:45:30', '%k%i%S%f%p%T%r') AS everything_combined") + testFoldConst("SELECT TIME_FORMAT('03:07:09', '%l o clock %i minutes %S seconds %p') AS natural_lang") + testFoldConst("SELECT TIME_FORMAT('23:59:59', '%H=%k, %h=%I=%l, %p') AS hour_comparisons") + testFoldConst("SELECT TIME_FORMAT('12:00:00', 'Noon: %T or %r?') AS noon_question") + testFoldConst("SELECT TIME_FORMAT('00:00:00', 'Midnight: %k|%h|%l %p') AS midnight_formats") + testFoldConst("SELECT TIME_FORMAT('2023-12-25 15:30:45', '%k:%i:%S') AS datetime_k") + testFoldConst("SELECT TIME_FORMAT('2023-12-25 03:07:22', '%l:%i %p') AS datetime_12h") + testFoldConst("SELECT TIME_FORMAT(NULL, '%T') AS null_time_T") + testFoldConst("SELECT TIME_FORMAT('12:34:56', NULL) AS null_format") + testFoldConst("SELECT TIME_FORMAT(NULL, NULL) AS 
both_null") + + // TIME_FORMAT with date placeholders constant folding tests + testFoldConst("SELECT TIME_FORMAT('2023-12-25 15:30:45', '%Y-%m-%d %H:%i:%s') AS date_with_time") + testFoldConst("SELECT TIME_FORMAT('2023-12-25 08:15:30.123456', '%y/%m/%d %T.%f') AS short_date_format") + testFoldConst("SELECT TIME_FORMAT('2023-12-25 23:59:59', '%Y %m %d') AS year_month_day") + testFoldConst("SELECT TIME_FORMAT('2023-12-25 12:34:56', '%c-%e') AS month_day_no_pad") + testFoldConst("SELECT TIME_FORMAT('2023-12-25 10:20:30.987654', '%Y/%m/%d %H:%i:%s.%f') AS full_datetime") + testFoldConst("SELECT TIME_FORMAT('2023-12-25 15:30:45', '%M') AS month_name") + testFoldConst("SELECT TIME_FORMAT('2023-12-25 15:30:45', '%W') AS weekday_name") + testFoldConst("SELECT TIME_FORMAT('2023-12-25 15:30:45', '%j') AS day_of_year") + testFoldConst("SELECT TIME_FORMAT('2023-12-25 15:30:45', '%D') AS day_with_suffix") + testFoldConst("SELECT TIME_FORMAT('2023-12-25 15:30:45', '%U %u') AS week_numbers") + testFoldConst("SELECT TIME_FORMAT('2023-12-25 15:30:45', '%V %v %w') AS week_variants") + // Additional NULL parameter tests for comprehensive coverage // MINUTE functions NULL tests From ce68452ffb998127a61b0d6e1ea461de3db840cb Mon Sep 17 00:00:00 2001 From: linzhenqi Date: Fri, 5 Dec 2025 23:24:20 +0800 Subject: [PATCH 2/3] reuse format logic --- be/src/vec/runtime/time_value.h | 219 +-------- be/src/vec/runtime/vdatetime_value.cpp | 176 +++++--- be/src/vec/runtime/vdatetime_value.h | 7 + .../DateTimeExtractAndTransform.java | 20 +- .../expressions/literal/TimeV2Literal.java | 4 + .../nereids/util/DateTimeFormatterUtils.java | 425 +++++++++++++++--- .../rules/expression/FoldConstantTest.java | 3 +- .../sql-functions/doc_date_functions_test.out | 75 ++-- .../doc_date_functions_test.groovy | 2 + 9 files changed, 548 insertions(+), 383 deletions(-) diff --git a/be/src/vec/runtime/time_value.h b/be/src/vec/runtime/time_value.h index ebe20a9e680a85..09e4c24d927599 100644 --- 
a/be/src/vec/runtime/time_value.h +++ b/be/src/vec/runtime/time_value.h @@ -162,222 +162,9 @@ class TimeValue { time = -time; } - int32_t hour = TimeValue::hour(time); - int32_t minute = TimeValue::minute(time); - int32_t second = TimeValue::second(time); - int32_t microsecond = TimeValue::microsecond(time); - - char* const begin = to; - char buf[64]; - char* pos = nullptr; - char* cursor = buf; - const char* ptr = format; - const char* end = format + len; - char ch = '\0'; - - while (ptr < end) { - if (to - begin + SAFE_FORMAT_STRING_MARGIN > max_valid_length) [[unlikely]] { - return false; - } - if (*ptr != '%' || (ptr + 1) == end) { - *to++ = *ptr++; - continue; - } - ptr++; - switch (ch = *ptr++) { - case 'H': - // Hour (00..838 for TIME type, with at least 2 digits) - if (hour < 100) { - to = write_two_digits_to_string(hour, to); - } else { - pos = int_to_str(hour, cursor); - to = append_with_prefix(cursor, static_cast(pos - cursor), '0', 2, to); - } - break; - case 'h': - case 'I': - // Hour (01..12) - to = write_two_digits_to_string((hour % 24 + 11) % 12 + 1, to); - break; - case 'i': - // Minutes, numeric (00..59) - to = write_two_digits_to_string(minute, to); - break; - case 'k': - // Hour (0..23) without leading zero - pos = int_to_str(hour, cursor); - to = append_with_prefix(cursor, static_cast(pos - cursor), '0', 1, to); - break; - case 'l': - // Hour (1..12) without leading zero - pos = int_to_str((hour % 24 + 11) % 12 + 1, cursor); - to = append_with_prefix(cursor, static_cast(pos - cursor), '0', 1, to); - break; - case 's': - case 'S': - // Seconds (00..59) - to = write_two_digits_to_string(second, to); - break; - case 'f': - // Microseconds (000000..999999) - pos = int_to_str(microsecond, cursor); - to = append_with_prefix(cursor, static_cast(pos - cursor), '0', 6, to); - break; - case 'p': { - // AM or PM - if (hour % 24 >= 12) { - to = append_string("PM", to); - } else { - to = append_string("AM", to); - } - break; - } - case 'r': { - // Time, 
12-hour (hh:mm:ss followed by AM or PM) - int32_t hour_12 = (hour + 11) % 12 + 1; - *to++ = (char)('0' + (hour_12 / 10)); - *to++ = (char)('0' + (hour_12 % 10)); - *to++ = ':'; - *to++ = (char)('0' + (minute / 10)); - *to++ = (char)('0' + (minute % 10)); - *to++ = ':'; - *to++ = (char)('0' + (second / 10)); - *to++ = (char)('0' + (second % 10)); - if (hour % 24 >= 12) { - to = append_string(" PM", to); - } else { - to = append_string(" AM", to); - } - break; - } - case 'T': { - // Time, 24-hour (hh:mm:ss or hhh:mm:ss for TIME type) - if (hour < 100) { - *to++ = (char)('0' + (hour / 10)); - *to++ = (char)('0' + (hour % 10)); - } else { - // For hours >= 100, convert to string with at least 2 digits - pos = int_to_str(hour, cursor); - to = append_with_prefix(cursor, static_cast(pos - cursor), '0', 2, to); - } - *to++ = ':'; - *to++ = (char)('0' + (minute / 10)); - *to++ = (char)('0' + (minute % 10)); - *to++ = ':'; - *to++ = (char)('0' + (second / 10)); - *to++ = (char)('0' + (second % 10)); - break; - } - case '%': - *to++ = '%'; - break; - case 'Y': - // Year, 4 digits - 4 zeros - to = append_string("0000", to); - break; - case 'y': - case 'm': - case 'd': - // Year (2 digits), Month, Day - insert 2 zeros - to = write_two_digits_to_string(0, to); - break; - case 'c': - case 'e': - // Month (0..12) or Day without leading zero - insert 1 zero - to = append_string("0", to); - break; - case 'M': - case 'W': - case 'j': - case 'D': - case 'U': - case 'u': - case 'V': - case 'v': - case 'x': - case 'X': - case 'w': - // These specifiers are not supported for TIME type - return false; - default: - *to++ = ch; - break; - } - } - *to++ = '\0'; - return true; - } - -private: - static constexpr char digits100[201] = - "00010203040506070809" - "10111213141516171819" - "20212223242526272829" - "30313233343536373839" - "40414243444546474849" - "50515253545556575859" - "60616263646566676869" - "70717273747576777879" - "80818283848586878889" - "90919293949596979899"; - - static 
char* int_to_str(uint64_t val, char* to) { - char buf[64]; - char* ptr = buf; - // Use do/while for 0 value - do { - *ptr++ = '0' + (val % 10); - val /= 10; - } while (val); - - while (ptr > buf) { - *to++ = *--ptr; - } - return to; - } - - static char* append_string(const char* from, char* to) { - while (*from) { - *to++ = *from++; - } - return to; - } - - static char* append_with_prefix(const char* str, int str_len, char prefix, int target_len, - char* to) { - // full_len is the lower bound. if less, use prefix to pad. if greater, accept all. - int diff = target_len - str_len; - // use prefix to pad - while (diff-- > 0) { - *to++ = prefix; - } - - memcpy(to, str, str_len); - return to + str_len; - } - - static char* write_two_digits_to_string(int number, char* dst) { - memcpy(dst, &digits100[number * 2], 2); - return dst + 2; - } - - static bool is_date_related_specifier(char spec) { - switch (spec) { - case 'Y': - case 'y': - case 'M': - case 'm': - case 'b': - case 'c': - case 'd': - case 'D': - case 'e': - case 'j': - case 'U': - return true; - default: - return false; - } + return DateV2Value::to_format_string_without_check( + format, len, to, max_valid_length, 0, 0, 0, TimeValue::hour(time), + TimeValue::minute(time), TimeValue::second(time), TimeValue::microsecond(time)); } }; } // namespace doris diff --git a/be/src/vec/runtime/vdatetime_value.cpp b/be/src/vec/runtime/vdatetime_value.cpp index 3b77981e3eaac2..f25bd9c268fb7f 100644 --- a/be/src/vec/runtime/vdatetime_value.cpp +++ b/be/src/vec/runtime/vdatetime_value.cpp @@ -2977,11 +2977,10 @@ void DateV2Value::set_microsecond(uint64_t microsecond) { } template -bool DateV2Value::to_format_string_conservative(const char* format, size_t len, char* to, - size_t max_valid_length) const { - if (is_invalid(year(), month(), day(), hour(), minute(), second(), microsecond())) { - return false; - } +bool DateV2Value::to_format_string_without_check(const char* format, size_t len, char* to, + size_t max_valid_length, 
int16_t year, + int8_t month, int8_t day, int hour, int minute, + int second, int ms) { char* const begin = to; // to check written bytes char buf[64]; char* pos = nullptr; @@ -3003,79 +3002,90 @@ bool DateV2Value::to_format_string_conservative(const char* format, size_t le switch (ch = *ptr++) { case 'y': // Year, numeric (two digits) - to = write_two_digits_to_string(this->year() % 100, to); + to = write_two_digits_to_string(year % 100, to); cursor += 2; pos = cursor; break; case 'Y': // Year, numeric, four digits - to = write_four_digits_to_string(this->year(), to); + to = write_four_digits_to_string(year, to); cursor += 4; pos = cursor; break; case 'd': // Day of month (00...31) - to = write_two_digits_to_string(this->day(), to); + to = write_two_digits_to_string(day, to); cursor += 2; pos = cursor; break; case 'H': - to = write_two_digits_to_string(this->hour(), to); - cursor += 2; + // Hour (00...838) + if (hour < 100) { + to = write_two_digits_to_string(hour, to); + cursor += 2; + } else { + pos = int_to_str(hour, cursor); + to = append_with_prefix(cursor, static_cast(pos - cursor), '0', 2, to); + cursor += (pos - cursor); + } pos = cursor; break; case 'i': // Minutes, numeric (00..59) - to = write_two_digits_to_string(this->minute(), to); + to = write_two_digits_to_string(minute, to); cursor += 2; pos = cursor; break; case 'm': - to = write_two_digits_to_string(this->month(), to); + to = write_two_digits_to_string(month, to); cursor += 2; pos = cursor; break; case 'h': case 'I': // Hour (01..12) - to = write_two_digits_to_string((this->hour() % 24 + 11) % 12 + 1, to); + to = write_two_digits_to_string((hour % 24 + 11) % 12 + 1, to); cursor += 2; pos = cursor; break; case 's': case 'S': // Seconds (00..59) - to = write_two_digits_to_string(this->second(), to); + to = write_two_digits_to_string(second, to); cursor += 2; pos = cursor; break; case 'a': // Abbreviated weekday name - if (this->year() == 0 && this->month() == 0) { + if (year == 0 && month == 0) { 
return false; } - to = append_string(s_ab_day_name[weekday()], to); + to = append_string(s_ab_day_name[calc_weekday(calc_daynr(year, month, day), false)], + to); break; case 'b': // Abbreviated month name - if (this->month() == 0) { + if (month == 0) { return false; } - to = append_string(s_ab_month_name[this->month()], to); + to = append_string(s_ab_month_name[month], to); break; case 'c': // Month, numeric (0...12) - pos = int_to_str(this->month(), cursor); + pos = int_to_str(month, cursor); to = append_with_prefix(cursor, pos - cursor, '0', 1, to); break; case 'D': // Day of the month with English suffix (0th, 1st, ...) - pos = int_to_str(this->day(), cursor); + if (month == 0) { + return false; + } + pos = int_to_str(day, cursor); to = append_with_prefix(cursor, pos - cursor, '0', 1, to); - if (this->day() >= 10 && this->day() <= 19) { + if (day >= 10 && day <= 19) { to = append_string("th", to); } else { - switch (this->day() % 10) { + switch (day % 10) { case 1: to = append_string("st", to); break; @@ -3093,39 +3103,42 @@ bool DateV2Value::to_format_string_conservative(const char* format, size_t le break; case 'e': // Day of the month, numeric (0..31) - pos = int_to_str(this->day(), cursor); + pos = int_to_str(day, cursor); to = append_with_prefix(cursor, pos - cursor, '0', 1, to); break; case 'f': // Microseconds (000000..999999) - pos = int_to_str(this->microsecond(), cursor); + pos = int_to_str(ms, cursor); to = append_with_prefix(cursor, pos - cursor, '0', 6, to); break; case 'j': // Day of year (001..366) - pos = int_to_str(daynr() - doris::calc_daynr(this->year(), 1, 1) + 1, cursor); + if (month == 0 || day == 0) { + return false; + } + pos = int_to_str(calc_daynr(year, month, day) - calc_daynr(year, 1, 1) + 1, cursor); to = append_with_prefix(cursor, pos - cursor, '0', 3, to); break; case 'k': - // Hour (0..23) - pos = int_to_str(this->hour(), cursor); + // Hour (0..838) + pos = int_to_str(hour, cursor); to = append_with_prefix(cursor, pos - cursor, 
'0', 1, to); break; case 'l': // Hour (1..12) - pos = int_to_str((this->hour() % 24 + 11) % 12 + 1, cursor); + pos = int_to_str((hour % 24 + 11) % 12 + 1, cursor); to = append_with_prefix(cursor, pos - cursor, '0', 1, to); break; case 'M': // Month name (January..December) - if (this->month() == 0) { + if (month == 0) { return false; } - to = append_string(s_month_name[this->month()], to); + to = append_string(s_month_name[month], to); break; case 'p': // AM or PM - if ((this->hour() % 24) >= 12) { + if ((hour % 24) >= 12) { to = append_string("PM", to); } else { to = append_string("AM", to); @@ -3133,17 +3146,17 @@ bool DateV2Value::to_format_string_conservative(const char* format, size_t le break; case 'r': { // Time, 12-hour (hh:mm:ss followed by AM or PM) - *to++ = (char)('0' + (((this->hour() + 11) % 12 + 1) / 10)); - *to++ = (char)('0' + (((this->hour() + 11) % 12 + 1) % 10)); + *to++ = (char)('0' + (((hour + 11) % 12 + 1) / 10)); + *to++ = (char)('0' + (((hour + 11) % 12 + 1) % 10)); *to++ = ':'; // Minute - *to++ = (char)('0' + (this->minute() / 10)); - *to++ = (char)('0' + (this->minute() % 10)); + *to++ = (char)('0' + (minute / 10)); + *to++ = (char)('0' + (minute % 10)); *to++ = ':'; /* Second */ - *to++ = (char)('0' + (this->second() / 10)); - *to++ = (char)('0' + (this->second() % 10)); - if ((this->hour() % 24) >= 12) { + *to++ = (char)('0' + (second / 10)); + *to++ = (char)('0' + (second % 10)); + if ((hour % 24) >= 12) { to = append_string(" PM", to); } else { to = append_string(" AM", to); @@ -3151,66 +3164,89 @@ bool DateV2Value::to_format_string_conservative(const char* format, size_t le break; } case 'T': { - // Time, 24-hour (hh:mm:ss) - *to++ = (char)('0' + ((this->hour() % 24) / 10)); - *to++ = (char)('0' + ((this->hour() % 24) % 10)); + // Time, 24-hour (hh:mm:ss or hhh:mm:ss for TIME type) + if (hour < 100) { + *to++ = (char)('0' + (hour / 10)); + *to++ = (char)('0' + (hour % 10)); + } else { + pos = int_to_str(hour, cursor); + to = 
append_with_prefix(cursor, static_cast(pos - cursor), '0', 2, to); + } *to++ = ':'; // Minute - *to++ = (char)('0' + (this->minute() / 10)); - *to++ = (char)('0' + (this->minute() % 10)); + *to++ = (char)('0' + (minute / 10)); + *to++ = (char)('0' + (minute % 10)); *to++ = ':'; /* Second */ - *to++ = (char)('0' + (this->second() / 10)); - *to++ = (char)('0' + (this->second() % 10)); + *to++ = (char)('0' + (second / 10)); + *to++ = (char)('0' + (second % 10)); break; } case 'u': // Week (00..53), where Monday is the first day of the week; // WEEK() mode 1 - to = write_two_digits_to_string(week(mysql_week_mode(1)), to); + if (month == 0) { + return false; + } + to = write_two_digits_to_string(week(year, month, day, mysql_week_mode(1)), to); cursor += 2; pos = cursor; break; case 'U': // Week (00..53), where Sunday is the first day of the week; // WEEK() mode 0 - to = write_two_digits_to_string(week(mysql_week_mode(0)), to); + if (month == 0) { + return false; + } + to = write_two_digits_to_string(week(year, month, day, mysql_week_mode(0)), to); cursor += 2; pos = cursor; break; case 'v': // Week (01..53), where Monday is the first day of the week; // WEEK() mode 3; used with %x - to = write_two_digits_to_string(week(mysql_week_mode(3)), to); + if (month == 0) { + return false; + } + to = write_two_digits_to_string(week(year, month, day, mysql_week_mode(3)), to); cursor += 2; pos = cursor; break; case 'V': // Week (01..53), where Sunday is the first day of the week; // WEEK() mode 2; used with %X - to = write_two_digits_to_string(week(mysql_week_mode(2)), to); + if (month == 0) { + return false; + } + to = write_two_digits_to_string(week(year, month, day, mysql_week_mode(2)), to); cursor += 2; pos = cursor; break; case 'w': // Day of the week (0=Sunday..6=Saturday) - if (this->month() == 0 && this->year() == 0) { + if (month == 0 && year == 0) { return false; } - pos = int_to_str(doris::calc_weekday(daynr(), true), cursor); + pos = 
int_to_str(calc_weekday(calc_daynr(year, month, day), true), cursor); to = append_with_prefix(cursor, pos - cursor, '0', 1, to); break; case 'W': // Weekday name (Sunday..Saturday) - to = append_string(s_day_name[weekday()], to); + if (year == 0 && month == 0) { + return false; + } + to = append_string(s_day_name[calc_weekday(calc_daynr(year, month, day), false)], to); break; case 'x': { // Year for the week, where Monday is the first day of the week, // numeric, four digits; used with %v - uint16_t year = 0; - calc_week(this->daynr(), this->year(), this->month(), this->day(), mysql_week_mode(3), - &year, true); - to = write_four_digits_to_string(year, to); + if (month == 0 || day == 0) { + return false; + } + uint16_t year_to_write = 0; + calc_week(calc_daynr(year, month, day), year, month, day, mysql_week_mode(3), + &year_to_write, true); + to = write_four_digits_to_string(year_to_write, to); cursor += 4; pos = cursor; break; @@ -3218,10 +3254,13 @@ bool DateV2Value::to_format_string_conservative(const char* format, size_t le case 'X': { // Year for the week where Sunday is the first day of the week, // numeric, four digits; used with %V - uint16_t year = 0; - calc_week(this->daynr(), this->year(), this->month(), this->day(), mysql_week_mode(2), - &year); - to = write_four_digits_to_string(year, to); + if (month == 0 || day == 0) { + return false; + } + uint16_t year_to_write = 0; + calc_week(calc_daynr(year, month, day), year, month, day, mysql_week_mode(2), + &year_to_write); + to = write_four_digits_to_string(year_to_write, to); cursor += 4; pos = cursor; break; @@ -3236,6 +3275,18 @@ bool DateV2Value::to_format_string_conservative(const char* format, size_t le return true; } +template +bool DateV2Value::to_format_string_conservative(const char* format, size_t len, char* to, + size_t max_valid_length) const { + if (is_invalid(year(), month(), day(), hour(), minute(), second(), microsecond())) { + return false; + } + + return 
to_format_string_without_check(format, len, to, max_valid_length, this->year(), + this->month(), this->day(), this->hour(), this->minute(), + this->second(), this->microsecond()); +} + template int64_t DateV2Value::standardize_timevalue(int64_t value) { if (value <= 0) { @@ -3345,6 +3396,11 @@ uint16_t DateV2Value::year_of_week() const { } return date_v2_value_.year_; } +template +uint8_t DateV2Value::week(int16_t year, int8_t month, int8_t day, uint8_t mode) { + uint16_t year_to_write = 0; + return calc_week(calc_daynr(year, month, day), year, month, day, mode, &year_to_write); +} template uint8_t DateV2Value::calc_week(const uint32_t& day_nr, const uint16_t& year, diff --git a/be/src/vec/runtime/vdatetime_value.h b/be/src/vec/runtime/vdatetime_value.h index ffb5da58408b3a..41ca010645533a 100644 --- a/be/src/vec/runtime/vdatetime_value.h +++ b/be/src/vec/runtime/vdatetime_value.h @@ -1449,7 +1449,14 @@ class DateV2Value { void set_int_val(uint64_t val) { this->int_val_ = val; } + static bool to_format_string_without_check(const char* format, size_t len, char* to, + size_t max_valid_length, int16_t year, int8_t month, + int8_t day, int hour, int minute, int second, + int ms); + private: + static uint8_t week(int16_t year, int8_t month, int8_t day, uint8_t mode); + static uint8_t calc_week(const uint32_t& day_nr, const uint16_t& year, const uint8_t& month, const uint8_t& day, uint8_t mode, uint16_t* to_year, bool disable_lut = false); diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/executable/DateTimeExtractAndTransform.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/executable/DateTimeExtractAndTransform.java index cbeb4c34a3a7ac..38e8cb0ea6f742 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/executable/DateTimeExtractAndTransform.java +++ 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/executable/DateTimeExtractAndTransform.java @@ -285,10 +285,8 @@ public static Expression dateFormat(DateV2Literal date, StringLikeLiteral format throw new AnalysisException("The length of format string in date_format() function should not be greater" + " than 128."); } - DateTimeV2Literal datetime = new DateTimeV2Literal(date.getYear(), date.getMonth(), date.getDay(), 0, 0, 0, 0); format = (StringLikeLiteral) SupportJavaDateFormatter.translateJavaFormatter(format); - return new VarcharLiteral(DateUtils.dateTimeFormatterChecklength(format.getValue(), datetime).format( - java.time.LocalDate.of(((int) date.getYear()), ((int) date.getMonth()), ((int) date.getDay())))); + return new VarcharLiteral(DateTimeFormatterUtils.toFormatStringConservative(date, format, false)); } /** @@ -301,10 +299,7 @@ public static Expression dateFormat(DateTimeV2Literal date, StringLikeLiteral fo + " than 128."); } format = (StringLikeLiteral) SupportJavaDateFormatter.translateJavaFormatter(format); - return new VarcharLiteral(DateUtils.dateTimeFormatterChecklength(format.getValue(), date).format( - java.time.LocalDateTime.of(((int) date.getYear()), ((int) date.getMonth()), ((int) date.getDay()), - ((int) date.getHour()), ((int) date.getMinute()), ((int) date.getSecond()), - ((int) date.getMicroSecond() * 1000)))); + return new VarcharLiteral(DateTimeFormatterUtils.toFormatStringConservative(date, format, false)); } /** @@ -316,7 +311,7 @@ public static Expression timeFormat(TimeV2Literal time, StringLikeLiteral format throw new AnalysisException("The length of format string in time_format() function should not be greater" + " than 128."); } - return new VarcharLiteral(DateTimeFormatterUtils.formatTimeLiteral(time, format.getValue())); + return new VarcharLiteral(DateTimeFormatterUtils.toFormatStringConservative(time, format)); } /** @@ -328,9 +323,7 @@ public static Expression timeFormat(DateV2Literal date, 
StringLikeLiteral format throw new AnalysisException("The length of format string in time_format() function should not be greater" + " than 128."); } - DateTimeV2Literal dateTime = new DateTimeV2Literal(date.getYear(), date.getMonth(), date.getDay(), - 0, 0, 0, 0); - return timeFormat(dateTime, format); + return new VarcharLiteral(DateTimeFormatterUtils.toFormatStringConservative(date, format, true)); } /** @@ -342,10 +335,7 @@ public static Expression timeFormat(DateTimeV2Literal dateTime, StringLikeLitera throw new AnalysisException("The length of format string in time_format() function should not be greater" + " than 128."); } - TimeV2Literal time = new TimeV2Literal((int) dateTime.getHour(), (int) dateTime.getMinute(), - (int) dateTime.getSecond(), (int) dateTime.getMicroSecond(), dateTime.getScale(), - false); - return timeFormat(time, format); + return new VarcharLiteral(DateTimeFormatterUtils.toFormatStringConservative(dateTime, format, true)); } /** diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/literal/TimeV2Literal.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/literal/TimeV2Literal.java index 6b056b9125de2a..f290f0e9fd07e8 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/literal/TimeV2Literal.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/literal/TimeV2Literal.java @@ -396,4 +396,8 @@ public Object getValue() { public String computeToSql() { return "'" + getStringValue() + "'"; } + + public boolean isNegative() { + return negative; + } } diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/util/DateTimeFormatterUtils.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/util/DateTimeFormatterUtils.java index 6f39ef7afbf572..09bb6bf829a769 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/util/DateTimeFormatterUtils.java +++ 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/util/DateTimeFormatterUtils.java @@ -17,6 +17,10 @@ package org.apache.doris.nereids.util; +import org.apache.doris.nereids.exceptions.AnalysisException; +import org.apache.doris.nereids.trees.expressions.literal.DateTimeV2Literal; +import org.apache.doris.nereids.trees.expressions.literal.DateV2Literal; +import org.apache.doris.nereids.trees.expressions.literal.StringLikeLiteral; import org.apache.doris.nereids.trees.expressions.literal.TimeV2Literal; import java.time.format.DateTimeFormatter; @@ -109,29 +113,227 @@ public class DateTimeFormatterUtils { .append(ZONE_FORMATTER) .toFormatter().withResolverStyle(ResolverStyle.STRICT); + private static final int WEEK_MONDAY_FIRST = 1; + private static final int WEEK_YEAR = 2; + private static final int WEEK_FIRST_WEEKDAY = 4; + + private static final int MAX_FORMAT_RESULT_LENGTH = 100; + private static final int SAFE_FORMAT_STRING_MARGIN = 12; + private static final int MAX_FORMAT_STRING_LENGTH = 128; + + private static final String[] ABBR_MONTH_NAMES = { + "", "Jan", "Feb", "Mar", "Apr", "May", "Jun", "Jul", "Aug", "Sep", "Oct", "Nov", "Dec" + }; + + private static final String[] MONTH_NAMES = { + "", "January", "February", "March", "April", "May", "June", "July", "August", "September", + "October", "November", "December" + }; + + private static final String[] ABBR_DAY_NAMES = { + "Mon", "Tue", "Wed", "Thu", "Fri", "Sat", "Sun" + }; + + private static final String[] DAY_NAMES = { + "Monday", "Tuesday", "Wednesday", "Thursday", "Friday", "Saturday", "Sunday" + }; + + private static void appendTwoDigits(StringBuilder builder, int value) { + builder.append((char) ('0' + (value / 10) % 10)); + builder.append((char) ('0' + (value % 10))); + } + + /** + * Conservative implementation of DATE_FORMAT/TIME_FORMAT for datetime literals + * used in constant folding. 
+ * + * @param datetime datetime literal to format + * @param format format pattern + * @param isTimeFormat true when invoked via time_format, false for date_format + * @return formatted string or null when pattern requires missing date fields + */ + public static String toFormatStringConservative(DateTimeV2Literal datetime, StringLikeLiteral format, + boolean isTimeFormat) { + int year = isTimeFormat ? 0 : (int) datetime.getYear(); + int month = isTimeFormat ? 0 : (int) datetime.getMonth(); + int day = isTimeFormat ? 0 : (int) datetime.getDay(); + int hour = (int) datetime.getHour(); + int minute = (int) datetime.getMinute(); + int second = (int) datetime.getSecond(); + int microsecond = (int) datetime.getMicroSecond(); + + String pattern = trimFormat(format.getValue()); + return formatTemporalLiteral(year, month, day, hour, minute, second, microsecond, pattern); + } + + /** + * Conservative implementation of DATE_FORMAT/TIME_FORMAT for date literals used + * in constant folding. + * + * @param date date literal to format + * @param format format pattern + * @param isTimeFormat true when invoked via time_format, false for date_format + * @return formatted string or null when pattern requires missing date fields + */ + public static String toFormatStringConservative(DateV2Literal date, StringLikeLiteral format, + boolean isTimeFormat) { + int year = isTimeFormat ? 0 : (int) date.getYear(); + int month = isTimeFormat ? 0 : (int) date.getMonth(); + int day = isTimeFormat ? 0 : (int) date.getDay(); + + String pattern = trimFormat(format.getValue()); + return formatTemporalLiteral(year, month, day, 0, 0, 0, 0, pattern); + } + /** - * Format TimeV2 literal according to MySQL time_format spec. + * Conservative implementation of TIME_FORMAT for time literals used in constant + * folding. 
+ * + * @param time time literal to format + * @param format format pattern + * @return formatted string with sign preserved; null when pattern requires date + * fields */ - public static String formatTimeLiteral(TimeV2Literal time, String pattern) { - double value = (double) time.getValue(); - int hour = Math.abs(time.getHour()); - int minute = Math.abs(time.getMinute()); - int second = Math.abs(time.getSecond()); - int microsecond = Math.abs(time.getMicroSecond()); + public static String toFormatStringConservative(TimeV2Literal time, StringLikeLiteral format) { + String pattern = trimFormat(format.getValue()); + String res = formatTemporalLiteral(0, 0, 0, time.getHour(), time.getMinute(), + time.getSecond(), time.getMicroSecond(), pattern); + if (time.isNegative()) { + res = "-" + res; + } + return res; + } + + private static int calcWeekNumber(int year, int month, int day, int mode) { + int[] weekYear = new int[1]; + return calcWeekNumberAndYear(year, month, day, mode, weekYear); + } + + private static int calcWeekNumberAndYear(int year, int month, int day, int mode, int[] toYear) { + return calcWeekInternal(calcDayNr(year, month, day), year, month, day, mode, toYear); + } + + private static int calcWeekInternal(long dayNr, int year, int month, int day, int mode, int[] toYear) { + if (year == 0) { + toYear[0] = 0; + return 0; + } + boolean mondayFirst = (mode & WEEK_MONDAY_FIRST) != 0; + boolean weekYear = (mode & WEEK_YEAR) != 0; + boolean firstWeekday = (mode & WEEK_FIRST_WEEKDAY) != 0; + + long daynrFirstDay = calcDayNr(year, 1, 1); + int weekdayFirstDay = calcWeekday(daynrFirstDay, !mondayFirst); - StringBuilder builder = new StringBuilder(pattern.length() + 8); - if (value < 0) { - builder.append('-'); + toYear[0] = year; + + if (month == 1 && day <= (7 - weekdayFirstDay)) { + if (!weekYear && ((firstWeekday && weekdayFirstDay != 0) || (!firstWeekday && weekdayFirstDay > 3))) { + return 0; + } + toYear[0]--; + weekYear = true; + int days = 
calcDaysInYear(toYear[0]); + daynrFirstDay -= days; + weekdayFirstDay = (weekdayFirstDay + 53 * 7 - days) % 7; + } + + int days; + if ((firstWeekday && weekdayFirstDay != 0) || (!firstWeekday && weekdayFirstDay > 3)) { + days = (int) (dayNr - (daynrFirstDay + (7 - weekdayFirstDay))); + } else { + days = (int) (dayNr - (daynrFirstDay - weekdayFirstDay)); + } + + if (weekYear && days >= 52 * 7) { + weekdayFirstDay = (weekdayFirstDay + calcDaysInYear(toYear[0])) % 7; + if ((firstWeekday && weekdayFirstDay == 0) || (!firstWeekday && weekdayFirstDay <= 3)) { + toYear[0]++; + return 1; + } } + return days / 7 + 1; + } + + private static int mysqlWeekMode(int mode) { + mode &= 7; + if ((mode & WEEK_MONDAY_FIRST) == 0) { + mode ^= WEEK_FIRST_WEEKDAY; + } + return mode; + } + + private static int calcWeekday(long dayNr, boolean sundayFirst) { + return (int) ((dayNr + 5 + (sundayFirst ? 1 : 0)) % 7); + } + + private static long calcDayNr(int year, int month, int day) { + // Align with BE/MySQL: Monday = 0 when sundayFirst=false in calcWeekday. + if (year == 0 && month == 0) { + return 0; + } + if (year == 0 && month == 1 && day == 1) { + return 1; + } + + long y = year; + long delsum = 365 * y + 31L * (month - 1) + day; + if (month <= 2) { + y--; + } else { + delsum -= (month * 4 + 23) / 10; + } + return delsum + y / 4 - y / 100 + y / 400; + } + + private static int calcDaysInYear(int year) { + return isLeap(year) ? 
366 : 365; + } + + private static boolean isLeap(int year) { + return (year % 4 == 0 && year % 100 != 0) || (year % 400 == 0); + } + + private static void appendFourDigits(StringBuilder builder, int value) { + if (value >= 1000 && value <= 9999) { + builder.append(value); + return; + } + appendWithPad(builder, value, 4, '0'); + } + + private static void appendWithPad(StringBuilder builder, int value, int targetLength, char padChar) { + String str = Integer.toString(Math.abs(value)); + for (int i = str.length(); i < targetLength; i++) { + builder.append(padChar); + } + builder.append(str); + } + + // MySQL-compatible time_format for TIME/DATE/DATETIME literals. + private static String formatTemporalLiteral(int year, int month, int day, int hour, int minute, + int second, int microsecond, String pattern) { + StringBuilder builder = new StringBuilder(pattern.length() + 16); + for (int i = 0; i < pattern.length(); i++) { char c = pattern.charAt(i); if (c != '%' || i == pattern.length() - 1) { builder.append(c); continue; } + char spec = pattern.charAt(++i); switch (spec) { + case 'y': + appendTwoDigits(builder, year % 100); + break; + case 'Y': + appendFourDigits(builder, year); + break; + case 'd': + appendTwoDigits(builder, day); + break; case 'H': if (hour < 100) { appendTwoDigits(builder, hour); @@ -139,32 +341,109 @@ public static String formatTimeLiteral(TimeV2Literal time, String pattern) { appendWithPad(builder, hour, 2, '0'); } break; - case 'h': - case 'I': { - int hour12 = (hour % 24 + 11) % 12 + 1; - appendTwoDigits(builder, hour12); - break; - } case 'i': appendTwoDigits(builder, minute); break; - case 'k': - appendWithPad(builder, hour, 1, '0'); + case 'm': + appendTwoDigits(builder, month); break; - case 'l': { + case 'h': + case 'I': { int hour12 = (hour % 24 + 11) % 12 + 1; - appendWithPad(builder, hour12, 1, '0'); + appendTwoDigits(builder, hour12); break; } case 's': case 'S': appendTwoDigits(builder, second); break; + case 'a': + if (month == 0 || 
day == 0 || year == 0) { + return null; + } + builder.append(ABBR_DAY_NAMES[calcWeekday(calcDayNr(year, month, day), false)]); + break; + case 'b': + if (month == 0) { + return null; + } + builder.append(ABBR_MONTH_NAMES[month]); + break; + case 'c': { + String str = Integer.toString(month); + if (str.length() < 1) { + builder.append('0'); + } + builder.append(str); + break; + } + case 'D': + if (month == 0) { + return null; + } + builder.append(day); + if (day >= 10 && day <= 19) { + builder.append("th"); + } else { + switch (day % 10) { + case 1: + builder.append("st"); + break; + case 2: + builder.append("nd"); + break; + case 3: + builder.append("rd"); + break; + default: + builder.append("th"); + break; + } + } + break; + case 'e': { + String str = Integer.toString(day); + if (str.length() < 1) { + builder.append('0'); + } + builder.append(str); + break; + } case 'f': appendWithPad(builder, microsecond, 6, '0'); break; + case 'j': + if (month == 0 || day == 0) { + return null; + } + int dayOfYear = (int) (calcDayNr(year, month, day) - calcDayNr(year, 1, 1) + 1); + appendWithPad(builder, dayOfYear, 3, '0'); + break; + case 'k': { + String str = Integer.toString(hour); + if (str.length() < 1) { + builder.append('0'); + } + builder.append(str); + break; + } + case 'l': { + int hour12 = (hour % 24 + 11) % 12 + 1; + String str = Integer.toString(hour12); + if (str.length() < 1) { + builder.append('0'); + } + builder.append(str); + break; + } + case 'M': + if (month == 0) { + return null; + } + builder.append(MONTH_NAMES[month]); + break; case 'p': - builder.append((hour % 24 >= 12) ? "PM" : "AM"); + builder.append((hour % 24) >= 12 ? "PM" : "AM"); break; case 'r': { int hour12 = (hour % 24 + 11) % 12 + 1; @@ -174,7 +453,7 @@ public static String formatTimeLiteral(TimeV2Literal time, String pattern) { builder.append(':'); appendTwoDigits(builder, second); builder.append(' '); - builder.append((hour % 24 >= 12) ? "PM" : "AM"); + builder.append((hour % 24) >= 12 ? 
"PM" : "AM"); break; } case 'T': @@ -188,52 +467,88 @@ public static String formatTimeLiteral(TimeV2Literal time, String pattern) { builder.append(':'); appendTwoDigits(builder, second); break; - case 'Y': - // Year, 4 digits - builder.append("0000"); + case 'u': + if (month == 0) { + return null; + } + appendTwoDigits(builder, calcWeekNumber(year, month, day, mysqlWeekMode(1))); break; - case 'y': - case 'm': - case 'd': - // Year (2 digits), Month, Day - insert 2 zeros - builder.append("00"); + case 'U': + if (month == 0) { + return null; + } + appendTwoDigits(builder, calcWeekNumber(year, month, day, mysqlWeekMode(0))); break; - case 'c': - case 'e': - // Month (0..12) or Day without leading zero - insert 1 zero - builder.append('0'); + case 'v': + if (month == 0) { + return null; + } + appendTwoDigits(builder, calcWeekNumber(year, month, day, mysqlWeekMode(3))); break; - case 'M': - case 'W': - case 'j': - case 'D': - case 'U': - case 'u': case 'V': - case 'v': - case 'x': - case 'X': + if (month == 0) { + return null; + } + appendTwoDigits(builder, calcWeekNumber(year, month, day, mysqlWeekMode(2))); + break; case 'w': - // These specifiers are not supported for TIME type - return null; + if (month == 0 && year == 0) { + return null; + } + builder.append(calcWeekday(calcDayNr(year, month, day), true)); + break; + case 'W': + if (year == 0 && month == 0) { + return null; + } + builder.append(DAY_NAMES[calcWeekday(calcDayNr(year, month, day), false)]); + break; + case 'x': { + if (month == 0 || day == 0) { + return null; + } + int[] weekYear = new int[1]; + calcWeekNumberAndYear(year, month, day, mysqlWeekMode(3), weekYear); + appendFourDigits(builder, weekYear[0]); + break; + } + case 'X': { + if (month == 0 || day == 0) { + return null; + } + int[] weekYear = new int[1]; + calcWeekNumberAndYear(year, month, day, mysqlWeekMode(2), weekYear); + appendFourDigits(builder, weekYear[0]); + break; + } default: builder.append(spec); break; } } - return 
builder.toString(); - } - private static void appendTwoDigits(StringBuilder builder, int value) { - builder.append((char) ('0' + (value / 10) % 10)); - builder.append((char) ('0' + (value % 10))); + if (builder.length() > MAX_FORMAT_RESULT_LENGTH) { + throw new AnalysisException("Formatted string length exceeds the maximum allowed length"); + } + return builder.toString(); } - private static void appendWithPad(StringBuilder builder, int value, int targetLength, char padChar) { - String str = Integer.toString(Math.abs(value)); - for (int i = str.length(); i < targetLength; i++) { - builder.append(padChar); + private static String trimFormat(String pattern) { + if (pattern == null) { + throw new AnalysisException("Format string is null"); } - builder.append(str); + int start = 0; + int end = pattern.length(); + while (start < end && Character.isWhitespace(pattern.charAt(start))) { + start++; + } + while (end > start && Character.isWhitespace(pattern.charAt(end - 1))) { + end--; + } + String trimmed = pattern.substring(start, end); + if (trimmed.length() > MAX_FORMAT_STRING_LENGTH) { + throw new AnalysisException("Format string length exceeds the maximum allowed length"); + } + return trimmed; } } diff --git a/fe/fe-core/src/test/java/org/apache/doris/nereids/rules/expression/FoldConstantTest.java b/fe/fe-core/src/test/java/org/apache/doris/nereids/rules/expression/FoldConstantTest.java index 0cd32e91710d5e..6b24ffd6b611ad 100644 --- a/fe/fe-core/src/test/java/org/apache/doris/nereids/rules/expression/FoldConstantTest.java +++ b/fe/fe-core/src/test/java/org/apache/doris/nereids/rules/expression/FoldConstantTest.java @@ -508,7 +508,8 @@ void testFoldString() { d = new DateFormat(DateV2Literal.fromJavaDateType(LocalDateTime.of(1, 1, 1, 1, 1, 1)), StringLiteral.of(StringUtils.repeat("s", 128) + " ")); rewritten = executor.rewrite(d, context); - Assertions.assertEquals(new VarcharLiteral(StringUtils.repeat("s", 128) + " "), rewritten); + // Overlength output (>100 chars) 
is not folded + Assertions.assertEquals(d, rewritten); DateTrunc t = new DateTrunc(DateTimeV2Literal.fromJavaDateType(LocalDateTime.of(1, 1, 1, 1, 1, 1)), StringLiteral.of("week")); diff --git a/regression-test/data/doc/sql-manual/sql-functions/doc_date_functions_test.out b/regression-test/data/doc/sql-manual/sql-functions/doc_date_functions_test.out index 0c41e96aa35b3f..9ca2211a71ebcb 100644 --- a/regression-test/data/doc/sql-manual/sql-functions/doc_date_functions_test.out +++ b/regression-test/data/doc/sql-manual/sql-functions/doc_date_functions_test.out @@ -128,6 +128,9 @@ Sun Oct 10 -- !date_format_8 -- \N +-- !date_format_9 -- +Sunday October 2009 + -- !date_1 -- 2003-12-31 @@ -2029,42 +2032,6 @@ da fanadur \N 07:23:25 --- !to_seconds_1 -- -63358934400 - --- !to_seconds_2 -- -63358970589 - --- !to_seconds_3 -- -86400 - --- !to_seconds_4 -- -315569519999 - --- !to_seconds_5 -- -\N - --- !to_seconds_6 -- -\N - --- !to_seconds_7 -- -\N - --- !to_seconds_8 -- -63863901296 - --- !to_seconds_9 -- -\N - --- !to_seconds_10 -- -\N - --- !to_seconds_11 -- -63902908800 - --- !to_seconds_12 -- -63902953845 - -- !time_format_1 -- 1 00:00:00 00 0 12 12 12 00 00 00 000000 AM 12:00:00 AM 00:00:00 00:00:00.000000 0 00 12 12 12 00 000000 00 AM 00:00:00 12:00:00 AM 12:12 12 0 12 00 12 AM 000000 00 00 00:00:00 12:00:00 AM 2 00:00:00.123456 00 0 12 12 12 00 00 00 123456 AM 12:00:00 AM 00:00:00 00:00:00.123456 0 00 12 12 12 00 123456 00 AM 00:00:00 12:00:00 AM 12:12 12 0 12 00 12 AM 123456 00 00 00:00:00 12:00:00 AM @@ -2168,6 +2135,42 @@ PM 12 12 12 12 12:34:56 000001 -- !time_format_30 -- \N +-- !to_seconds_1 -- +63358934400 + +-- !to_seconds_2 -- +63358970589 + +-- !to_seconds_3 -- +86400 + +-- !to_seconds_4 -- +315569519999 + +-- !to_seconds_5 -- +\N + +-- !to_seconds_6 -- +\N + +-- !to_seconds_7 -- +\N + +-- !to_seconds_8 -- +63863901296 + +-- !to_seconds_9 -- +\N + +-- !to_seconds_10 -- +\N + +-- !to_seconds_11 -- +63902908800 + +-- !to_seconds_12 -- +63902953845 + -- 
!dateceil -- 2025-10-10T12:34:56 2026-01-01T00:00 2025-11-01T00:00 2025-10-11T00:00 2025-10-10T13:00 2025-10-10T12:35 2025-10-10T12:34:56 2025-01-01T00:00 2025-01-01T00:00 2025-01-01T00:00 2025-01-01T00:00 2025-01-01T00:00 2025-01-01T00:00 2025-01-01T00:00 diff --git a/regression-test/suites/doc/sql-manual/sql-functions/doc_date_functions_test.groovy b/regression-test/suites/doc/sql-manual/sql-functions/doc_date_functions_test.groovy index 5a56ed970e1c4a..ee8ce43f9db67a 100644 --- a/regression-test/suites/doc/sql-manual/sql-functions/doc_date_functions_test.groovy +++ b/regression-test/suites/doc/sql-manual/sql-functions/doc_date_functions_test.groovy @@ -194,6 +194,7 @@ suite("doc_date_functions_test") { // Any parameter is NULL qt_date_format_7 """SELECT DATE_FORMAT(NULL, '%Y-%m-%d')""" qt_date_format_8 """SELECT DATE_FORMAT('2009-10-04', NULL)""" + qt_date_format_9 """SELECT DATE_FORMAT('2009-10-04 22:23:00', ' %W %M %Y')""" // 10. DATE function tests // Extract date part from datetime @@ -1569,6 +1570,7 @@ suite("doc_date_functions_test") { testFoldConst("SELECT DATE_FORMAT('2009-10-04', '%D %e %f')") testFoldConst("SELECT DATE_FORMAT(NULL, '%Y-%m-%d')") testFoldConst("SELECT DATE_FORMAT('2009-10-04', NULL)") + testFoldConst("SELECT DATE_FORMAT('2009-10-04 22:23:00', ' %W %M %Y')") // 7. 
DATE function constant folding tests testFoldConst("SELECT DATE('2003-12-31 01:02:03')") From 4a625902ea6109437f2c80703a513cc0fe0f4f4a Mon Sep 17 00:00:00 2001 From: linzhenqi Date: Tue, 30 Dec 2025 20:34:57 +0800 Subject: [PATCH 3/3] fix --- be/src/vec/functions/date_time_transforms.h | 205 ++++++++- be/src/vec/runtime/time_value.h | 2 +- be/src/vec/runtime/vdatetime_value.cpp | 419 +++++++++--------- be/src/vec/runtime/vdatetime_value.h | 28 +- .../DateTimeExtractAndTransform.java | 25 -- .../functions/scalar/DateFormat.java | 4 +- .../functions/scalar/TimeFormat.java | 4 +- .../nereids/util/DateTimeFormatterUtils.java | 20 - .../rules/expression/FoldConstantTest.java | 9 +- .../sql-functions/doc_date_functions_test.out | 1 + .../test_function_signature_all_types.groovy | 2 +- .../doc_date_functions_test.groovy | 3 +- 12 files changed, 446 insertions(+), 276 deletions(-) diff --git a/be/src/vec/functions/date_time_transforms.h b/be/src/vec/functions/date_time_transforms.h index 83598ba17b1b6a..7a6b312bf6fc27 100644 --- a/be/src/vec/functions/date_time_transforms.h +++ b/be/src/vec/functions/date_time_transforms.h @@ -30,8 +30,8 @@ #include "runtime/primitive_type.h" #include "udf/udf.h" #include "util/binary_cast.hpp" -#include "vec/columns/column_decimal.h" #include "vec/columns/column.h" +#include "vec/columns/column_decimal.h" #include "vec/columns/column_nullable.h" #include "vec/columns/column_string.h" #include "vec/columns/column_vector.h" @@ -433,6 +433,209 @@ struct FromUnixTimeDecimalImpl { } }; +// Base template for optimized time field(HOUR, MINUTE, SECOND, MS) extraction from Unix timestamp +// Uses lookup_offset to avoid expensive civil_second construction +template +class FunctionTimeFieldFromUnixtime : public IFunction { +public: + static constexpr auto name = Impl::name; + static FunctionPtr create() { return std::make_shared>(); } + + String get_name() const override { return name; } + + size_t get_number_of_arguments() const override { return 
1; } + + DataTypePtr get_return_type_impl(const ColumnsWithTypeAndName& arguments) const override { + // microsecond_from_unixtime returns Int32, others (hour/minute/second) return Int8 + if constexpr (Impl::ArgType == PrimitiveType::TYPE_DECIMAL64) { + return make_nullable(std::make_shared()); + } else { + return make_nullable(std::make_shared()); + } + } + + // (UTC 9999-12-31 23:59:59) - 24 * 3600 + static const int64_t TIMESTAMP_VALID_MAX = 253402243199L; + + Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments, + uint32_t result, size_t input_rows_count) const override { + using ArgColType = PrimitiveTypeTraits::ColumnType; + using ResColType = std::conditional_t; + using ResItemType = typename ResColType::value_type; + auto res = ResColType::create(); + + const auto* ts_col = + assert_cast(block.get_by_position(arguments[0]).column.get()); + if constexpr (Impl::ArgType == PrimitiveType::TYPE_DECIMAL64) { + // microsecond_from_unixtime only + const auto scale = static_cast(ts_col->get_scale()); + + for (int i = 0; i < input_rows_count; ++i) { + const auto seconds = ts_col->get_intergral_part(i); + const auto fraction = ts_col->get_fractional_part(i); + + if (seconds < 0 || seconds > TIMESTAMP_VALID_MAX) { + return Status::InvalidArgument( + "The input value of TimeFiled(from_unixtime()) must between 0 and " + "253402243199L"); + } + + ResItemType value = Impl::extract_field(fraction, scale); + res->insert_value(value); + } + } else { + auto ctz = context->state()->timezone_obj(); + for (int i = 0; i < input_rows_count; ++i) { + auto date = ts_col->get_element(i); + + if (date < 0 || date > TIMESTAMP_VALID_MAX) { + return Status::InvalidArgument( + "The input value of TimeFiled(from_unixtime()) must between 0 and " + "253402243199L"); + } + + ResItemType value = Impl::extract_field(date, ctz); + res->insert_value(value); + } + } + block.replace_by_position(result, std::move(res)); + return Status::OK(); + } +}; + +struct 
HourFromUnixtimeImpl { + static constexpr PrimitiveType ArgType = PrimitiveType::TYPE_BIGINT; + static constexpr auto name = "hour_from_unixtime"; + + static int8_t extract_field(int64_t local_time, const cctz::time_zone& ctz) { + static const auto epoch = std::chrono::time_point_cast( + std::chrono::system_clock::from_time_t(0)); + cctz::time_point t = epoch + cctz::seconds(local_time); + int offset = ctz.lookup_offset(t).offset; + local_time += offset; + + static const libdivide::divider fast_div_3600(3600); + static const libdivide::divider fast_div_86400(86400); + + int64_t remainder; + if (LIKELY(local_time >= 0)) { + remainder = local_time - local_time / fast_div_86400 * 86400; + } else { + remainder = local_time % 86400; + if (remainder < 0) { + remainder += 86400; + } + } + return static_cast(remainder / fast_div_3600); + } +}; + +struct MinuteFromUnixtimeImpl { + static constexpr PrimitiveType ArgType = PrimitiveType::TYPE_BIGINT; + static constexpr auto name = "minute_from_unixtime"; + + static int8_t extract_field(int64_t local_time, const cctz::time_zone& /*ctz*/) { + static const libdivide::divider fast_div_60(60); + static const libdivide::divider fast_div_3600(3600); + + local_time = local_time - local_time / fast_div_3600 * 3600; + + return static_cast(local_time / fast_div_60); + } +}; + +struct SecondFromUnixtimeImpl { + static constexpr PrimitiveType ArgType = PrimitiveType::TYPE_BIGINT; + static constexpr auto name = "second_from_unixtime"; + + static int8_t extract_field(int64_t local_time, const cctz::time_zone& /*ctz*/) { + return static_cast(local_time % 60); + } +}; + +struct MicrosecondFromUnixtimeImpl { + static constexpr PrimitiveType ArgType = PrimitiveType::TYPE_DECIMAL64; + static constexpr auto name = "microsecond_from_unixtime"; + + static int32_t extract_field(int64_t fraction, int scale) { + if (scale < 6) { + fraction *= common::exp10_i64(6 - scale); + } + return static_cast(fraction); + } +}; + +template +class 
FunctionTimeFormat : public IFunction { +public: + using ArgColType = typename PrimitiveTypeTraits::ColumnType; + using ArgCppType = typename PrimitiveTypeTraits::CppType; + + static constexpr auto name = "time_format"; + String get_name() const override { return name; } + static FunctionPtr create() { return std::make_shared(); } + DataTypes get_variadic_argument_types_impl() const override { + return {std::make_shared::DataType>(), + std::make_shared()}; + } + DataTypePtr get_return_type_impl(const ColumnsWithTypeAndName& arguments) const override { + return make_nullable(std::make_shared()); + } + size_t get_number_of_arguments() const override { return 2; } + + Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments, + uint32_t result, size_t input_rows_count) const override { + auto res_col = ColumnString::create(); + ColumnString::Chars& res_chars = res_col->get_chars(); + ColumnString::Offsets& res_offsets = res_col->get_offsets(); + + auto null_map = ColumnUInt8::create(); + auto& null_map_data = null_map->get_data(); + null_map_data.resize_fill(input_rows_count, 0); + + res_offsets.reserve(input_rows_count); + + ColumnPtr arg_col[2]; + bool is_const[2]; + for (size_t i = 0; i < 2; ++i) { + const ColumnPtr& col = block.get_by_position(arguments[i]).column; + std::tie(arg_col[i], is_const[i]) = unpack_if_const(col); + } + + const auto* datetime_col = assert_cast(arg_col[0].get()); + const auto* format_col = assert_cast(arg_col[1].get()); + for (size_t i = 0; i < input_rows_count; ++i) { + const auto& datetime_val = datetime_col->get_element(index_check_const(i, is_const[0])); + StringRef format = format_col->get_data_at(index_check_const(i, is_const[1])); + TimeValue::TimeType time = get_time_value(datetime_val); + + char buf[100 + SAFE_FORMAT_STRING_MARGIN]; + if (!TimeValue::to_format_string_conservative(format.data, format.size, buf, + 100 + SAFE_FORMAT_STRING_MARGIN, time)) { + null_map_data[i] = 1; + 
res_offsets.push_back(res_chars.size()); + continue; + } + res_chars.insert(buf, buf + strlen(buf)); + res_offsets.push_back(res_chars.size()); + } + block.replace_by_position(result, + ColumnNullable::create(std::move(res_col), std::move(null_map))); + return Status::OK(); + } + +private: + TimeValue::TimeType get_time_value(const ArgCppType& datetime_val) const { + if constexpr (ArgPType == PrimitiveType::TYPE_TIMEV2) { + return static_cast(datetime_val); + } else { + return TimeValue::make_time(datetime_val.hour(), datetime_val.minute(), + datetime_val.second(), datetime_val.microsecond()); + } + } +}; + #include "common/compile_check_end.h" } // namespace doris::vectorized diff --git a/be/src/vec/runtime/time_value.h b/be/src/vec/runtime/time_value.h index 09e4c24d927599..340b4da42b3517 100644 --- a/be/src/vec/runtime/time_value.h +++ b/be/src/vec/runtime/time_value.h @@ -162,7 +162,7 @@ class TimeValue { time = -time; } - return DateV2Value::to_format_string_without_check( + return DatetimeValueUtil::to_format_string_without_check( format, len, to, max_valid_length, 0, 0, 0, TimeValue::hour(time), TimeValue::minute(time), TimeValue::second(time), TimeValue::microsecond(time)); } diff --git a/be/src/vec/runtime/vdatetime_value.cpp b/be/src/vec/runtime/vdatetime_value.cpp index f25bd9c268fb7f..da3b28312891b0 100644 --- a/be/src/vec/runtime/vdatetime_value.cpp +++ b/be/src/vec/runtime/vdatetime_value.cpp @@ -2977,10 +2977,192 @@ void DateV2Value::set_microsecond(uint64_t microsecond) { } template -bool DateV2Value::to_format_string_without_check(const char* format, size_t len, char* to, - size_t max_valid_length, int16_t year, - int8_t month, int8_t day, int hour, int minute, - int second, int ms) { +bool DateV2Value::to_format_string_conservative(const char* format, size_t len, char* to, + size_t max_valid_length) const { + if (is_invalid(year(), month(), day(), hour(), minute(), second(), microsecond())) { + return false; + } + + return 
DatetimeValueUtil::to_format_string_without_check( + format, len, to, max_valid_length, this->year(), this->month(), this->day(), + this->hour(), this->minute(), this->second(), this->microsecond()); +} + +template +int64_t DateV2Value::standardize_timevalue(int64_t value) { + if (value <= 0) { + return 0; + } + if (value >= 10000101000000L) { + // 9999-99-99 99:99:99 + if (value > 99999999999999L) { + return 0; + } + + // between 1000-01-01 00:00:00L and 9999-99-99 99:99:99 + // all digits exist. + return value; + } + // 2000-01-01 + if (value < 101) { + return 0; + } + // two digits year. 2000 ~ 2069 + if (value <= (YY_PART_YEAR - 1) * 10000L + 1231L) { + return (value + 20000000L) * 1000000L; + } + // two digits year, invalid date + if (value < YY_PART_YEAR * 10000L + 101) { + return 0; + } + // two digits year. 1970 ~ 1999 + if (value <= 991231L) { + return (value + 19000000L) * 1000000L; + } + if (value < 10000101) { + return 0; + } + // four digits years without hour. + if (value <= 99991231L) { + return value * 1000000L; + } + // below 0000-01-01 + if (value < 101000000) { + return 0; + } + + // below is with datetime, must have hh:mm:ss + // 2000 ~ 2069 + if (value <= (YY_PART_YEAR - 1) * 10000000000L + 1231235959L) { + return value + 20000000000000L; + } + if (value < YY_PART_YEAR * 10000000000L + 101000000L) { + return 0; + } + // 1970 ~ 1999 + if (value <= 991231235959L) { + return value + 19000000000000L; + } + return value; +} + +template +bool DateV2Value::from_date_int64(int64_t value) { + value = standardize_timevalue(value); + if (value <= 0) { + return false; + } + uint64_t date = value / 1000000; + + auto [year, month, day, hour, minute, second] = std::tuple {0, 0, 0, 0, 0, 0}; + year = date / 10000; + date %= 10000; + month = date / 100; + day = date % 100; + + if constexpr (is_datetime) { + uint64_t time = value % 1000000; + hour = time / 10000; + time %= 10000; + minute = time / 100; + second = time % 100; + return 
check_range_and_set_time(year, month, day, hour, minute, second, 0); + } else { + return check_range_and_set_time(year, month, day, 0, 0, 0, 0); + } +} + +// An ISO week-numbering year (also called ISO year informally) has 52 or 53 full weeks. That is 364 or 371 days instead of the usual 365 or 366 days. These 53-week years occur on all years that have Thursday as 1 January and on leap years that start on Wednesday. The extra week is sometimes referred to as a leap week, although ISO 8601 does not use this term. https://en.wikipedia.org/wiki/ISO_week_date +template +uint16_t DateV2Value::year_of_week() const { + constexpr uint8_t THURSDAY = 3; + + if (date_v2_value_.month_ == 1) { + constexpr uint8_t MAX_DISTANCE_WITH_THURSDAY = 6 - THURSDAY; + if (date_v2_value_.day_ <= MAX_DISTANCE_WITH_THURSDAY) { + auto weekday = calc_weekday(daynr(), false); + // if the current day is after Thursday and Thursday is in the previous year, return the previous year + return date_v2_value_.year_ - + (weekday > THURSDAY && weekday - THURSDAY > date_v2_value_.day_ - 1); + } + } else if (date_v2_value_.month_ == 12) { + constexpr uint8_t MAX_DISTANCE_WITH_THURSDAY = THURSDAY - 0; + if (S_DAYS_IN_MONTH[12] - date_v2_value_.day_ <= MAX_DISTANCE_WITH_THURSDAY) { + auto weekday = calc_weekday(daynr(), false); + // if the current day is before Thursday and Thursday is in the next year, return the next year + return date_v2_value_.year_ + + (weekday < THURSDAY && + (THURSDAY - weekday) > S_DAYS_IN_MONTH[12] - date_v2_value_.day_); + } + } + return date_v2_value_.year_; +} + +template +uint8_t DateV2Value::calc_week(const uint32_t& day_nr, const uint16_t& year, + const uint8_t& month, const uint8_t& day, uint8_t mode, + uint16_t* to_year, bool disable_lut) { + if (year == 0) [[unlikely]] { + *to_year = 0; + return 0; + } + if (config::enable_time_lut && !disable_lut && mode == 3 && year >= 1950 && year < 2030) { + return doris::TimeLUT::GetImplement() + ->week_of_year_table[year - 
doris::LUT_START_YEAR][month - 1][day - 1]; + } + // mode=4 is used for week() + if (config::enable_time_lut && !disable_lut && mode == 4 && year >= 1950 && year < 2030) { + return doris::TimeLUT::GetImplement() + ->week_table[year - doris::LUT_START_YEAR][month - 1][day - 1]; + } + bool monday_first = mode & WEEK_MONDAY_FIRST; + bool week_year = mode & WEEK_YEAR; + bool first_weekday = mode & WEEK_FIRST_WEEKDAY; + uint64_t daynr_first_day = doris::calc_daynr(year, 1, 1); + uint8_t weekday_first_day = doris::calc_weekday(daynr_first_day, !monday_first); + + int days = 0; + *to_year = year; + + // Check whether the first days of this year belong to last year + if (month == 1 && day <= (7 - weekday_first_day)) { + if (!week_year && ((first_weekday && weekday_first_day != 0) || + (!first_weekday && weekday_first_day > 3))) { + return 0; + } + (*to_year)--; + week_year = true; + daynr_first_day -= (days = doris::calc_days_in_year(*to_year)); + weekday_first_day = (weekday_first_day + 53 * 7 - days) % 7; + } + + // How many days since first week + if ((first_weekday && weekday_first_day != 0) || (!first_weekday && weekday_first_day > 3)) { + // days in new year belongs to last year. + days = day_nr - (daynr_first_day + (7 - weekday_first_day)); + } else { + // days in new year belongs to this year. + days = day_nr - (daynr_first_day - weekday_first_day); + } + + if (week_year && days >= 52 * 7) { + weekday_first_day = (weekday_first_day + doris::calc_days_in_year(*to_year)) % 7; + if ((first_weekday && weekday_first_day == 0) || + (!first_weekday && weekday_first_day <= 3)) { + // Belong to next year.
+ (*to_year)++; + return 1; + } + } + + return days / 7 + 1; +} + +template +bool DatetimeValueUtil::to_format_string_without_check(const char* format, size_t len, char* to, + size_t max_valid_length, int16_t year, + int8_t month, int8_t day, int hour, + int minute, int second, int ms) { char* const begin = to; // to check written bytes char buf[64]; char* pos = nullptr; @@ -3020,14 +3202,9 @@ bool DateV2Value::to_format_string_without_check(const char* format, size_t l break; case 'H': // Hour (00...838) - if (hour < 100) { - to = write_two_digits_to_string(hour, to); - cursor += 2; - } else { - pos = int_to_str(hour, cursor); - to = append_with_prefix(cursor, static_cast(pos - cursor), '0', 2, to); - cursor += (pos - cursor); - } + pos = int_to_str(hour, cursor); + to = append_with_prefix(cursor, static_cast(pos - cursor), '0', 2, to); + cursor += (pos - cursor); pos = cursor; break; case 'i': @@ -3057,7 +3234,7 @@ bool DateV2Value::to_format_string_without_check(const char* format, size_t l break; case 'a': // Abbreviated weekday name - if (year == 0 && month == 0) { + if constexpr (only_time) { return false; } to = append_string(s_ab_day_name[calc_weekday(calc_daynr(year, month, day), false)], @@ -3065,7 +3242,7 @@ bool DateV2Value::to_format_string_without_check(const char* format, size_t l break; case 'b': // Abbreviated month name - if (month == 0) { + if constexpr (only_time) { return false; } to = append_string(s_ab_month_name[month], to); @@ -3077,7 +3254,7 @@ bool DateV2Value::to_format_string_without_check(const char* format, size_t l break; case 'D': // Day of the month with English suffix (0th, 1st, ...) 
- if (month == 0) { + if constexpr (only_time) { return false; } pos = int_to_str(day, cursor); @@ -3113,7 +3290,7 @@ bool DateV2Value::to_format_string_without_check(const char* format, size_t l break; case 'j': // Day of year (001..366) - if (month == 0 || day == 0) { + if constexpr (only_time) { return false; } pos = int_to_str(calc_daynr(year, month, day) - calc_daynr(year, 1, 1) + 1, cursor); @@ -3131,7 +3308,7 @@ bool DateV2Value::to_format_string_without_check(const char* format, size_t l break; case 'M': // Month name (January..December) - if (month == 0) { + if constexpr (only_time) { return false; } to = append_string(s_month_name[month], to); @@ -3185,7 +3362,7 @@ bool DateV2Value::to_format_string_without_check(const char* format, size_t l case 'u': // Week (00..53), where Monday is the first day of the week; // WEEK() mode 1 - if (month == 0) { + if constexpr (only_time) { return false; } to = write_two_digits_to_string(week(year, month, day, mysql_week_mode(1)), to); @@ -3195,7 +3372,7 @@ bool DateV2Value::to_format_string_without_check(const char* format, size_t l case 'U': // Week (00..53), where Sunday is the first day of the week; // WEEK() mode 0 - if (month == 0) { + if constexpr (only_time) { return false; } to = write_two_digits_to_string(week(year, month, day, mysql_week_mode(0)), to); @@ -3205,7 +3382,7 @@ bool DateV2Value::to_format_string_without_check(const char* format, size_t l case 'v': // Week (01..53), where Monday is the first day of the week; // WEEK() mode 3; used with %x - if (month == 0) { + if constexpr (only_time) { return false; } to = write_two_digits_to_string(week(year, month, day, mysql_week_mode(3)), to); @@ -3215,7 +3392,7 @@ bool DateV2Value::to_format_string_without_check(const char* format, size_t l case 'V': // Week (01..53), where Sunday is the first day of the week; // WEEK() mode 2; used with %X - if (month == 0) { + if constexpr (only_time) { return false; } to = write_two_digits_to_string(week(year, month, day, 
mysql_week_mode(2)), to); @@ -3224,7 +3401,7 @@ bool DateV2Value::to_format_string_without_check(const char* format, size_t l break; case 'w': // Day of the week (0=Sunday..6=Saturday) - if (month == 0 && year == 0) { + if constexpr (only_time) { return false; } pos = int_to_str(calc_weekday(calc_daynr(year, month, day), true), cursor); @@ -3232,7 +3409,7 @@ bool DateV2Value::to_format_string_without_check(const char* format, size_t l break; case 'W': // Weekday name (Sunday..Saturday) - if (year == 0 && month == 0) { + if constexpr (only_time) { return false; } to = append_string(s_day_name[calc_weekday(calc_daynr(year, month, day), false)], to); @@ -3240,7 +3417,7 @@ bool DateV2Value::to_format_string_without_check(const char* format, size_t l case 'x': { // Year for the week, where Monday is the first day of the week, // numeric, four digits; used with %v - if (month == 0 || day == 0) { + if constexpr (only_time) { return false; } uint16_t year_to_write = 0; @@ -3254,7 +3431,7 @@ bool DateV2Value::to_format_string_without_check(const char* format, size_t l case 'X': { // Year for the week where Sunday is the first day of the week, // numeric, four digits; used with %V - if (month == 0 || day == 0) { + if constexpr (only_time) { return false; } uint16_t year_to_write = 0; @@ -3275,193 +3452,11 @@ bool DateV2Value::to_format_string_without_check(const char* format, size_t l return true; } -template -bool DateV2Value::to_format_string_conservative(const char* format, size_t len, char* to, - size_t max_valid_length) const { - if (is_invalid(year(), month(), day(), hour(), minute(), second(), microsecond())) { - return false; - } - - return to_format_string_without_check(format, len, to, max_valid_length, this->year(), - this->month(), this->day(), this->hour(), this->minute(), - this->second(), this->microsecond()); -} - -template -int64_t DateV2Value::standardize_timevalue(int64_t value) { - if (value <= 0) { - return 0; - } - if (value >= 10000101000000L) { - // 
9999-99-99 99:99:99 - if (value > 99999999999999L) { - return 0; - } - - // between 1000-01-01 00:00:00L and 9999-99-99 99:99:99 - // all digits exist. - return value; - } - // 2000-01-01 - if (value < 101) { - return 0; - } - // two digits year. 2000 ~ 2069 - if (value <= (YY_PART_YEAR - 1) * 10000L + 1231L) { - return (value + 20000000L) * 1000000L; - } - // two digits year, invalid date - if (value < YY_PART_YEAR * 10000L + 101) { - return 0; - } - // two digits year. 1970 ~ 1999 - if (value <= 991231L) { - return (value + 19000000L) * 1000000L; - } - if (value < 10000101) { - return 0; - } - // four digits years without hour. - if (value <= 99991231L) { - return value * 1000000L; - } - // below 0000-01-01 - if (value < 101000000) { - return 0; - } - - // below is with datetime, must have hh:mm:ss - // 2000 ~ 2069 - if (value <= (YY_PART_YEAR - 1) * 10000000000L + 1231235959L) { - return value + 20000000000000L; - } - if (value < YY_PART_YEAR * 10000000000L + 101000000L) { - return 0; - } - // 1970 ~ 1999 - if (value <= 991231235959L) { - return value + 19000000000000L; - } - return value; -} - -template -bool DateV2Value::from_date_int64(int64_t value) { - value = standardize_timevalue(value); - if (value <= 0) { - return false; - } - uint64_t date = value / 1000000; - - auto [year, month, day, hour, minute, second] = std::tuple {0, 0, 0, 0, 0, 0}; - year = date / 10000; - date %= 10000; - month = date / 100; - day = date % 100; - - if constexpr (is_datetime) { - uint64_t time = value % 1000000; - hour = time / 10000; - time %= 10000; - minute = time / 100; - second = time % 100; - return check_range_and_set_time(year, month, day, hour, minute, second, 0); - } else { - return check_range_and_set_time(year, month, day, 0, 0, 0, 0); - } -} - -// An ISO week-numbering year (also called ISO year informally) has 52 or 53 full weeks. That is 364 or 371 days instead of the usual 365 or 366 days. 
These 53-week years occur on all years that have Thursday as 1 January and on leap years that start on Wednesday. The extra week is sometimes referred to as a leap week, although ISO 8601 does not use this term. https://en.wikipedia.org/wiki/ISO_week_date -template -uint16_t DateV2Value::year_of_week() const { - constexpr uint8_t THURSDAY = 3; - - if (date_v2_value_.month_ == 1) { - constexpr uint8_t MAX_DISTANCE_WITH_THURSDAY = 6 - THURSDAY; - if (date_v2_value_.day_ <= MAX_DISTANCE_WITH_THURSDAY) { - auto weekday = calc_weekday(daynr(), false); - // if the current day is after Thursday and Thursday is in the previous year, return the previous year - return date_v2_value_.year_ - - (weekday > THURSDAY && weekday - THURSDAY > date_v2_value_.day_ - 1); - } - } else if (date_v2_value_.month_ == 12) { - constexpr uint8_t MAX_DISTANCE_WITH_THURSDAY = THURSDAY - 0; - if (S_DAYS_IN_MONTH[12] - date_v2_value_.day_ <= MAX_DISTANCE_WITH_THURSDAY) { - auto weekday = calc_weekday(daynr(), false); - // if the current day is before Thursday and Thursday is in the next year, return the next year - return date_v2_value_.year_ + - (weekday < THURSDAY && - (THURSDAY - weekday) > S_DAYS_IN_MONTH[12] - date_v2_value_.day_); - } - } - return date_v2_value_.year_; -} -template -uint8_t DateV2Value::week(int16_t year, int8_t month, int8_t day, uint8_t mode) { +uint8_t DatetimeValueUtil::week(int16_t year, int8_t month, int8_t day, uint8_t mode) { uint16_t year_to_write = 0; return calc_week(calc_daynr(year, month, day), year, month, day, mode, &year_to_write); } -template -uint8_t DateV2Value::calc_week(const uint32_t& day_nr, const uint16_t& year, - const uint8_t& month, const uint8_t& day, uint8_t mode, - uint16_t* to_year, bool disable_lut) { - if (year == 0) [[unlikely]] { - *to_year = 0; - return 0; - } - if (config::enable_time_lut && !disable_lut && mode == 3 && year >= 1950 && year < 2030) { - return doris::TimeLUT::GetImplement() - ->week_of_year_table[year - 
doris::LUT_START_YEAR][month - 1][day - 1]; - } - // mode=4 is used for week() - if (config::enable_time_lut && !disable_lut && mode == 4 && year >= 1950 && year < 2030) { - return doris::TimeLUT::GetImplement() - ->week_table[year - doris::LUT_START_YEAR][month - 1][day - 1]; - } - bool monday_first = mode & WEEK_MONDAY_FIRST; - bool week_year = mode & WEEK_YEAR; - bool first_weekday = mode & WEEK_FIRST_WEEKDAY; - uint64_t daynr_first_day = doris::calc_daynr(year, 1, 1); - uint8_t weekday_first_day = doris::calc_weekday(daynr_first_day, !monday_first); - - int days = 0; - *to_year = year; - - // Check weather the first days of this year belongs to last year - if (month == 1 && day <= (7 - weekday_first_day)) { - if (!week_year && ((first_weekday && weekday_first_day != 0) || - (!first_weekday && weekday_first_day > 3))) { - return 0; - } - (*to_year)--; - week_year = true; - daynr_first_day -= (days = doris::calc_days_in_year(*to_year)); - weekday_first_day = (weekday_first_day + 53 * 7 - days) % 7; - } - - // How many days since first week - if ((first_weekday && weekday_first_day != 0) || (!first_weekday && weekday_first_day > 3)) { - // days in new year belongs to last year. - days = day_nr - (daynr_first_day + (7 - weekday_first_day)); - } else { - // days in new year belongs to this year. - days = day_nr - (daynr_first_day - weekday_first_day); - } - - if (week_year && days >= 52 * 7) { - weekday_first_day = (weekday_first_day + doris::calc_days_in_year(*to_year)) % 7; - if ((first_weekday && weekday_first_day == 0) || - (!first_weekday && weekday_first_day <= 3)) { - // Belong to next year. 
- (*to_year)++; - return 1; - } - } - - return days / 7 + 1; -} - template std::ostream& operator<<(std::ostream& os, const DateV2Value& value) { char buf[30]; @@ -3724,5 +3719,13 @@ template bool DateV2Value::datetime_trunc( template bool DateV2Value::datetime_trunc(); template bool DateV2Value::datetime_trunc(); template bool DateV2Value::datetime_trunc(); + +template bool DatetimeValueUtil::to_format_string_without_check(const char*, size_t, char*, + size_t, int16_t, int8_t, + int8_t, int, int, int, int); + +template bool DatetimeValueUtil::to_format_string_without_check(const char*, size_t, char*, + size_t, int16_t, int8_t, + int8_t, int, int, int, int); #include "common/compile_check_avoid_end.h" } // namespace doris diff --git a/be/src/vec/runtime/vdatetime_value.h b/be/src/vec/runtime/vdatetime_value.h index 41ca010645533a..b8fe4067fd3c83 100644 --- a/be/src/vec/runtime/vdatetime_value.h +++ b/be/src/vec/runtime/vdatetime_value.h @@ -817,6 +817,8 @@ inline const VecDateTimeValue VecDateTimeValue::DEFAULT_VALUE(false, TYPE_DATETI template class DateV2Value { + friend class DatetimeValueUtil; + public: static constexpr bool is_datetime = std::is_same_v; using underlying_value = std::conditional_t; @@ -1449,14 +1451,7 @@ class DateV2Value { void set_int_val(uint64_t val) { this->int_val_ = val; } - static bool to_format_string_without_check(const char* format, size_t len, char* to, - size_t max_valid_length, int16_t year, int8_t month, - int8_t day, int hour, int minute, int second, - int ms); - private: - static uint8_t week(int16_t year, int8_t month, int8_t day, uint8_t mode); - static uint8_t calc_week(const uint32_t& day_nr, const uint16_t& year, const uint8_t& month, const uint8_t& day, uint8_t mode, uint16_t* to_year, bool disable_lut = false); @@ -1758,6 +1753,25 @@ inline uint32_t calc_daynr(uint16_t year, uint8_t month, uint8_t day) { return delsum + y / 4 - y / 100 + y / 400; } +class DatetimeValueUtil { +public: + template + static bool 
to_format_string_without_check(const char* format, size_t len, char* to, + size_t max_valid_length, int16_t year, int8_t month, + int8_t day, int hour, int minute, int second, + int ms); + +private: + static uint8_t week(int16_t year, int8_t month, int8_t day, uint8_t mode); + + static uint8_t calc_week(const uint32_t& day_nr, const uint16_t& year, const uint8_t& month, + const uint8_t& day, uint8_t mode, uint16_t* to_year, + bool disable_lut = false) { + return DateV2Value::calc_week(day_nr, year, month, day, mode, to_year, + disable_lut); + } +}; + template struct DateTraits {}; diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/executable/DateTimeExtractAndTransform.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/executable/DateTimeExtractAndTransform.java index 38e8cb0ea6f742..efe6b6bc6f9aec 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/executable/DateTimeExtractAndTransform.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/executable/DateTimeExtractAndTransform.java @@ -276,19 +276,6 @@ private static LocalDateTime firstDayOfWeek(LocalDateTime dateTime) { return dateTime.plusDays(-distanceToFirstDayOfWeek(dateTime)); } - /** - * datetime arithmetic function date-format - */ - @ExecFunction(name = "date_format") - public static Expression dateFormat(DateV2Literal date, StringLikeLiteral format) { - if (StringUtils.trim(format.getValue()).length() > 128) { - throw new AnalysisException("The length of format string in date_format() function should not be greater" - + " than 128."); - } - format = (StringLikeLiteral) SupportJavaDateFormatter.translateJavaFormatter(format); - return new VarcharLiteral(DateTimeFormatterUtils.toFormatStringConservative(date, format, false)); - } - /** * datetime arithmetic function date-format */ @@ -314,18 +301,6 @@ public static Expression timeFormat(TimeV2Literal time, 
StringLikeLiteral format return new VarcharLiteral(DateTimeFormatterUtils.toFormatStringConservative(time, format)); } - /** - * time_format constant folding for datev2 literal. - */ - @ExecFunction(name = "time_format") - public static Expression timeFormat(DateV2Literal date, StringLikeLiteral format) { - if (StringUtils.trim(format.getValue()).length() > 128) { - throw new AnalysisException("The length of format string in time_format() function should not be greater" - + " than 128."); - } - return new VarcharLiteral(DateTimeFormatterUtils.toFormatStringConservative(date, format, true)); - } - /** * time_format constant folding for datetimev2 literal. */ diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/DateFormat.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/DateFormat.java index 3da486abbed1dc..6c26a94f635cd9 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/DateFormat.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/DateFormat.java @@ -28,7 +28,6 @@ import org.apache.doris.nereids.trees.expressions.shape.BinaryExpression; import org.apache.doris.nereids.trees.expressions.visitor.ExpressionVisitor; import org.apache.doris.nereids.types.DateTimeV2Type; -import org.apache.doris.nereids.types.DateV2Type; import org.apache.doris.nereids.types.VarcharType; import org.apache.doris.nereids.util.DateUtils; @@ -45,8 +44,7 @@ public class DateFormat extends ScalarFunction public static final List SIGNATURES = ImmutableList.of( FunctionSignature.ret(VarcharType.SYSTEM_DEFAULT).args(DateTimeV2Type.WILDCARD, - VarcharType.SYSTEM_DEFAULT), - FunctionSignature.ret(VarcharType.SYSTEM_DEFAULT).args(DateV2Type.INSTANCE, VarcharType.SYSTEM_DEFAULT)); + VarcharType.SYSTEM_DEFAULT)); /** * constructor with 2 arguments. 
diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/TimeFormat.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/TimeFormat.java index a2d86d4ee0f2d8..6c96683e29df68 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/TimeFormat.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/TimeFormat.java @@ -27,7 +27,6 @@ import org.apache.doris.nereids.trees.expressions.shape.BinaryExpression; import org.apache.doris.nereids.trees.expressions.visitor.ExpressionVisitor; import org.apache.doris.nereids.types.DateTimeV2Type; -import org.apache.doris.nereids.types.DateV2Type; import org.apache.doris.nereids.types.TimeV2Type; import org.apache.doris.nereids.types.VarcharType; import org.apache.doris.nereids.util.DateUtils; @@ -47,8 +46,7 @@ public class TimeFormat extends ScalarFunction FunctionSignature.ret(VarcharType.SYSTEM_DEFAULT) .args(TimeV2Type.WILDCARD, VarcharType.SYSTEM_DEFAULT), FunctionSignature.ret(VarcharType.SYSTEM_DEFAULT) - .args(DateTimeV2Type.WILDCARD, VarcharType.SYSTEM_DEFAULT), - FunctionSignature.ret(VarcharType.SYSTEM_DEFAULT).args(DateV2Type.INSTANCE, VarcharType.SYSTEM_DEFAULT) + .args(DateTimeV2Type.WILDCARD, VarcharType.SYSTEM_DEFAULT) ); /** diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/util/DateTimeFormatterUtils.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/util/DateTimeFormatterUtils.java index 09bb6bf829a769..0e47a8f2915e19 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/util/DateTimeFormatterUtils.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/util/DateTimeFormatterUtils.java @@ -19,7 +19,6 @@ import org.apache.doris.nereids.exceptions.AnalysisException; import org.apache.doris.nereids.trees.expressions.literal.DateTimeV2Literal; -import org.apache.doris.nereids.trees.expressions.literal.DateV2Literal; import 
org.apache.doris.nereids.trees.expressions.literal.StringLikeLiteral; import org.apache.doris.nereids.trees.expressions.literal.TimeV2Literal; @@ -166,25 +165,6 @@ public static String toFormatStringConservative(DateTimeV2Literal datetime, Stri return formatTemporalLiteral(year, month, day, hour, minute, second, microsecond, pattern); } - /** - * Conservative implementation of DATE_FORMAT/TIME_FORMAT for date literals used - * in constant folding. - * - * @param date date literal to format - * @param format format pattern - * @param isTimeFormat true when invoked via time_format, false for date_format - * @return formatted string or null when pattern requires missing date fields - */ - public static String toFormatStringConservative(DateV2Literal date, StringLikeLiteral format, - boolean isTimeFormat) { - int year = isTimeFormat ? 0 : (int) date.getYear(); - int month = isTimeFormat ? 0 : (int) date.getMonth(); - int day = isTimeFormat ? 0 : (int) date.getDay(); - - String pattern = trimFormat(format.getValue()); - return formatTemporalLiteral(year, month, day, 0, 0, 0, 0, pattern); - } - /** * Conservative implementation of TIME_FORMAT for time literals used in constant * folding. 
diff --git a/fe/fe-core/src/test/java/org/apache/doris/nereids/rules/expression/FoldConstantTest.java b/fe/fe-core/src/test/java/org/apache/doris/nereids/rules/expression/FoldConstantTest.java index 6b24ffd6b611ad..478b4270dedf71 100644 --- a/fe/fe-core/src/test/java/org/apache/doris/nereids/rules/expression/FoldConstantTest.java +++ b/fe/fe-core/src/test/java/org/apache/doris/nereids/rules/expression/FoldConstantTest.java @@ -500,12 +500,12 @@ void testFoldString() { StringLiteral.of("%y %m %d")); rewritten = executor.rewrite(d, context); Assertions.assertEquals(new VarcharLiteral("01 01 01"), rewritten); - d = new DateFormat(DateV2Literal.fromJavaDateType(LocalDateTime.of(1, 1, 1, 1, 1, 1)), + d = new DateFormat(DateTimeV2Literal.fromJavaDateType(LocalDateTime.of(1, 1, 1, 1, 1, 1)), StringLiteral.of("%y %m %d")); rewritten = executor.rewrite(d, context); Assertions.assertEquals(new VarcharLiteral("01 01 01"), rewritten); - d = new DateFormat(DateV2Literal.fromJavaDateType(LocalDateTime.of(1, 1, 1, 1, 1, 1)), + d = new DateFormat(DateTimeV2Literal.fromJavaDateType(LocalDateTime.of(1, 1, 1, 1, 1, 1)), StringLiteral.of(StringUtils.repeat("s", 128) + " ")); rewritten = executor.rewrite(d, context); // Overlength output (>100 chars) is not folded @@ -1164,13 +1164,12 @@ void testArithmeticFold() { void testDateV2TypeDateTimeArithmeticFunctions() { DateV2Literal dateLiteral = new DateV2Literal("1999-12-31"); IntegerLiteral integerLiteral = new IntegerLiteral(30); - VarcharLiteral format = new VarcharLiteral("%Y-%m-%d"); String[] answer = { "'2000-01-30'", "'1999-12-01'", "'2029-12-31'", "'1969-12-31'", "'2002-06-30'", "'1997-06-30'", "'2000-01-30'", "'1999-12-01'", "1999", "4", "12", "6", "31", "365", "31", - "'1999-12-31'", "'1999-12-27'", "'1999-12-31'" + "'1999-12-27'", "'1999-12-31'" }; int answerIdx = 0; @@ -1191,8 +1190,6 @@ void testDateV2TypeDateTimeArithmeticFunctions() { Assertions.assertEquals(DateTimeExtractAndTransform.dayOfYear(dateLiteral).toSql(), 
answer[answerIdx++]); Assertions.assertEquals(DateTimeExtractAndTransform.day(dateLiteral).toSql(), answer[answerIdx++]); - Assertions.assertEquals(DateTimeExtractAndTransform.dateFormat(dateLiteral, format).toSql(), - answer[answerIdx++]); Assertions.assertEquals(DateTimeExtractAndTransform.toMonday(dateLiteral).toSql(), answer[answerIdx++]); Assertions.assertEquals(DateTimeExtractAndTransform.lastDay(dateLiteral).toSql(), answer[answerIdx]); } diff --git a/regression-test/data/doc/sql-manual/sql-functions/doc_date_functions_test.out b/regression-test/data/doc/sql-manual/sql-functions/doc_date_functions_test.out index 9ca2211a71ebcb..a05c5e1a17bc42 100644 --- a/regression-test/data/doc/sql-manual/sql-functions/doc_date_functions_test.out +++ b/regression-test/data/doc/sql-manual/sql-functions/doc_date_functions_test.out @@ -2047,6 +2047,7 @@ da fanadur 12 -00:00:01 -00 -0 -12 -12 -12 -00 -01 -01 -000000 -AM -12:00:01 AM -00:00:01 -00:00:01.000000 -0 00 12 12 12 -01 000000 00 AM -00:00:01 12:00:01 AM 12:12 -12 0 12 00 12 AM -000000 01 00 00:00:01 12:00:01 AM 13 -12:34:56.000001 -12 -12 -12 -12 -12 -34 -56 -56 -000001 -PM -12:34:56 PM -12:34:56 -12:34:56.000001 -12 12 12 12 12 -56 000001 34 PM -12:34:56 12:34:56 PM 12:12 -12 12 12 12 12 PM -000001 56 34 12:34:56 12:34:56 PM 14 -838:59:59.999999 \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N +15 \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N -- !time_format_2 -- 00 0 12 12 12 AM diff --git a/regression-test/suites/correctness_p0/test_function_signature_all_types.groovy b/regression-test/suites/correctness_p0/test_function_signature_all_types.groovy index 0e5f49433d77cc..31ea693bb3ce76 100644 --- a/regression-test/suites/correctness_p0/test_function_signature_all_types.groovy +++ b/regression-test/suites/correctness_p0/test_function_signature_all_types.groovy @@ -461,7 +461,7 @@ suite("test_function_signature_all_types", 'nonConcurrent') { explain { sql("SELECT date_format(k14, '%Y-%m-%d') FROM 
test_sig_all_types") - notContains("CAST") + contains("CAST") } qt_dateformat_datev2 "SELECT date_format(k14, '%Y-%m-%d') FROM test_sig_all_types ORDER BY k1" diff --git a/regression-test/suites/doc/sql-manual/sql-functions/doc_date_functions_test.groovy b/regression-test/suites/doc/sql-manual/sql-functions/doc_date_functions_test.groovy index ee8ce43f9db67a..f64e6391bd381f 100644 --- a/regression-test/suites/doc/sql-manual/sql-functions/doc_date_functions_test.groovy +++ b/regression-test/suites/doc/sql-manual/sql-functions/doc_date_functions_test.groovy @@ -1428,7 +1428,8 @@ suite("doc_date_functions_test") { (11, '838:59:59.999999'), (12, '-00:00:01'), (13, '-12:34:56.000001'), - (14, '-838:59:59.999999') + (14, '-838:59:59.999999'), + (15, NULL); """ qt_time_format_1 """SELECT id,