Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 8 additions & 8 deletions be/src/olap/types.h
Original file line number Diff line number Diff line change
Expand Up @@ -1236,11 +1236,11 @@ struct FieldTypeTraits<FieldType::OLAP_FIELD_TYPE_DATEV2>
CppType tmp = *reinterpret_cast<const CppType*>(src);
DateV2Value<DateV2ValueType> value =
binary_cast<CppType, DateV2Value<DateV2ValueType>>(tmp);
string format = "%Y-%m-%d";
string res;
res.resize(12);
res.reserve(12);
value.to_format_string(format.c_str(), format.size(), res.data());
std::string format = "%Y-%m-%d";
std::string res;
res.resize(12 + SAFE_FORMAT_STRING_MARGIN);
value.to_format_string_conservative(format.c_str(), format.size(), res.data(),
12 + SAFE_FORMAT_STRING_MARGIN);
return res;
}

Expand Down Expand Up @@ -1277,9 +1277,9 @@ struct FieldTypeTraits<FieldType::OLAP_FIELD_TYPE_DATETIMEV2>
binary_cast<CppType, DateV2Value<DateTimeV2ValueType>>(tmp);
string format = "%Y-%m-%d %H:%i:%s.%f";
string res;
res.resize(30);
res.reserve(30);
value.to_format_string(format.c_str(), format.size(), res.data());
res.resize(30 + SAFE_FORMAT_STRING_MARGIN);
value.to_format_string_conservative(format.c_str(), format.size(), res.data(),
30 + SAFE_FORMAT_STRING_MARGIN);
return res;
}

Expand Down
16 changes: 7 additions & 9 deletions be/src/vec/data_types/serde/data_type_datetimev2_serde.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,6 @@
#include <arrow/builder.h>

#include <chrono> // IWYU pragma: keep
#include <type_traits>

#include "vec/columns/column_const.h"
#include "vec/io/io_helper.h"
Expand All @@ -32,8 +31,7 @@ enum {
DIVISOR_FOR_NANO = 1000000000
};

namespace doris {
namespace vectorized {
namespace doris::vectorized {
static const int64_t timestamp_threshold = -2177481943;
static const int64_t timestamp_diff = 343;
static const int64_t micr_to_nano_second = 1000;
Expand All @@ -57,8 +55,9 @@ Status DataTypeDateTimeV2SerDe::serialize_one_cell_to_json(const IColumn& column

if (options.date_olap_format) {
std::string format = "%Y-%m-%d %H:%i:%s.%f";
char buf[30];
val.to_format_string(format.c_str(), format.size(), buf);
char buf[30 + SAFE_FORMAT_STRING_MARGIN];
val.to_format_string_conservative(format.c_str(), format.size(), buf,
30 + SAFE_FORMAT_STRING_MARGIN);
std::string s = std::string(buf);
bw.write(s.c_str(), s.length());
} else {
Expand Down Expand Up @@ -132,7 +131,7 @@ void DataTypeDateTimeV2SerDe::read_column_from_arrow(IColumn& column,
auto& col_data = static_cast<ColumnDateTimeV2&>(column).get_data();
int64_t divisor = 1;
if (arrow_array->type()->id() == arrow::Type::TIMESTAMP) {
auto concrete_array = dynamic_cast<const arrow::TimestampArray*>(arrow_array);
const auto* concrete_array = dynamic_cast<const arrow::TimestampArray*>(arrow_array);
const auto type = std::static_pointer_cast<arrow::TimestampType>(arrow_array->type());
switch (type->unit()) {
case arrow::TimeUnit::type::SECOND: {
Expand Down Expand Up @@ -176,7 +175,7 @@ template <bool is_binary_format>
Status DataTypeDateTimeV2SerDe::_write_column_to_mysql(const IColumn& column,
MysqlRowBuffer<is_binary_format>& result,
int row_idx, bool col_const) const {
auto& data = assert_cast<const ColumnVector<UInt64>&>(column).get_data();
const auto& data = assert_cast<const ColumnVector<UInt64>&>(column).get_data();
const auto col_index = index_check_const(row_idx, col_const);
DateV2Value<DateTimeV2ValueType> date_val =
binary_cast<UInt64, DateV2Value<DateTimeV2ValueType>>(data[col_index]);
Expand Down Expand Up @@ -245,5 +244,4 @@ Status DataTypeDateTimeV2SerDe::write_column_to_orc(const std::string& timezone,
return Status::OK();
}

} // namespace vectorized
} // namespace doris
} // namespace doris::vectorized
10 changes: 6 additions & 4 deletions be/src/vec/functions/date_time_transforms.h
Original file line number Diff line number Diff line change
Expand Up @@ -190,8 +190,9 @@ struct DateFormatImpl {
if (format.size > 128) {
return std::pair {offset, true};
}
char buf[128];
if (!dt.to_format_string(format.data, format.size, buf)) {
char buf[100 + SAFE_FORMAT_STRING_MARGIN];
if (!dt.to_format_string_conservative(format.data, format.size, buf,
100 + SAFE_FORMAT_STRING_MARGIN)) {
return std::pair {offset, true};
}

Expand Down Expand Up @@ -227,8 +228,9 @@ struct FromUnixTimeImpl {
}
dt.from_unixtime(val, time_zone);

char buf[128];
if (!dt.to_format_string(format.data, format.size, buf)) {
char buf[100 + SAFE_FORMAT_STRING_MARGIN];
if (!dt.to_format_string_conservative(format.data, format.size, buf,
100 + SAFE_FORMAT_STRING_MARGIN)) {
return std::pair {offset, true};
}

Expand Down
35 changes: 23 additions & 12 deletions be/src/vec/runtime/vdatetime_value.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -545,6 +545,7 @@ bool VecDateTimeValue::from_date_daynr(uint64_t daynr) {
return true;
}

/// @return: pointer to the position just past the last character written into `to`
static char* int_to_str(uint64_t val, char* to) {
char buf[64];
char* ptr = buf;
Expand All @@ -557,7 +558,6 @@ static char* int_to_str(uint64_t val, char* to) {
while (ptr > buf) {
*to++ = *--ptr;
}

return to;
}

Expand All @@ -568,18 +568,17 @@ static char* append_string(const char* from, char* to) {
return to;
}

static char* append_with_prefix(const char* str, int str_len, char prefix, int full_len, char* to) {
int len = (str_len > full_len) ? str_len : full_len;
len -= str_len;
while (len-- > 0) {
// push prefix;
static char* append_with_prefix(const char* str, int str_len, char prefix, int target_len,
char* to) {
// target_len is the lower bound: if str is shorter, pad with prefix; if longer, copy it in full.
int diff = target_len - str_len;
// use prefix to pad
while (diff-- > 0) { // won't be INT_MIN. it's ok
*to++ = prefix;
}
while (str_len-- > 0) {
*to++ = *str++;
}

return to;
memcpy(to, str, str_len);
return to + str_len;
}

int VecDateTimeValue::compute_format_len(const char* format, int len) {
Expand Down Expand Up @@ -675,10 +674,12 @@ char* write_four_digits_to_string(int number, char* dst) {
return dst + 4;
}

bool VecDateTimeValue::to_format_string(const char* format, int len, char* to) const {
bool VecDateTimeValue::to_format_string_conservative(const char* format, int len, char* to,
int max_valid_length) const {
if (check_range(_year, _month, _day, _hour, _minute, _second, _type)) {
return false;
}
char* const begin = to; // to check written bytes
char buf[64];
char* cursor = buf;
char* pos = nullptr;
Expand All @@ -687,6 +688,9 @@ bool VecDateTimeValue::to_format_string(const char* format, int len, char* to) c
char ch = '\0';

while (ptr < end) {
if (to - begin + SAFE_FORMAT_STRING_MARGIN > max_valid_length) [[unlikely]] {
return false;
}
if (*ptr != '%' || (ptr + 1) == end) {
*to++ = *ptr++;
continue;
Expand Down Expand Up @@ -934,6 +938,7 @@ bool VecDateTimeValue::to_format_string(const char* format, int len, char* to) c
break;
}
default:
// put it literal
*to++ = ch;
break;
}
Expand Down Expand Up @@ -3435,10 +3440,12 @@ void DateV2Value<T>::set_microsecond(uint32_t microsecond) {
}

template <typename T>
bool DateV2Value<T>::to_format_string(const char* format, int len, char* to) const {
bool DateV2Value<T>::to_format_string_conservative(const char* format, int len, char* to,
int max_valid_length) const {
if (is_invalid(year(), month(), day(), hour(), minute(), second(), microsecond())) {
return false;
}
char* const begin = to; // to check written bytes
char buf[64];
char* pos = nullptr;
char* cursor = buf;
Expand All @@ -3447,6 +3454,9 @@ bool DateV2Value<T>::to_format_string(const char* format, int len, char* to) con
char ch = '\0';

while (ptr < end) {
if (to - begin + SAFE_FORMAT_STRING_MARGIN > max_valid_length) [[unlikely]] {
return false;
}
if (*ptr != '%' || (ptr + 1) == end) {
*to++ = *ptr++;
continue;
Expand Down Expand Up @@ -3680,6 +3690,7 @@ bool DateV2Value<T>::to_format_string(const char* format, int len, char* to) con
break;
}
default:
// put it literal
*to++ = ch;
break;
}
Expand Down
17 changes: 14 additions & 3 deletions be/src/vec/runtime/vdatetime_value.h
Original file line number Diff line number Diff line change
Expand Up @@ -143,6 +143,8 @@ struct TimeInterval {

enum TimeType { TIME_TIME = 1, TIME_DATE = 2, TIME_DATETIME = 3 };

constexpr int SAFE_FORMAT_STRING_MARGIN = 12;

// Used to compute week
const int WEEK_MONDAY_FIRST = 1;
const int WEEK_YEAR = 2;
Expand Down Expand Up @@ -391,8 +393,12 @@ class VecDateTimeValue { // Now this type is a temp solution with little changes

char* to_string(char* to) const;

// Convert this datetime value to string by the format string
bool to_format_string(const char* format, int len, char* to) const;
// Convert this datetime value to a string according to the format string.
// To keep the bounds check cheap, this may return false when the output merely APPROACHES
// (without actually reaching) max_valid_length, so the buffer needs some slack beyond the data.
// To guarantee valid data fits, make the buffer size = <data_need_length> + SAFE_FORMAT_STRING_MARGIN
// and pass that same size as max_valid_length.
bool to_format_string_conservative(const char* format, int len, char* to,
int max_valid_length) const;

// compute the length of data format pattern
static int compute_format_len(const char* format, int len);
Expand Down Expand Up @@ -818,7 +824,12 @@ class DateV2Value {
return val;
}

bool to_format_string(const char* format, int len, char* to) const;
// Convert this datetime value to a string according to the format string.
// To keep the bounds check cheap, this may return false when the output merely APPROACHES
// (without actually reaching) max_valid_length, so the buffer needs some slack beyond the data.
// To guarantee valid data fits, make the buffer size = <data_need_length> + SAFE_FORMAT_STRING_MARGIN
// and pass that same size as max_valid_length.
bool to_format_string_conservative(const char* format, int len, char* to,
int max_valid_length) const;

bool from_date_format_str(const char* format, int format_len, const char* value,
int value_len) {
Expand Down
6 changes: 5 additions & 1 deletion regression-test/data/datatype_p0/date/test_from_unixtime.out
Original file line number Diff line number Diff line change
Expand Up @@ -27,4 +27,8 @@
\N

-- !sql10 --
\N
\N

-- !long --
\N

Original file line number Diff line number Diff line change
Expand Up @@ -491,6 +491,9 @@ true
-- !sql --
2022 31 4

-- !sql_date_format_long --
\N

-- !sql --
\N

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -44,4 +44,5 @@ suite("test_from_unixtime") {
qt_sql9 "select from_unixtime(-7629445119491449, \"%Y-%m-%d\");"
qt_sql10 "select from_unixtime(-7629445119491449);"

qt_long "select from_unixtime(1196440219, '%f %V %f %l %V %I %S %p %w %r %j %f %l %I %D %w %j %D %e %s %V %f %D %M %s %X %U %v %c %u %x %r %j %a %h %s %m %a %v %u %b');"
}
Original file line number Diff line number Diff line change
Expand Up @@ -474,6 +474,7 @@ suite("test_date_function") {
qt_sql """ select date_format('1999-01-01', '%X %V'); """
qt_sql """ select date_format('2025-01-01', '%X %V'); """
qt_sql """ select date_format('2022-08-04', '%X %V %w'); """
qt_sql_date_format_long """ select date_format(cast('2011-06-24' as DATETIMEV2(0)), '%f %V %f %l %V %I %S %p %w %r %j %f %l %I %D %w %j %D %e %s %V %f %D %M %s %X %U %v %c %u %x %r %j %a %h %s %m %a %v %u %b') """
qt_sql """ select STR_TO_DATE('Tue Jul 12 20:00:45 CST 2022', '%a %b %e %H:%i:%s %Y'); """
qt_sql """ select STR_TO_DATE('Tue Jul 12 20:00:45 CST 2022', '%a %b %e %T CST %Y'); """
qt_sql """ select STR_TO_DATE('2018-4-2 15:3:28','%Y-%m-%d %H:%i:%s'); """
Expand Down