Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
32 changes: 28 additions & 4 deletions be/src/vec/functions/function_date_or_datetime_computation.h
Original file line number Diff line number Diff line change
Expand Up @@ -1127,16 +1127,34 @@ struct TimestampToDateTime : IFunction {

static FunctionPtr create() { return std::make_shared<TimestampToDateTime<Impl>>(); }

// Handle nulls manually to prevent invalid default values from causing errors.
// Returning false opts out of the default null handling; execute_impl builds the
// result null map itself and skips rows flagged as null before validating the value.
bool use_default_implementation_for_nulls() const override { return false; }

Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments,
uint32_t result, size_t input_rows_count) const override {
const auto& arg_col = block.get_by_position(arguments[0]).column;
const auto& column_data = assert_cast<const ColumnInt64&>(*arg_col);
// Handle null map manually
auto result_null_map_column = ColumnUInt8::create(input_rows_count, 0);
NullMap& result_null_map = assert_cast<ColumnUInt8&>(*result_null_map_column).get_data();

ColumnPtr argument_column = block.get_by_position(arguments[0]).column;
const NullMap* null_map = VectorizedUtils::get_null_map(argument_column);
if (null_map) {
VectorizedUtils::update_null_map(result_null_map, *null_map);
}

// Extract nested column
argument_column = remove_nullable(argument_column);

const auto& column_data = assert_cast<const ColumnInt64&>(*argument_column);
auto res_col = ColumnDateTimeV2::create();
res_col->get_data().resize_fill(input_rows_count, 0);
auto& res_data = res_col->get_data();
const cctz::time_zone& time_zone = context->state()->timezone_obj();

for (int i = 0; i < input_rows_count; ++i) {
for (size_t i = 0; i < input_rows_count; ++i) {
if (result_null_map[i]) {
continue;
}
Int64 value = column_data.get_element(i);
if (value < 0) [[unlikely]] {
throw_out_of_bound_int(name, value);
Expand All @@ -1151,7 +1169,13 @@ struct TimestampToDateTime : IFunction {
dt.set_microsecond((value % Impl::ratio) * ratio_to_micro);
}

block.replace_by_position(result, std::move(res_col));
if (null_map) {
block.replace_by_position(
result,
ColumnNullable::create(std::move(res_col), std::move(result_null_map_column)));
} else {
block.replace_by_position(result, std::move(res_col));
}
return Status::OK();
}
};
Expand Down
165 changes: 108 additions & 57 deletions be/src/vec/functions/function_other_types_to_date.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -545,9 +545,9 @@ struct DateTrunc {
auto& res = static_cast<ColumnType*>(result_column->assume_mutable().get())->get_data();
for (size_t i = 0; i < input_rows_count; ++i) {
auto dt = binary_cast<NativeType, DateValueType>(data[i]);
if (!dt.template datetime_trunc<Unit>()) {
throw_out_of_bound_one_date<DateValueType>(name, data[i]);
}
// datetime_trunc only fails when dt is invalid, which is impossible here, so it is better not to throw an error.
// This lets us use the default implementation for nulls without worrying about invalid nested values.
dt.template datetime_trunc<Unit>();
res[i] = binary_cast<DateValueType, NativeType>(dt);
}
}
Expand Down Expand Up @@ -735,9 +735,7 @@ struct UnixTimeStampDateImpl {
const auto& ts_value =
reinterpret_cast<const DateV2Value<DateV2ValueType>&>(*source.data);
int64_t timestamp {};
const auto valid =
ts_value.unix_timestamp(&timestamp, context->state()->timezone_obj());
DCHECK(valid);
ts_value.unix_timestamp(&timestamp, context->state()->timezone_obj());
col_result_data[i] = trim_timestamp(timestamp, NewVersion);
}
block.replace_by_position(result, std::move(col_result));
Expand All @@ -753,9 +751,7 @@ struct UnixTimeStampDateImpl {
const auto& ts_value =
reinterpret_cast<const DateV2Value<DateTimeV2ValueType>&>(*source.data);
std::pair<int64_t, int64_t> timestamp {};
const auto valid =
ts_value.unix_timestamp(&timestamp, context->state()->timezone_obj());
DCHECK(valid);
ts_value.unix_timestamp(&timestamp, context->state()->timezone_obj());

auto [sec, ms] = trim_timestamp(timestamp, NewVersion);
col_result_data[i] =
Expand All @@ -770,12 +766,7 @@ struct UnixTimeStampDateImpl {
}
};

template <typename DateType, bool NewVersion = false>
struct UnixTimeStampDatetimeImpl : public UnixTimeStampDateImpl<DateType, NewVersion> {
static DataTypes get_variadic_argument_types() { return {std::make_shared<DateType>()}; }
};

// This impl doesn't use default impl to deal null value.
// Handle nulls manually to prevent invalid default values from causing errors
template <bool NewVersion = false>
struct UnixTimeStampStrImpl {
static DataTypes get_variadic_argument_types() {
Expand All @@ -789,22 +780,47 @@ struct UnixTimeStampStrImpl {
return std::make_shared<DataTypeDecimal64>(16, 6);
}

static bool use_default_implementation_for_nulls() { return false; }

static Status execute_impl(FunctionContext* context, Block& block,
const ColumnNumbers& arguments, uint32_t result,
size_t input_rows_count) {
// Handle null map manually
auto result_null_map_column = ColumnUInt8::create(input_rows_count, 0);
NullMap& result_null_map = assert_cast<ColumnUInt8&>(*result_null_map_column).get_data();

ColumnPtr col_left = nullptr, col_right = nullptr;
bool source_const = false, format_const = false;
std::tie(col_left, source_const) =
unpack_if_const(block.get_by_position(arguments[0]).column);
std::tie(col_right, format_const) =
unpack_if_const(block.get_by_position(arguments[1]).column);

// Update result null map from input null maps
const NullMap* null_map_left =
VectorizedUtils::get_null_map(block.get_by_position(arguments[0]).column);
const NullMap* null_map_right =
VectorizedUtils::get_null_map(block.get_by_position(arguments[1]).column);
if (null_map_left) {
VectorizedUtils::update_null_map(result_null_map, *null_map_left, source_const);
}
if (null_map_right) {
VectorizedUtils::update_null_map(result_null_map, *null_map_right, format_const);
}

// Extract nested columns
col_left = remove_nullable(col_left);
col_right = remove_nullable(col_right);

auto col_result = ColumnDecimal64::create(input_rows_count, 6);
auto& col_result_data = col_result->get_data();

const auto* col_source = assert_cast<const ColumnString*>(col_left.get());
const auto* col_format = assert_cast<const ColumnString*>(col_right.get());
for (int i = 0; i < input_rows_count; i++) {
for (size_t i = 0; i < input_rows_count; i++) {
if (result_null_map[i]) {
continue;
}
StringRef source = col_source->get_data_at(index_check_const(i, source_const));
StringRef fmt = col_format->get_data_at(index_check_const(i, format_const));

Expand All @@ -829,7 +845,13 @@ struct UnixTimeStampStrImpl {
}
}

block.replace_by_position(result, std::move(col_result));
if (null_map_left || null_map_right) {
block.replace_by_position(result,
ColumnNullable::create(std::move(col_result),
std::move(result_null_map_column)));
} else {
block.replace_by_position(result, std::move(col_result));
}

return Status::OK();
}
Expand All @@ -855,6 +877,13 @@ class FunctionUnixTimestamp : public IFunction {
return Impl::get_variadic_argument_types();
}

// Null-handling policy for this function: forward to the Impl's static
// use_default_implementation_for_nulls() when the Impl declares one,
// otherwise keep the default null handling enabled.
bool use_default_implementation_for_nulls() const override {
    constexpr bool impl_declares_null_policy =
            requires { Impl::use_default_implementation_for_nulls(); };
    if constexpr (impl_declares_null_policy) {
        return Impl::use_default_implementation_for_nulls();
    } else {
        return true;
    }
}

Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments,
uint32_t result, size_t input_rows_count) const override {
return Impl::execute_impl(context, block, arguments, result, input_rows_count);
Expand All @@ -881,6 +910,13 @@ class FunctionUnixTimestampNew : public IFunction {
return Impl::get_variadic_argument_types();
}

// Null-handling policy for this function: forward to the Impl's static
// use_default_implementation_for_nulls() when the Impl declares one,
// otherwise keep the default null handling enabled.
bool use_default_implementation_for_nulls() const override {
    constexpr bool impl_declares_null_policy =
            requires { Impl::use_default_implementation_for_nulls(); };
    if constexpr (impl_declares_null_policy) {
        return Impl::use_default_implementation_for_nulls();
    } else {
        return true;
    }
}

Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments,
uint32_t result, size_t input_rows_count) const override {
return Impl::execute_impl(context, block, arguments, result, input_rows_count);
Expand Down Expand Up @@ -966,11 +1002,27 @@ class FunctionDateOrDateTimeToDate : public IFunction {
return {std::make_shared<typename PrimitiveTypeTraits<PType>::DataType>()};
}

//ATTN: no need to replace null value now because last_day and to_monday both process boundary case well.
// may need to change if support more functions
// Handle nulls manually to prevent invalid default values from causing errors.
// Returning false opts out of the default null handling; execute_impl builds the
// result null map from the argument and hands it, together with the nested
// (non-nullable) argument column, to Impl<PType>::execute_impl.
bool use_default_implementation_for_nulls() const override { return false; }

Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments,
uint32_t result, size_t input_rows_count) const override {
return Impl<PType>::execute_impl(context, block, arguments, result, input_rows_count);
// Handle null map manually - update result null map from input null maps upfront
auto result_null_map_column = ColumnUInt8::create(input_rows_count, 0);
NullMap& result_null_map = assert_cast<ColumnUInt8&>(*result_null_map_column).get_data();

ColumnPtr argument_column = block.get_by_position(arguments[0]).column;
const NullMap* null_map = VectorizedUtils::get_null_map(argument_column);
if (null_map) {
VectorizedUtils::update_null_map(result_null_map, *null_map);
}

// Extract nested column
argument_column = remove_nullable(argument_column);

return Impl<PType>::execute_impl(context, block, arguments, result, input_rows_count,
argument_column, result_null_map,
std::move(result_null_map_column));
}
};

Expand All @@ -988,35 +1040,35 @@ struct LastDayImpl {

static Status execute_impl(FunctionContext* context, Block& block,
const ColumnNumbers& arguments, uint32_t result,
size_t input_rows_count) {
size_t input_rows_count, const ColumnPtr& argument_column,
NullMap& result_null_map,
ColumnUInt8::MutablePtr result_null_map_column) {
const auto is_nullable = block.get_by_position(result).type->is_nullable();
ColumnPtr res_column;
ColumnPtr argument_column = remove_nullable(block.get_by_position(arguments[0]).column);
if (is_nullable) {
auto null_map = ColumnUInt8::create(input_rows_count, 0);
auto data_col = assert_cast<const ColumnType*>(argument_column.get());
res_column = ResultColumnType::create(input_rows_count);
execute_straight(
input_rows_count, data_col->get_data(),
static_cast<ResultColumnType*>(res_column->assume_mutable().get())->get_data());
auto data_col = assert_cast<const ColumnType*>(argument_column.get());
auto res_column = ResultColumnType::create(input_rows_count);
execute_straight(
input_rows_count, data_col->get_data(),
static_cast<ResultColumnType*>(res_column->assume_mutable().get())->get_data(),
result_null_map);

if (is_nullable) {
block.replace_by_position(result,
ColumnNullable::create(res_column, std::move(null_map)));
ColumnNullable::create(std::move(res_column),
std::move(result_null_map_column)));
} else {
auto data_col = assert_cast<const ColumnType*>(argument_column.get());
res_column = ResultColumnType::create(input_rows_count);
execute_straight(
input_rows_count, data_col->get_data(),
static_cast<ResultColumnType*>(res_column->assume_mutable().get())->get_data());
block.replace_by_position(result, std::move(res_column));
}
return Status::OK();
}

static void execute_straight(size_t input_rows_count,
const PaddedPODArray<NativeType>& data_col,
PaddedPODArray<ResultNativeType>& res_data) {
for (int i = 0; i < input_rows_count; i++) {
PaddedPODArray<ResultNativeType>& res_data,
const NullMap& null_map) {
for (size_t i = 0; i < input_rows_count; i++) {
if (null_map[i]) {
continue;
}
const auto& cur_data = data_col[i];
auto ts_value = binary_cast<NativeType, DateValueType>(cur_data);
if (!ts_value.is_valid_date()) {
Expand Down Expand Up @@ -1065,36 +1117,35 @@ struct ToMondayImpl {

static Status execute_impl(FunctionContext* context, Block& block,
const ColumnNumbers& arguments, uint32_t result,
size_t input_rows_count) {
size_t input_rows_count, const ColumnPtr& argument_column,
NullMap& result_null_map,
ColumnUInt8::MutablePtr result_null_map_column) {
const auto is_nullable = block.get_by_position(result).type->is_nullable();
ColumnPtr argument_column = remove_nullable(block.get_by_position(arguments[0]).column);
ColumnPtr res_column;
if (is_nullable) {
auto null_map = ColumnUInt8::create(input_rows_count, 0);
auto data_col = assert_cast<const ColumnType*>(argument_column.get());
res_column = ResultColumnType::create(input_rows_count);
execute_straight(
input_rows_count, data_col->get_data(),
static_cast<ResultColumnType*>(res_column->assume_mutable().get())->get_data());
auto data_col = assert_cast<const ColumnType*>(argument_column.get());
auto res_column = ResultColumnType::create(input_rows_count);
execute_straight(
input_rows_count, data_col->get_data(),
static_cast<ResultColumnType*>(res_column->assume_mutable().get())->get_data(),
result_null_map);

if (is_nullable) {
block.replace_by_position(result,
ColumnNullable::create(res_column, std::move(null_map)));
ColumnNullable::create(std::move(res_column),
std::move(result_null_map_column)));
} else {
auto data_col = assert_cast<const ColumnType*>(argument_column.get());
res_column = ResultColumnType::create(input_rows_count);
execute_straight(
input_rows_count, data_col->get_data(),
static_cast<ResultColumnType*>(res_column->assume_mutable().get())->get_data());
block.replace_by_position(result, std::move(res_column));
}
return Status::OK();
}

// v1, throws on invalid date
static void execute_straight(size_t input_rows_count,
const PaddedPODArray<NativeType>& data_col,
PaddedPODArray<ResultNativeType>& res_data) {
for (int i = 0; i < input_rows_count; i++) {
PaddedPODArray<ResultNativeType>& res_data,
const NullMap& null_map) {
for (size_t i = 0; i < input_rows_count; i++) {
if (null_map[i]) {
continue;
}
const auto& cur_data = data_col[i];
auto ts_value = binary_cast<NativeType, DateValueType>(cur_data);
if (!ts_value.is_valid_date()) [[unlikely]] {
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
-- This file is automatically generated. You should know what you did if you want to edit this
-- !sql --
1 match 2024-01-15T10:23:45 2024-01-15T00:00 2024-01-31 2024-01-15 2024-01-15T10:23:45 1705285425.000000
2 no_match \N \N \N \N \N \N
3 match 2024-02-20T08:00 2024-02-20T00:00 2024-02-29 2024-02-19 2024-02-20T08:00 1708387200.000000

Loading
Loading