diff --git a/be/src/vec/functions/function_date_or_datetime_computation.h b/be/src/vec/functions/function_date_or_datetime_computation.h index c6b3b5a989980b..ec1c6ff36b38a6 100644 --- a/be/src/vec/functions/function_date_or_datetime_computation.h +++ b/be/src/vec/functions/function_date_or_datetime_computation.h @@ -1127,16 +1127,34 @@ struct TimestampToDateTime : IFunction { static FunctionPtr create() { return std::make_shared>(); } + // Handle nulls manually to prevent invalid default values from causing errors + bool use_default_implementation_for_nulls() const override { return false; } + Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments, uint32_t result, size_t input_rows_count) const override { - const auto& arg_col = block.get_by_position(arguments[0]).column; - const auto& column_data = assert_cast(*arg_col); + // Handle null map manually + auto result_null_map_column = ColumnUInt8::create(input_rows_count, 0); + NullMap& result_null_map = assert_cast(*result_null_map_column).get_data(); + + ColumnPtr argument_column = block.get_by_position(arguments[0]).column; + const NullMap* null_map = VectorizedUtils::get_null_map(argument_column); + if (null_map) { + VectorizedUtils::update_null_map(result_null_map, *null_map); + } + + // Extract nested column + argument_column = remove_nullable(argument_column); + + const auto& column_data = assert_cast(*argument_column); auto res_col = ColumnDateTimeV2::create(); res_col->get_data().resize_fill(input_rows_count, 0); auto& res_data = res_col->get_data(); const cctz::time_zone& time_zone = context->state()->timezone_obj(); - for (int i = 0; i < input_rows_count; ++i) { + for (size_t i = 0; i < input_rows_count; ++i) { + if (result_null_map[i]) { + continue; + } Int64 value = column_data.get_element(i); if (value < 0) [[unlikely]] { throw_out_of_bound_int(name, value); @@ -1151,7 +1169,13 @@ struct TimestampToDateTime : IFunction { dt.set_microsecond((value % Impl::ratio) * 
ratio_to_micro); } - block.replace_by_position(result, std::move(res_col)); + if (null_map) { + block.replace_by_position( + result, + ColumnNullable::create(std::move(res_col), std::move(result_null_map_column))); + } else { + block.replace_by_position(result, std::move(res_col)); + } return Status::OK(); } }; diff --git a/be/src/vec/functions/function_other_types_to_date.cpp b/be/src/vec/functions/function_other_types_to_date.cpp index ec6e506d0918f3..347612b1350512 100644 --- a/be/src/vec/functions/function_other_types_to_date.cpp +++ b/be/src/vec/functions/function_other_types_to_date.cpp @@ -545,9 +545,9 @@ struct DateTrunc { auto& res = static_cast(result_column->assume_mutable().get())->get_data(); for (size_t i = 0; i < input_rows_count; ++i) { auto dt = binary_cast(data[i]); - if (!dt.template datetime_trunc()) { - throw_out_of_bound_one_date(name, data[i]); - } + // datetime_trunc can fail only when dt is invalid, which is impossible here, so we deliberately do not throw. + // This lets us use the default implementation for nulls without worrying about invalid nested values. 
+ dt.template datetime_trunc(); res[i] = binary_cast(dt); } } @@ -735,9 +735,7 @@ struct UnixTimeStampDateImpl { const auto& ts_value = reinterpret_cast&>(*source.data); int64_t timestamp {}; - const auto valid = - ts_value.unix_timestamp(&timestamp, context->state()->timezone_obj()); - DCHECK(valid); + ts_value.unix_timestamp(&timestamp, context->state()->timezone_obj()); col_result_data[i] = trim_timestamp(timestamp, NewVersion); } block.replace_by_position(result, std::move(col_result)); @@ -753,9 +751,7 @@ struct UnixTimeStampDateImpl { const auto& ts_value = reinterpret_cast&>(*source.data); std::pair timestamp {}; - const auto valid = - ts_value.unix_timestamp(&timestamp, context->state()->timezone_obj()); - DCHECK(valid); + ts_value.unix_timestamp(&timestamp, context->state()->timezone_obj()); auto [sec, ms] = trim_timestamp(timestamp, NewVersion); col_result_data[i] = @@ -770,12 +766,7 @@ struct UnixTimeStampDateImpl { } }; -template -struct UnixTimeStampDatetimeImpl : public UnixTimeStampDateImpl { - static DataTypes get_variadic_argument_types() { return {std::make_shared()}; } -}; - -// This impl doesn't use default impl to deal null value. 
+// Handle nulls manually to prevent invalid default values from causing errors template struct UnixTimeStampStrImpl { static DataTypes get_variadic_argument_types() { @@ -789,9 +780,15 @@ struct UnixTimeStampStrImpl { return std::make_shared(16, 6); } + static bool use_default_implementation_for_nulls() { return false; } + static Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments, uint32_t result, size_t input_rows_count) { + // Handle null map manually + auto result_null_map_column = ColumnUInt8::create(input_rows_count, 0); + NullMap& result_null_map = assert_cast(*result_null_map_column).get_data(); + ColumnPtr col_left = nullptr, col_right = nullptr; bool source_const = false, format_const = false; std::tie(col_left, source_const) = @@ -799,12 +796,31 @@ struct UnixTimeStampStrImpl { std::tie(col_right, format_const) = unpack_if_const(block.get_by_position(arguments[1]).column); + // Update result null map from input null maps + const NullMap* null_map_left = + VectorizedUtils::get_null_map(block.get_by_position(arguments[0]).column); + const NullMap* null_map_right = + VectorizedUtils::get_null_map(block.get_by_position(arguments[1]).column); + if (null_map_left) { + VectorizedUtils::update_null_map(result_null_map, *null_map_left, source_const); + } + if (null_map_right) { + VectorizedUtils::update_null_map(result_null_map, *null_map_right, format_const); + } + + // Extract nested columns + col_left = remove_nullable(col_left); + col_right = remove_nullable(col_right); + auto col_result = ColumnDecimal64::create(input_rows_count, 6); auto& col_result_data = col_result->get_data(); const auto* col_source = assert_cast(col_left.get()); const auto* col_format = assert_cast(col_right.get()); - for (int i = 0; i < input_rows_count; i++) { + for (size_t i = 0; i < input_rows_count; i++) { + if (result_null_map[i]) { + continue; + } StringRef source = col_source->get_data_at(index_check_const(i, source_const)); StringRef fmt 
= col_format->get_data_at(index_check_const(i, format_const)); @@ -829,7 +845,13 @@ struct UnixTimeStampStrImpl { } } - block.replace_by_position(result, std::move(col_result)); + if (null_map_left || null_map_right) { + block.replace_by_position(result, + ColumnNullable::create(std::move(col_result), + std::move(result_null_map_column))); + } else { + block.replace_by_position(result, std::move(col_result)); + } return Status::OK(); } @@ -855,6 +877,13 @@ class FunctionUnixTimestamp : public IFunction { return Impl::get_variadic_argument_types(); } + bool use_default_implementation_for_nulls() const override { + if constexpr (requires { Impl::use_default_implementation_for_nulls(); }) { + return Impl::use_default_implementation_for_nulls(); + } + return true; + } + Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments, uint32_t result, size_t input_rows_count) const override { return Impl::execute_impl(context, block, arguments, result, input_rows_count); @@ -881,6 +910,13 @@ class FunctionUnixTimestampNew : public IFunction { return Impl::get_variadic_argument_types(); } + bool use_default_implementation_for_nulls() const override { + if constexpr (requires { Impl::use_default_implementation_for_nulls(); }) { + return Impl::use_default_implementation_for_nulls(); + } + return true; + } + Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments, uint32_t result, size_t input_rows_count) const override { return Impl::execute_impl(context, block, arguments, result, input_rows_count); @@ -966,11 +1002,27 @@ class FunctionDateOrDateTimeToDate : public IFunction { return {std::make_shared::DataType>()}; } - //ATTN: no need to replace null value now because last_day and to_monday both process boundary case well. 
- // may need to change if support more functions + // Handle nulls manually to prevent invalid default values from causing errors + bool use_default_implementation_for_nulls() const override { return false; } + Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments, uint32_t result, size_t input_rows_count) const override { - return Impl::execute_impl(context, block, arguments, result, input_rows_count); + // Handle null map manually - update result null map from input null maps upfront + auto result_null_map_column = ColumnUInt8::create(input_rows_count, 0); + NullMap& result_null_map = assert_cast(*result_null_map_column).get_data(); + + ColumnPtr argument_column = block.get_by_position(arguments[0]).column; + const NullMap* null_map = VectorizedUtils::get_null_map(argument_column); + if (null_map) { + VectorizedUtils::update_null_map(result_null_map, *null_map); + } + + // Extract nested column + argument_column = remove_nullable(argument_column); + + return Impl::execute_impl(context, block, arguments, result, input_rows_count, + argument_column, result_null_map, + std::move(result_null_map_column)); } }; @@ -988,26 +1040,22 @@ struct LastDayImpl { static Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments, uint32_t result, - size_t input_rows_count) { + size_t input_rows_count, const ColumnPtr& argument_column, + NullMap& result_null_map, + ColumnUInt8::MutablePtr result_null_map_column) { const auto is_nullable = block.get_by_position(result).type->is_nullable(); - ColumnPtr res_column; - ColumnPtr argument_column = remove_nullable(block.get_by_position(arguments[0]).column); - if (is_nullable) { - auto null_map = ColumnUInt8::create(input_rows_count, 0); - auto data_col = assert_cast(argument_column.get()); - res_column = ResultColumnType::create(input_rows_count); - execute_straight( - input_rows_count, data_col->get_data(), - 
static_cast(res_column->assume_mutable().get())->get_data()); + auto data_col = assert_cast(argument_column.get()); + auto res_column = ResultColumnType::create(input_rows_count); + execute_straight( + input_rows_count, data_col->get_data(), + static_cast(res_column->assume_mutable().get())->get_data(), + result_null_map); + if (is_nullable) { block.replace_by_position(result, - ColumnNullable::create(res_column, std::move(null_map))); + ColumnNullable::create(std::move(res_column), + std::move(result_null_map_column))); } else { - auto data_col = assert_cast(argument_column.get()); - res_column = ResultColumnType::create(input_rows_count); - execute_straight( - input_rows_count, data_col->get_data(), - static_cast(res_column->assume_mutable().get())->get_data()); block.replace_by_position(result, std::move(res_column)); } return Status::OK(); @@ -1015,8 +1063,12 @@ struct LastDayImpl { static void execute_straight(size_t input_rows_count, const PaddedPODArray& data_col, - PaddedPODArray& res_data) { - for (int i = 0; i < input_rows_count; i++) { + PaddedPODArray& res_data, + const NullMap& null_map) { + for (size_t i = 0; i < input_rows_count; i++) { + if (null_map[i]) { + continue; + } const auto& cur_data = data_col[i]; auto ts_value = binary_cast(cur_data); if (!ts_value.is_valid_date()) { @@ -1065,36 +1117,35 @@ struct ToMondayImpl { static Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments, uint32_t result, - size_t input_rows_count) { + size_t input_rows_count, const ColumnPtr& argument_column, + NullMap& result_null_map, + ColumnUInt8::MutablePtr result_null_map_column) { const auto is_nullable = block.get_by_position(result).type->is_nullable(); - ColumnPtr argument_column = remove_nullable(block.get_by_position(arguments[0]).column); - ColumnPtr res_column; - if (is_nullable) { - auto null_map = ColumnUInt8::create(input_rows_count, 0); - auto data_col = assert_cast(argument_column.get()); - res_column = 
ResultColumnType::create(input_rows_count); - execute_straight( - input_rows_count, data_col->get_data(), - static_cast(res_column->assume_mutable().get())->get_data()); + auto data_col = assert_cast(argument_column.get()); + auto res_column = ResultColumnType::create(input_rows_count); + execute_straight( + input_rows_count, data_col->get_data(), + static_cast(res_column->assume_mutable().get())->get_data(), + result_null_map); + if (is_nullable) { block.replace_by_position(result, - ColumnNullable::create(res_column, std::move(null_map))); + ColumnNullable::create(std::move(res_column), + std::move(result_null_map_column))); } else { - auto data_col = assert_cast(argument_column.get()); - res_column = ResultColumnType::create(input_rows_count); - execute_straight( - input_rows_count, data_col->get_data(), - static_cast(res_column->assume_mutable().get())->get_data()); block.replace_by_position(result, std::move(res_column)); } return Status::OK(); } - // v1, throws on invalid date static void execute_straight(size_t input_rows_count, const PaddedPODArray& data_col, - PaddedPODArray& res_data) { - for (int i = 0; i < input_rows_count; i++) { + PaddedPODArray& res_data, + const NullMap& null_map) { + for (size_t i = 0; i < input_rows_count; i++) { + if (null_map[i]) { + continue; + } const auto& cur_data = data_col[i]; auto ts_value = binary_cast(cur_data); if (!ts_value.is_valid_date()) [[unlikely]] { diff --git a/regression-test/data/correctness_p0/test_date_trunc_error.out b/regression-test/data/correctness_p0/test_date_trunc_error.out new file mode 100644 index 00000000000000..5ba0c61f6136da --- /dev/null +++ b/regression-test/data/correctness_p0/test_date_trunc_error.out @@ -0,0 +1,6 @@ +-- This file is automatically generated. 
You should know what you did if you want to edit this +-- !sql -- +1 match 2024-01-15T10:23:45 2024-01-15T00:00 2024-01-31 2024-01-15 2024-01-15T10:23:45 1705285425.000000 +2 no_match \N \N \N \N \N \N +3 match 2024-02-20T08:00 2024-02-20T00:00 2024-02-29 2024-02-19 2024-02-20T08:00 1708387200.000000 + diff --git a/regression-test/suites/correctness_p0/test_date_trunc_error.groovy b/regression-test/suites/correctness_p0/test_date_trunc_error.groovy new file mode 100644 index 00000000000000..68f770469fa664 --- /dev/null +++ b/regression-test/suites/correctness_p0/test_date_trunc_error.groovy @@ -0,0 +1,85 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +suite("test_datelike_false_alarm") { + sql "DROP TABLE IF EXISTS dt_t_left;" + sql """ + CREATE TABLE dt_t_left ( + id INT, + name STRING + ) + DISTRIBUTED BY HASH(id) BUCKETS 1 + PROPERTIES ( + "replication_num" = "1" + ); + """ + sql "DROP TABLE IF EXISTS dt_t_right;" + sql """ + CREATE TABLE dt_t_right ( + id INT, + event_time DATETIME + ) + DISTRIBUTED BY HASH(id) BUCKETS 1 + PROPERTIES ( + "replication_num" = "1" + ); + """ + sql """ + INSERT INTO dt_t_left VALUES + (1, 'match'), + (2, 'no_match'), + (3, 'match'); + """ + sql """ + INSERT INTO dt_t_right VALUES + (1, '2024-01-15 10:23:45'), + (3, '2024-02-20 08:00:00'); + """ + sql "DROP TABLE IF EXISTS dt_one_row;" + sql """ + CREATE TABLE dt_one_row ( + k INT + ) + DISTRIBUTED BY HASH(k) BUCKETS 1 + PROPERTIES ("replication_num" = "1"); + """ + sql "INSERT INTO dt_one_row VALUES (1);" + + qt_sql """ + SELECT + t.id, + t.name, + t.event_time, + date_trunc('day', t.event_time) AS trunc_day, + last_day(t.event_time) AS last_day, + to_monday(t.event_time) AS to_monday, + from_microsecond( unix_timestamp(t.event_time) * 1000000 ) AS microsecond, + unix_timestamp( CAST(t.event_time AS VARCHAR), "%Y-%m-%d %H:%i:%s" ) AS unix_timestamp + FROM ( + SELECT + l.id, + l.name, + r.event_time + FROM dt_t_left l + LEFT JOIN dt_t_right r + ON l.id = r.id + ) t + LEFT JOIN dt_one_row o + ON o.k = 1 + ORDER BY t.id; + """ +} \ No newline at end of file