From e8447df2ce89a36ba79acaeea522bda37c2c933d Mon Sep 17 00:00:00 2001 From: jacktengg <18241664+jacktengg@users.noreply.github.com> Date: Wed, 19 Oct 2022 15:32:47 +0800 Subject: [PATCH 1/4] [fix](hashjoin) fix coredump of hash join in ubsan build --- be/CMakeLists.txt | 2 +- be/src/vec/exec/join/vhash_join_node.cpp | 29 +++++++++++++----------- be/src/vec/exec/join/vhash_join_node.h | 4 ++-- 3 files changed, 19 insertions(+), 16 deletions(-) diff --git a/be/CMakeLists.txt b/be/CMakeLists.txt index df22953202b653..4f246413a3de05 100644 --- a/be/CMakeLists.txt +++ b/be/CMakeLists.txt @@ -556,7 +556,7 @@ SET(CXX_FLAGS_LSAN "${CXX_GCC_FLAGS} -O0 -fsanitize=leak -DLEAK_SANITIZER") # Set the flags to the undefined behavior sanitizer, also known as "ubsan" # Turn on sanitizer and debug symbols to get stack traces: -SET(CXX_FLAGS_UBSAN "${CXX_GCC_FLAGS} -O0 -fno-wrapv -fsanitize=undefined") +SET(CXX_FLAGS_UBSAN "${CXX_GCC_FLAGS} -O0 -fno-wrapv -fsanitize=undefined -DUNDEFINED_BEHAVIOR_SANITIZER") # Set the flags to the thread sanitizer, also known as "tsan" # Turn on sanitizer and debug symbols to get stack traces: diff --git a/be/src/vec/exec/join/vhash_join_node.cpp b/be/src/vec/exec/join/vhash_join_node.cpp index 41332b68502c31..946193fbd93c6d 100644 --- a/be/src/vec/exec/join/vhash_join_node.cpp +++ b/be/src/vec/exec/join/vhash_join_node.cpp @@ -192,16 +192,19 @@ ProcessHashTableProbe::ProcessHashTableProbe(HashJoinNo : _join_node(join_node), _batch_size(batch_size), _build_blocks(join_node->_build_blocks), - _tuple_is_null_left_flags( - reinterpret_cast(*join_node->_tuple_is_null_left_flag_column) - .get_data()), - _tuple_is_null_right_flags( - reinterpret_cast(*join_node->_tuple_is_null_right_flag_column) - .get_data()), _rows_returned_counter(join_node->_rows_returned_counter), _search_hashtable_timer(join_node->_search_hashtable_timer), _build_side_output_timer(join_node->_build_side_output_timer), - _probe_side_output_timer(join_node->_probe_side_output_timer) {} + _probe_side_output_timer(join_node->_probe_side_output_timer) { + if (join_node->_is_outer_join) { + _tuple_is_null_left_flags = + &(reinterpret_cast(*join_node->_tuple_is_null_left_flag_column) + .get_data()); + _tuple_is_null_right_flags = + &(reinterpret_cast(*join_node->_tuple_is_null_right_flag_column) + .get_data()); + } +} template template @@ -268,8 +271,8 @@ void ProcessHashTableProbe::build_side_output_column( // Dispose right tuple is null flags columns if constexpr (probe_all && !have_other_join_conjunct) { - _tuple_is_null_right_flags.resize(size); - auto* __restrict null_data = _tuple_is_null_right_flags.data(); + _tuple_is_null_right_flags->resize(size); + auto* __restrict null_data = _tuple_is_null_right_flags->data(); for (int i = 0; i < size; ++i) { null_data[i] = _build_block_rows[i] == -1; } @@ -298,7 +301,7 @@ void ProcessHashTableProbe::probe_side_output_column( } if constexpr (JoinOpType::value == TJoinOp::RIGHT_OUTER_JOIN && !have_other_join_conjunct) { - _tuple_is_null_left_flags.resize_fill(size, 0); + _tuple_is_null_left_flags->resize_fill(size, 0); } } @@ -616,7 +619,7 @@ Status ProcessHashTableProbe::do_process_with_other_joi for (int i = 0; i < column->size(); ++i) { if (filter_map[i]) { - _tuple_is_null_right_flags.emplace_back(null_map_data[i]); + _tuple_is_null_right_flags->emplace_back(null_map_data[i]); } } output_block->get_by_position(result_column_id).column = std::move(new_filter_column); @@ -672,7 +675,7 @@ Status ProcessHashTableProbe::do_process_with_other_joi *visited_map[i] |= result; filter_size += result; } - _tuple_is_null_left_flags.resize_fill(filter_size, 0); + _tuple_is_null_left_flags->resize_fill(filter_size, 0); } else { // inner join do nothing } @@ -741,7 +744,7 @@ Status ProcessHashTableProbe::process_data_in_hashtable for (int i = 0; i < right_col_idx; ++i) { assert_cast(mcol[i].get())->insert_many_defaults(block_size); } - _tuple_is_null_left_flags.resize_fill(block_size, 1); + _tuple_is_null_left_flags->resize_fill(block_size, 1); } *eos = iter == hash_table_ctx.hash_table.end(); diff --git a/be/src/vec/exec/join/vhash_join_node.h b/be/src/vec/exec/join/vhash_join_node.h index 011bc244a57f0f..0ab4437f04a5a0 100644 --- a/be/src/vec/exec/join/vhash_join_node.h +++ b/be/src/vec/exec/join/vhash_join_node.h @@ -198,9 +198,9 @@ struct ProcessHashTableProbe { std::vector _build_block_offsets; std::vector _build_block_rows; // only need set the tuple is null in RIGHT_OUTER_JOIN and FULL_OUTER_JOIN - ColumnUInt8::Container& _tuple_is_null_left_flags; + ColumnUInt8::Container* _tuple_is_null_left_flags = nullptr; // only need set the tuple is null in LEFT_OUTER_JOIN and FULL_OUTER_JOIN - ColumnUInt8::Container& _tuple_is_null_right_flags; + ColumnUInt8::Container* _tuple_is_null_right_flags = nullptr; RuntimeProfile::Counter* _rows_returned_counter; RuntimeProfile::Counter* _search_hashtable_timer; From 188ee91a968f93bec1f202703878a9c486bceb7a Mon Sep 17 00:00:00 2001 From: jacktengg <18241664+jacktengg@users.noreply.github.com> Date: Wed, 19 Oct 2022 17:24:45 +0800 Subject: [PATCH 2/4] ormat code --- be/src/vec/exec/join/vhash_join_node.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/be/src/vec/exec/join/vhash_join_node.cpp b/be/src/vec/exec/join/vhash_join_node.cpp index 946193fbd93c6d..e085a2239d0770 100644 --- a/be/src/vec/exec/join/vhash_join_node.cpp +++ b/be/src/vec/exec/join/vhash_join_node.cpp @@ -199,10 +199,10 @@ ProcessHashTableProbe::ProcessHashTableProbe(HashJoinNo if (join_node->_is_outer_join) { _tuple_is_null_left_flags = &(reinterpret_cast(*join_node->_tuple_is_null_left_flag_column) - .get_data()); + .get_data()); _tuple_is_null_right_flags = &(reinterpret_cast(*join_node->_tuple_is_null_right_flag_column) - .get_data()); + .get_data()); } } From 09bdee7274b5328ae5122172ec23e05654459270 Mon Sep 17 00:00:00 2001 From: jacktengg <18241664+jacktengg@users.noreply.github.com> Date: Wed, 19 Oct 2022 19:20:32 +0800 Subject: [PATCH 3/4] improve according to comment --- be/src/vec/exec/join/vhash_join_node.cpp | 17 +++++++---------- be/src/vec/exec/join/vhash_join_node.h | 4 ++-- 2 files changed, 9 insertions(+), 12 deletions(-) diff --git a/be/src/vec/exec/join/vhash_join_node.cpp b/be/src/vec/exec/join/vhash_join_node.cpp index e085a2239d0770..5c48827c33b049 100644 --- a/be/src/vec/exec/join/vhash_join_node.cpp +++ b/be/src/vec/exec/join/vhash_join_node.cpp @@ -192,19 +192,16 @@ ProcessHashTableProbe::ProcessHashTableProbe(HashJoinNo : _join_node(join_node), _batch_size(batch_size), _build_blocks(join_node->_build_blocks), + _tuple_is_null_left_flags( + join_node->_is_outer_join ? &(reinterpret_cast(*join_node->_tuple_is_null_left_flag_column) + .get_data()) : nullptr), + _tuple_is_null_right_flags( + join_node->_is_outer_join ? &(reinterpret_cast(*join_node->_tuple_is_null_right_flag_column) + .get_data()) : nullptr), _rows_returned_counter(join_node->_rows_returned_counter), _search_hashtable_timer(join_node->_search_hashtable_timer), _build_side_output_timer(join_node->_build_side_output_timer), - _probe_side_output_timer(join_node->_probe_side_output_timer) { - if (join_node->_is_outer_join) { - _tuple_is_null_left_flags = - &(reinterpret_cast(*join_node->_tuple_is_null_left_flag_column) - .get_data()); - _tuple_is_null_right_flags = - &(reinterpret_cast(*join_node->_tuple_is_null_right_flag_column) - .get_data()); - } -} + _probe_side_output_timer(join_node->_probe_side_output_timer) {} template template diff --git a/be/src/vec/exec/join/vhash_join_node.h b/be/src/vec/exec/join/vhash_join_node.h index 0ab4437f04a5a0..2be992d5bdfd02 100644 --- a/be/src/vec/exec/join/vhash_join_node.h +++ b/be/src/vec/exec/join/vhash_join_node.h @@ -198,9 +198,9 @@ struct ProcessHashTableProbe { std::vector _build_block_offsets; std::vector _build_block_rows; // only need set the tuple is null in RIGHT_OUTER_JOIN and FULL_OUTER_JOIN - ColumnUInt8::Container* _tuple_is_null_left_flags = nullptr; + ColumnUInt8::Container* _tuple_is_null_left_flags; // only need set the tuple is null in LEFT_OUTER_JOIN and FULL_OUTER_JOIN - ColumnUInt8::Container* _tuple_is_null_right_flags = nullptr; + ColumnUInt8::Container* _tuple_is_null_right_flags; RuntimeProfile::Counter* _rows_returned_counter; RuntimeProfile::Counter* _search_hashtable_timer; From 2514b71d695decb5a59602801412ef7fa3e4c89b Mon Sep 17 00:00:00 2001 From: jacktengg <18241664+jacktengg@users.noreply.github.com> Date: Wed, 19 Oct 2022 22:22:49 +0800 Subject: [PATCH 4/4] format code --- be/src/vec/exec/join/vhash_join_node.cpp | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/be/src/vec/exec/join/vhash_join_node.cpp b/be/src/vec/exec/join/vhash_join_node.cpp index 5c48827c33b049..8126e7732d23e5 100644 --- a/be/src/vec/exec/join/vhash_join_node.cpp +++ b/be/src/vec/exec/join/vhash_join_node.cpp @@ -192,12 +192,17 @@ ProcessHashTableProbe::ProcessHashTableProbe(HashJoinNo : _join_node(join_node), _batch_size(batch_size), _build_blocks(join_node->_build_blocks), - _tuple_is_null_left_flags( - join_node->_is_outer_join ? &(reinterpret_cast(*join_node->_tuple_is_null_left_flag_column) - .get_data()) : nullptr), + _tuple_is_null_left_flags(join_node->_is_outer_join + ? &(reinterpret_cast( + *join_node->_tuple_is_null_left_flag_column) + .get_data()) + : nullptr), _tuple_is_null_right_flags( - join_node->_is_outer_join ? &(reinterpret_cast(*join_node->_tuple_is_null_right_flag_column) - .get_data()) : nullptr), + join_node->_is_outer_join + ? &(reinterpret_cast( + *join_node->_tuple_is_null_right_flag_column) + .get_data()) + : nullptr), _rows_returned_counter(join_node->_rows_returned_counter), _search_hashtable_timer(join_node->_search_hashtable_timer), _build_side_output_timer(join_node->_build_side_output_timer),