From 53880dfdda551321e57c5c59abcef5bad2614df1 Mon Sep 17 00:00:00 2001 From: BiteTheDDDDt Date: Mon, 3 Mar 2025 15:26:14 +0800 Subject: [PATCH 1/7] fix wrong chars size on TransformerToStringOneArgument and add more check --- be/src/vec/columns/column_string.cpp | 3 ++- be/src/vec/columns/column_string.h | 18 +++++++++++++++--- be/src/vec/functions/date_time_transforms.h | 2 ++ 3 files changed, 19 insertions(+), 4 deletions(-) diff --git a/be/src/vec/columns/column_string.cpp b/be/src/vec/columns/column_string.cpp index 1d2e1c092b0054..153e8e62b9b9bc 100644 --- a/be/src/vec/columns/column_string.cpp +++ b/be/src/vec/columns/column_string.cpp @@ -55,7 +55,7 @@ void ColumnStr::sanity_check_simple() const { #ifndef NDEBUG auto count = offsets.size(); if (chars.size() != offsets[count - 1]) { - throw Exception(Status::InternalError("row count: {}, chars.size(): {}, offset[{}]: ", + throw Exception(Status::InternalError("row count: {}, chars.size(): {}, offset[{}]: {}", count, chars.size(), offsets[count - 1])); } if (offsets[-1] != 0) { @@ -639,6 +639,7 @@ template void ColumnStr::compare_internal(size_t rhs_row_id, const IColumn& rhs, int nan_direction_hint, int direction, std::vector& cmp_res, uint8* __restrict filter) const { + sanity_check_simple(); auto sz = offsets.size(); DCHECK(cmp_res.size() == sz); const auto& cmp_base = diff --git a/be/src/vec/columns/column_string.h b/be/src/vec/columns/column_string.h index 287ef66a2d9f14..14755e5f7f34d4 100644 --- a/be/src/vec/columns/column_string.h +++ b/be/src/vec/columns/column_string.h @@ -131,11 +131,13 @@ class ColumnStr final : public COWHelper> { Field operator[](size_t n) const override { assert(n < size()); + sanity_check_simple(); return Field(String(reinterpret_cast(&chars[offset_at(n)]), size_at(n))); } void get(size_t n, Field& res) const override { assert(n < size()); + sanity_check_simple(); if (res.get_type() == Field::Types::JSONB) { // Handle JsonbField res = JsonbField(reinterpret_cast(&chars[offset_at(n)]), size_at(n)); @@ -146,6 +148,7 @@ class ColumnStr final : public COWHelper> { StringRef get_data_at(size_t n) const override { DCHECK_LT(n, size()); + sanity_check_simple(); return StringRef(&chars[offset_at(n)], size_at(n)); } @@ -527,11 +530,20 @@ class ColumnStr final : public COWHelper> { return typeid(rhs) == typeid(ColumnStr); } - Chars& get_chars() { return chars; } + Chars& get_chars() { + sanity_check_simple(); + return chars; + } const Chars& get_chars() const { return chars; } - auto& get_offsets() { return offsets; } - const auto& get_offsets() const { return offsets; } + auto& get_offsets() { + sanity_check_simple(); + return offsets; + } + const auto& get_offsets() const { + sanity_check_simple(); + return offsets; + } void clear() override { chars.clear(); diff --git a/be/src/vec/functions/date_time_transforms.h b/be/src/vec/functions/date_time_transforms.h index 301effe80e0df8..63ed635df45431 100644 --- a/be/src/vec/functions/date_time_transforms.h +++ b/be/src/vec/functions/date_time_transforms.h @@ -317,6 +317,7 @@ struct TransformerToStringOneArgument { cast_set(Transform::execute(date_time_value, res_data, offset)); null_map[i] = !date_time_value.is_valid_date(); } + res_data.resize(res_offsets[res_offsets.size() - 1]); } static void vector(FunctionContext* context, @@ -336,6 +337,7 @@ struct TransformerToStringOneArgument { cast_set(Transform::execute(date_time_value, res_data, offset)); DCHECK(date_time_value.is_valid_date()); } + res_data.resize(res_offsets[res_offsets.size() - 1]); } }; From 6175165d9eecbb72a437a0d0e8ca69e386f11a73 Mon Sep 17 00:00:00 2001 From: BiteTheDDDDt Date: Mon, 3 Mar 2025 18:07:54 +0800 Subject: [PATCH 2/7] update --- be/src/vec/columns/column_string.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/be/src/vec/columns/column_string.cpp b/be/src/vec/columns/column_string.cpp index 153e8e62b9b9bc..14bb5c1cb1dddb 100644 --- a/be/src/vec/columns/column_string.cpp +++ b/be/src/vec/columns/column_string.cpp @@ -56,7 +56,7 @@ void ColumnStr::sanity_check_simple() const { auto count = offsets.size(); if (chars.size() != offsets[count - 1]) { throw Exception(Status::InternalError("row count: {}, chars.size(): {}, offset[{}]: {}", - count, chars.size(), offsets[count - 1])); + count, chars.size(), count - 1, offsets[count - 1])); } if (offsets[-1] != 0) { throw Exception(Status::InternalError("wrong offsets[-1]: {}", offsets[-1])); From a6cb10b93d39cd8acd89c574db299f2df30b53c4 Mon Sep 17 00:00:00 2001 From: BiteTheDDDDt Date: Mon, 3 Mar 2025 19:11:46 +0800 Subject: [PATCH 3/7] fix --- be/src/vec/columns/column_string.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/be/src/vec/columns/column_string.cpp b/be/src/vec/columns/column_string.cpp index 14bb5c1cb1dddb..aacad59f524208 100644 --- a/be/src/vec/columns/column_string.cpp +++ b/be/src/vec/columns/column_string.cpp @@ -53,7 +53,7 @@ void ColumnStr::sanity_check() const { template void ColumnStr::sanity_check_simple() const { #ifndef NDEBUG - auto count = offsets.size(); + int count = offsets.size(); if (chars.size() != offsets[count - 1]) { throw Exception(Status::InternalError("row count: {}, chars.size(): {}, offset[{}]: {}", count, chars.size(), count - 1, offsets[count - 1])); From 1db684688af3098d33bdc0f24b0fcabe905f853e Mon Sep 17 00:00:00 2001 From: BiteTheDDDDt Date: Mon, 3 Mar 2025 19:11:53 +0800 Subject: [PATCH 4/7] fix --- be/src/vec/columns/column_string.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/be/src/vec/columns/column_string.cpp b/be/src/vec/columns/column_string.cpp index aacad59f524208..c28d5f34f29721 100644 --- a/be/src/vec/columns/column_string.cpp +++ b/be/src/vec/columns/column_string.cpp @@ -41,7 +41,7 @@ void ColumnStr::sanity_check() const { #ifndef NDEBUG sanity_check_simple(); auto count = offsets.size(); - for (size_t i = 0; i < count; ++i) { + for (int i = 0; i < count; ++i) { if (offsets[i] < offsets[i - 1]) { throw Exception(Status::InternalError("row count: {}, offsets[{}]: {}, offsets[{}]: {}", count, i, offsets[i], i - 1, offsets[i - 1])); From 3ad7580f9f24d0c8b31b7a9a68ec38621e15aced Mon Sep 17 00:00:00 2001 From: BiteTheDDDDt Date: Mon, 3 Mar 2025 19:23:17 +0800 Subject: [PATCH 5/7] update --- be/src/vec/columns/column_string.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/be/src/vec/columns/column_string.cpp b/be/src/vec/columns/column_string.cpp index c28d5f34f29721..98c0b7020cbfda 100644 --- a/be/src/vec/columns/column_string.cpp +++ b/be/src/vec/columns/column_string.cpp @@ -40,7 +40,7 @@ template void ColumnStr::sanity_check() const { #ifndef NDEBUG sanity_check_simple(); - auto count = offsets.size(); + auto count = cast_set(offsets.size()); for (int i = 0; i < count; ++i) { if (offsets[i] < offsets[i - 1]) { throw Exception(Status::InternalError("row count: {}, offsets[{}]: {}, offsets[{}]: {}", @@ -53,7 +53,7 @@ void ColumnStr::sanity_check() const { template void ColumnStr::sanity_check_simple() const { #ifndef NDEBUG - int count = offsets.size(); + auto count = cast_set(offsets.size()); if (chars.size() != offsets[count - 1]) { throw Exception(Status::InternalError("row count: {}, chars.size(): {}, offset[{}]: {}", count, chars.size(), count - 1, offsets[count - 1])); From e3fb20ae267bab2710632290a6d6e18940e0ffc6 Mon Sep 17 00:00:00 2001 From: BiteTheDDDDt Date: Tue, 4 Mar 2025 11:09:03 +0800 Subject: [PATCH 6/7] update check --- be/src/vec/columns/column_string.h | 15 +++------------ be/src/vec/exprs/vexpr_context.cpp | 4 ++++ be/test/vec/function/function_test_util.h | 3 +++ 3 files changed, 10 insertions(+), 12 deletions(-) diff --git a/be/src/vec/columns/column_string.h b/be/src/vec/columns/column_string.h index 14755e5f7f34d4..a35d5b9dc72ce9 100644 --- a/be/src/vec/columns/column_string.h +++ b/be/src/vec/columns/column_string.h @@ -530,20 +530,11 @@ class ColumnStr final : public COWHelper> { return typeid(rhs) == typeid(ColumnStr); } - Chars& get_chars() { - sanity_check_simple(); - return chars; - } + Chars& get_chars() { return chars; } const Chars& get_chars() const { return chars; } - auto& get_offsets() { - sanity_check_simple(); - return offsets; - } - const auto& get_offsets() const { - sanity_check_simple(); - return offsets; - } + auto& get_offsets() { return offsets; } + const auto& get_offsets() const { return offsets; } void clear() override { chars.clear(); diff --git a/be/src/vec/exprs/vexpr_context.cpp b/be/src/vec/exprs/vexpr_context.cpp index ae17ace911b518..99565cef247c79 100644 --- a/be/src/vec/exprs/vexpr_context.cpp +++ b/be/src/vec/exprs/vexpr_context.cpp @@ -61,6 +61,10 @@ Status VExprContext::execute(vectorized::Block* block, int* result_column_id) { RETURN_IF_CATCH_EXCEPTION({ st = _root->execute(this, block, result_column_id); _last_result_column_id = *result_column_id; + if (const auto* column_str = check_and_get_column( + block->get_by_position(*result_column_id).column.get())) { + column_str->sanity_check(); + } }); return st; } diff --git a/be/test/vec/function/function_test_util.h b/be/test/vec/function/function_test_util.h index 937a873c607f9d..4526f8a7295d0a 100644 --- a/be/test/vec/function/function_test_util.h +++ b/be/test/vec/function/function_test_util.h @@ -317,6 +317,9 @@ Status check_function(const std::string& func_name, const InputTypeSet& input_ty // 3. check the result of function ColumnPtr column = block.get_columns()[result]; EXPECT_TRUE(column); + if (const auto* column_str = check_and_get_column(column.get())) { + column_str->sanity_check(); + } for (int i = 0; i < row_size; ++i) { // update current line From 9237498626cf95af5ea33cfa64bb3399a359fbe9 Mon Sep 17 00:00:00 2001 From: BiteTheDDDDt Date: Wed, 5 Mar 2025 10:04:16 +0800 Subject: [PATCH 7/7] update --- be/src/vec/exprs/vexpr_context.cpp | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/be/src/vec/exprs/vexpr_context.cpp b/be/src/vec/exprs/vexpr_context.cpp index 99565cef247c79..569d002cb0ccd8 100644 --- a/be/src/vec/exprs/vexpr_context.cpp +++ b/be/src/vec/exprs/vexpr_context.cpp @@ -61,9 +61,11 @@ Status VExprContext::execute(vectorized::Block* block, int* result_column_id) { RETURN_IF_CATCH_EXCEPTION({ st = _root->execute(this, block, result_column_id); _last_result_column_id = *result_column_id; - if (const auto* column_str = check_and_get_column( - block->get_by_position(*result_column_id).column.get())) { - column_str->sanity_check(); + if (_last_result_column_id != -1) { + if (const auto* column_str = check_and_get_column( + block->get_by_position(*result_column_id).column.get())) { + column_str->sanity_check(); + } } }); return st;