From 04a6783c1e0449031e3b06fd6def42c05db0b407 Mon Sep 17 00:00:00 2001 From: zhangstar333 <87313068+zhangstar333@users.noreply.github.com> Date: Mon, 8 Apr 2024 07:26:42 +0800 Subject: [PATCH] [Bug](array) fix array column core dump in get_shrinked_column as not check type (#33295) * [Bug](array) fix array column core dump in get_shrinked_column as not check type * add function could_shrinked_column --- be/src/vec/columns/column.h | 6 +++++- be/src/vec/columns/column_array.cpp | 10 +++++++++- be/src/vec/columns/column_array.h | 1 + be/src/vec/columns/column_map.cpp | 10 ++++++---- be/src/vec/columns/column_map.h | 1 + be/src/vec/columns/column_nullable.cpp | 12 ++++++++++-- be/src/vec/columns/column_nullable.h | 2 ++ be/src/vec/columns/column_string.h | 1 + be/src/vec/columns/column_struct.cpp | 13 +++++++++++-- be/src/vec/columns/column_struct.h | 1 + .../data/query_p0/test_array_orderby_limit.out | 3 +++ .../query_p0/test_array_orderby_limit.groovy | 16 ++++++++++++++++ 12 files changed, 66 insertions(+), 10 deletions(-) diff --git a/be/src/vec/columns/column.h b/be/src/vec/columns/column.h index 96df039f6539e3..836ba443f76a55 100644 --- a/be/src/vec/columns/column.h +++ b/be/src/vec/columns/column.h @@ -141,10 +141,14 @@ class IColumn : public COW { // shrink the end zeros for CHAR type or ARRAY type virtual MutablePtr get_shrinked_column() { - LOG(FATAL) << "Cannot clone_resized() column " << get_name(); + LOG(FATAL) << "Cannot get_shrinked_column() column " << get_name(); return nullptr; } + // check the column whether could shrinked + // now support only in char type, or the nested type in complex type: array{char}, struct{char}, map{char} + virtual bool could_shrinked_column() { return false; } + // Only used on ColumnDictionary virtual void set_rowset_segment_id(std::pair rowset_segment_id) {} diff --git a/be/src/vec/columns/column_array.cpp b/be/src/vec/columns/column_array.cpp index eecd6bb64b9f88..e6fecc5d9cbb80 100644 --- a/be/src/vec/columns/column_array.cpp +++ b/be/src/vec/columns/column_array.cpp @@ -118,8 +118,16 @@ ColumnArray::ColumnArray(MutableColumnPtr&& nested_column) : data(std::move(nest offsets = ColumnOffsets::create(); } +bool ColumnArray::could_shrinked_column() { + return data->could_shrinked_column(); +} + MutableColumnPtr ColumnArray::get_shrinked_column() { - return ColumnArray::create(data->get_shrinked_column(), offsets->assume_mutable()); + if (could_shrinked_column()) { + return ColumnArray::create(data->get_shrinked_column(), offsets->assume_mutable()); + } else { + return ColumnArray::create(data->assume_mutable(), offsets->assume_mutable()); + } } std::string ColumnArray::get_name() const { diff --git a/be/src/vec/columns/column_array.h b/be/src/vec/columns/column_array.h index 0dc24c2d86da5a..65455310ac71e5 100644 --- a/be/src/vec/columns/column_array.h +++ b/be/src/vec/columns/column_array.h @@ -119,6 +119,7 @@ class ColumnArray final : public COWHelper { } MutableColumnPtr get_shrinked_column() override; + bool could_shrinked_column() override; /** On the index i there is an offset to the beginning of the i + 1 -th element. */ using ColumnOffsets = ColumnVector; diff --git a/be/src/vec/columns/column_map.cpp b/be/src/vec/columns/column_map.cpp index 03a38a6abdf3f1..a93eccba6be18b 100644 --- a/be/src/vec/columns/column_map.cpp +++ b/be/src/vec/columns/column_map.cpp @@ -481,18 +481,20 @@ void ColumnMap::replicate(const uint32_t* indices, size_t target_size, IColumn& values_array->replicate(indices, target_size, result_array->assume_mutable_ref()); } +bool ColumnMap::could_shrinked_column() { + return keys_column->could_shrinked_column() || values_column->could_shrinked_column(); +} + MutableColumnPtr ColumnMap::get_shrinked_column() { MutableColumns new_columns(2); - if (keys_column->is_column_string() || keys_column->is_column_array() || - keys_column->is_column_map() || keys_column->is_column_struct()) { + if (keys_column->could_shrinked_column()) { new_columns[0] = keys_column->get_shrinked_column(); } else { new_columns[0] = keys_column->get_ptr(); } - if (values_column->is_column_string() || values_column->is_column_array() || - values_column->is_column_map() || values_column->is_column_struct()) { + if (values_column->could_shrinked_column()) { new_columns[1] = values_column->get_shrinked_column(); } else { new_columns[1] = values_column->get_ptr(); diff --git a/be/src/vec/columns/column_map.h b/be/src/vec/columns/column_map.h index fb5c6e535c3b4c..706888528ed1f8 100644 --- a/be/src/vec/columns/column_map.h +++ b/be/src/vec/columns/column_map.h @@ -114,6 +114,7 @@ class ColumnMap final : public COWHelper { void update_hash_with_value(size_t n, SipHash& hash) const override; MutableColumnPtr get_shrinked_column() override; + bool could_shrinked_column() override; ColumnPtr filter(const Filter& filt, ssize_t result_size_hint) const override; size_t filter(const Filter& filter) override; ColumnPtr permute(const Permutation& perm, size_t limit) const override; diff --git a/be/src/vec/columns/column_nullable.cpp b/be/src/vec/columns/column_nullable.cpp index 342da45118551f..47bd2d9e247317 100644 --- a/be/src/vec/columns/column_nullable.cpp +++ b/be/src/vec/columns/column_nullable.cpp @@ -60,9 +60,17 @@ ColumnNullable::ColumnNullable(MutableColumnPtr&& nested_column_, MutableColumnP _need_update_has_null = true; } +bool ColumnNullable::could_shrinked_column() { + return get_nested_column_ptr()->could_shrinked_column(); +} + MutableColumnPtr ColumnNullable::get_shrinked_column() { - return ColumnNullable::create(get_nested_column_ptr()->get_shrinked_column(), - get_null_map_column_ptr()); + if (could_shrinked_column()) { + return ColumnNullable::create(get_nested_column_ptr()->get_shrinked_column(), + get_null_map_column_ptr()); + } else { + return ColumnNullable::create(get_nested_column_ptr(), get_null_map_column_ptr()); + } } void ColumnNullable::update_xxHash_with_value(size_t start, size_t end, uint64_t& hash, diff --git a/be/src/vec/columns/column_nullable.h b/be/src/vec/columns/column_nullable.h index a2356cffb26bb3..89cd27e8bd0041 100644 --- a/be/src/vec/columns/column_nullable.h +++ b/be/src/vec/columns/column_nullable.h @@ -94,7 +94,9 @@ class ColumnNullable final : public COWHelper { } MutableColumnPtr get_shrinked_column() override; + bool could_shrinked_column() override; bool is_variable_length() const override { return nested_column->is_variable_length(); } + const char* get_family_name() const override { return "Nullable"; } std::string get_name() const override { return "Nullable(" + nested_column->get_name() + ")"; } MutableColumnPtr clone_resized(size_t size) const override; diff --git a/be/src/vec/columns/column_string.h b/be/src/vec/columns/column_string.h index 03d81ae725bd62..3ab7af5882900a 100644 --- a/be/src/vec/columns/column_string.h +++ b/be/src/vec/columns/column_string.h @@ -123,6 +123,7 @@ class ColumnString final : public COWHelper { MutableColumnPtr clone_resized(size_t to_size) const override; MutableColumnPtr get_shrinked_column() override; + bool could_shrinked_column() override { return true; } Field operator[](size_t n) const override { assert(n < size()); diff --git a/be/src/vec/columns/column_struct.cpp b/be/src/vec/columns/column_struct.cpp index 9ddeaef9c1ae9b..7bc103a70c8e19 100644 --- a/be/src/vec/columns/column_struct.cpp +++ b/be/src/vec/columns/column_struct.cpp @@ -308,13 +308,22 @@ void ColumnStruct::replicate(const uint32_t* indexs, size_t target_size, IColumn } } +bool ColumnStruct::could_shrinked_column() { + const size_t tuple_size = columns.size(); + for (size_t i = 0; i < tuple_size; ++i) { + if (columns[i]->could_shrinked_column()) { + return true; + } + } + return false; +} + MutableColumnPtr ColumnStruct::get_shrinked_column() { const size_t tuple_size = columns.size(); MutableColumns new_columns(tuple_size); for (size_t i = 0; i < tuple_size; ++i) { - if (columns[i]->is_column_string() || columns[i]->is_column_array() || - columns[i]->is_column_map() || columns[i]->is_column_struct()) { + if (columns[i]->could_shrinked_column()) { new_columns[i] = columns[i]->get_shrinked_column(); } else { new_columns[i] = columns[i]->get_ptr(); diff --git a/be/src/vec/columns/column_struct.h b/be/src/vec/columns/column_struct.h index 681dff1b70e6ed..c276bf419b038c 100644 --- a/be/src/vec/columns/column_struct.h +++ b/be/src/vec/columns/column_struct.h @@ -160,6 +160,7 @@ class ColumnStruct final : public COWHelper { int compare_at(size_t n, size_t m, const IColumn& rhs_, int nan_direction_hint) const override; MutableColumnPtr get_shrinked_column() override; + bool could_shrinked_column() override; void reserve(size_t n) override; void resize(size_t n) override; diff --git a/regression-test/data/query_p0/test_array_orderby_limit.out b/regression-test/data/query_p0/test_array_orderby_limit.out index abcea7af965fd4..d06cad836aac58 100644 --- a/regression-test/data/query_p0/test_array_orderby_limit.out +++ b/regression-test/data/query_p0/test_array_orderby_limit.out @@ -2,3 +2,6 @@ -- !select -- 100 [["abc"]] +-- !select_2 -- +a {"codes": [123, 456], "props": {"key1":["char1", "char2"]}} + diff --git a/regression-test/suites/query_p0/test_array_orderby_limit.groovy b/regression-test/suites/query_p0/test_array_orderby_limit.groovy index b63d936dfc699e..800533b2e3f76c 100644 --- a/regression-test/suites/query_p0/test_array_orderby_limit.groovy +++ b/regression-test/suites/query_p0/test_array_orderby_limit.groovy @@ -45,4 +45,20 @@ suite("test_array_char_orderby", "query") { } qt_select """ select * from ${testTable} order by k1 limit 1 """ + + sql "DROP TABLE IF EXISTS unpart_tbl_parquet_struct_3;" + sql """ + CREATE TABLE unpart_tbl_parquet_struct_3 ( + `col1` CHAR, + `col20` STRUCT,props:MAP>> + )ENGINE=OLAP + DUPLICATE KEY(`col1`) + COMMENT 'OLAP' + DISTRIBUTED BY HASH(`col1`) BUCKETS 5 + PROPERTIES ( + "replication_allocation" = "tag.location.default: 1" + ); + """ + sql """ insert into unpart_tbl_parquet_struct_3 values ('a',STRUCT(ARRAY(123, 456), MAP('key1', ARRAY('char1', 'char2'))) ); """ + qt_select_2 """ select * from unpart_tbl_parquet_struct_3;""" }