From 9d603b0a6c4c721ffc476c77e79a3e8dfb5652bb Mon Sep 17 00:00:00 2001
From: zhangstar333 <2561612514@qq.com>
Date: Sun, 7 Apr 2024 10:51:26 +0800
Subject: [PATCH 1/2] [Bug](array) fix array column core dump in get_shrinked_column as not check type

---
 be/src/vec/columns/column_array.cpp | 7 ++++++-
 .../data/query_p0/test_array_orderby_limit.out | 3 +++
 .../query_p0/test_array_orderby_limit.groovy | 16 ++++++++++++++++
 3 files changed, 25 insertions(+), 1 deletion(-)

diff --git a/be/src/vec/columns/column_array.cpp b/be/src/vec/columns/column_array.cpp
index 86d31c9223baf0..7d18d19d478c30 100644
--- a/be/src/vec/columns/column_array.cpp
+++ b/be/src/vec/columns/column_array.cpp
@@ -119,7 +119,12 @@ ColumnArray::ColumnArray(MutableColumnPtr&& nested_column) : data(std::move(nest
 }
 
 MutableColumnPtr ColumnArray::get_shrinked_column() {
-    return ColumnArray::create(data->get_shrinked_column(), offsets->assume_mutable());
+    if (data->is_column_string() || data->is_column_array() || data->is_column_map() ||
+        data->is_column_struct()) {
+        return ColumnArray::create(data->get_shrinked_column(), offsets->assume_mutable());
+    } else {
+        return ColumnArray::create(data->assume_mutable(), offsets->assume_mutable());
+    }
 }
 
 std::string ColumnArray::get_name() const {
diff --git a/regression-test/data/query_p0/test_array_orderby_limit.out b/regression-test/data/query_p0/test_array_orderby_limit.out
index abcea7af965fd4..d06cad836aac58 100644
--- a/regression-test/data/query_p0/test_array_orderby_limit.out
+++ b/regression-test/data/query_p0/test_array_orderby_limit.out
@@ -2,3 +2,6 @@
 -- !select --
 100 [["abc"]]
 
+-- !select_2 --
+a {"codes": [123, 456], "props": {"key1":["char1", "char2"]}}
+
diff --git a/regression-test/suites/query_p0/test_array_orderby_limit.groovy b/regression-test/suites/query_p0/test_array_orderby_limit.groovy
index c573b1c7986a72..2fa3bb9c5ed838 100644
--- a/regression-test/suites/query_p0/test_array_orderby_limit.groovy
+++
b/regression-test/suites/query_p0/test_array_orderby_limit.groovy
@@ -45,4 +45,20 @@ suite("test_array_char_orderby", "query") {
     }
 
     qt_select """ select * from ${testTable} order by k1 limit 1 """
+
+    sql "DROP TABLE IF EXISTS unpart_tbl_parquet_struct_3;"
+    sql """
+        CREATE TABLE unpart_tbl_parquet_struct_3 (
+            `col1` CHAR,
+            `col20` STRUCT,props:MAP>>
+        )ENGINE=OLAP
+        DUPLICATE KEY(`col1`)
+        COMMENT 'OLAP'
+        DISTRIBUTED BY HASH(`col1`) BUCKETS 5
+        PROPERTIES (
+            "replication_allocation" = "tag.location.default: 1"
+        );
+    """
+    sql """ insert into unpart_tbl_parquet_struct_3 values ('a',STRUCT(ARRAY(123, 456), MAP('key1', ARRAY('char1', 'char2'))) ); """
+    qt_select_2 """ select * from unpart_tbl_parquet_struct_3;"""
 }
From 1e47abeff4354ba5315f64b65789e8cfb697a2ad Mon Sep 17 00:00:00 2001
From: zhangstar333 <2561612514@qq.com>
Date: Sun, 7 Apr 2024 13:35:32 +0800
Subject: [PATCH 2/2] add function could_shrinked_column

---
 be/src/vec/columns/column.h | 6 +++++-
 be/src/vec/columns/column_array.cpp | 7 +++++--
 be/src/vec/columns/column_array.h | 1 +
 be/src/vec/columns/column_map.cpp | 10 ++++++----
 be/src/vec/columns/column_map.h | 1 +
 be/src/vec/columns/column_nullable.cpp | 12 ++++++++++--
 be/src/vec/columns/column_nullable.h | 1 +
 be/src/vec/columns/column_string.h | 1 +
 be/src/vec/columns/column_struct.cpp | 13 +++++++++++--
 be/src/vec/columns/column_struct.h | 1 +
 10 files changed, 42 insertions(+), 11 deletions(-)

diff --git a/be/src/vec/columns/column.h b/be/src/vec/columns/column.h
index ed62354b1bb16a..344d75355bd372 100644
--- a/be/src/vec/columns/column.h
+++ b/be/src/vec/columns/column.h
@@ -127,10 +127,14 @@ class IColumn : public COW {
 
     // shrink the end zeros for CHAR type or ARRAY type
     virtual MutablePtr get_shrinked_column() {
-        LOG(FATAL) << "Cannot clone_resized() column " << get_name();
+        LOG(FATAL) << "Cannot get_shrinked_column() column " << get_name();
         return nullptr;
     }
 
+    // check the column whether could shrinked
+    // now support only in char
type, or the nested type in complex type: array{char}, struct{char}, map{char}
+    virtual bool could_shrinked_column() { return false; }
+
     /// Some columns may require finalization before using of other operations.
     virtual void finalize() {}
 
diff --git a/be/src/vec/columns/column_array.cpp b/be/src/vec/columns/column_array.cpp
index 7d18d19d478c30..7251f1e17988a1 100644
--- a/be/src/vec/columns/column_array.cpp
+++ b/be/src/vec/columns/column_array.cpp
@@ -118,9 +118,12 @@ ColumnArray::ColumnArray(MutableColumnPtr&& nested_column) : data(std::move(nest
     offsets = ColumnOffsets::create();
 }
 
+bool ColumnArray::could_shrinked_column() {
+    return data->could_shrinked_column();
+}
+
 MutableColumnPtr ColumnArray::get_shrinked_column() {
-    if (data->is_column_string() || data->is_column_array() || data->is_column_map() ||
-        data->is_column_struct()) {
+    if (could_shrinked_column()) {
         return ColumnArray::create(data->get_shrinked_column(), offsets->assume_mutable());
     } else {
         return ColumnArray::create(data->assume_mutable(), offsets->assume_mutable());
diff --git a/be/src/vec/columns/column_array.h b/be/src/vec/columns/column_array.h
index 24044fc8bce9d6..ec75319753a535 100644
--- a/be/src/vec/columns/column_array.h
+++ b/be/src/vec/columns/column_array.h
@@ -119,6 +119,7 @@ class ColumnArray final : public COWHelper {
     }
 
     MutableColumnPtr get_shrinked_column() override;
+    bool could_shrinked_column() override;
 
     /** On the index i there is an offset to the beginning of the i + 1 -th element.
*/
     using ColumnOffsets = ColumnVector;
diff --git a/be/src/vec/columns/column_map.cpp b/be/src/vec/columns/column_map.cpp
index c1c668ef07cd8b..bec58bfda7b887 100644
--- a/be/src/vec/columns/column_map.cpp
+++ b/be/src/vec/columns/column_map.cpp
@@ -455,18 +455,20 @@ ColumnPtr ColumnMap::replicate(const Offsets& offsets) const {
     return res;
 }
 
+bool ColumnMap::could_shrinked_column() {
+    return keys_column->could_shrinked_column() || values_column->could_shrinked_column();
+}
+
 MutableColumnPtr ColumnMap::get_shrinked_column() {
     MutableColumns new_columns(2);
 
-    if (keys_column->is_column_string() || keys_column->is_column_array() ||
-        keys_column->is_column_map() || keys_column->is_column_struct()) {
+    if (keys_column->could_shrinked_column()) {
         new_columns[0] = keys_column->get_shrinked_column();
     } else {
         new_columns[0] = keys_column->get_ptr();
     }
 
-    if (values_column->is_column_string() || values_column->is_column_array() ||
-        values_column->is_column_map() || values_column->is_column_struct()) {
+    if (values_column->could_shrinked_column()) {
         new_columns[1] = values_column->get_shrinked_column();
     } else {
         new_columns[1] = values_column->get_ptr();
diff --git a/be/src/vec/columns/column_map.h b/be/src/vec/columns/column_map.h
index 9926a62e54ddff..b2d93f0b07594c 100644
--- a/be/src/vec/columns/column_map.h
+++ b/be/src/vec/columns/column_map.h
@@ -112,6 +112,7 @@ class ColumnMap final : public COWHelper {
     void update_hash_with_value(size_t n, SipHash& hash) const override;
 
     MutableColumnPtr get_shrinked_column() override;
+    bool could_shrinked_column() override;
     ColumnPtr filter(const Filter& filt, ssize_t result_size_hint) const override;
     size_t filter(const Filter& filter) override;
     ColumnPtr permute(const Permutation& perm, size_t limit) const override;
diff --git a/be/src/vec/columns/column_nullable.cpp b/be/src/vec/columns/column_nullable.cpp
index e20b87af826aba..dc8853e49af1ee 100644
--- a/be/src/vec/columns/column_nullable.cpp
+++
b/be/src/vec/columns/column_nullable.cpp
@@ -48,9 +48,17 @@ ColumnNullable::ColumnNullable(MutableColumnPtr&& nested_column_, MutableColumnP
     _need_update_has_null = true;
 }
 
+bool ColumnNullable::could_shrinked_column() {
+    return get_nested_column_ptr()->could_shrinked_column();
+}
+
 MutableColumnPtr ColumnNullable::get_shrinked_column() {
-    return ColumnNullable::create(get_nested_column_ptr()->get_shrinked_column(),
-                                  get_null_map_column_ptr());
+    if (could_shrinked_column()) {
+        return ColumnNullable::create(get_nested_column_ptr()->get_shrinked_column(),
+                                      get_null_map_column_ptr());
+    } else {
+        return ColumnNullable::create(get_nested_column_ptr(), get_null_map_column_ptr());
+    }
 }
 
 void ColumnNullable::update_xxHash_with_value(size_t start, size_t end, uint64_t& hash,
diff --git a/be/src/vec/columns/column_nullable.h b/be/src/vec/columns/column_nullable.h
index 8e48bf1fc3f77e..3ec396a4aff0c3 100644
--- a/be/src/vec/columns/column_nullable.h
+++ b/be/src/vec/columns/column_nullable.h
@@ -82,6 +82,7 @@ class ColumnNullable final : public COWHelper {
     }
 
     MutableColumnPtr get_shrinked_column() override;
+    bool could_shrinked_column() override;
     const char* get_family_name() const override { return "Nullable"; }
     std::string get_name() const override { return "Nullable(" + nested_column->get_name() + ")"; }
 
diff --git a/be/src/vec/columns/column_string.h b/be/src/vec/columns/column_string.h
index 062812315eed4b..41d1d6d6333f78 100644
--- a/be/src/vec/columns/column_string.h
+++ b/be/src/vec/columns/column_string.h
@@ -120,6 +120,7 @@ class ColumnString final : public COWHelper {
     MutableColumnPtr clone_resized(size_t to_size) const override;
 
     MutableColumnPtr get_shrinked_column() override;
+    bool could_shrinked_column() override { return true; }
 
     Field operator[](size_t n) const override {
         assert(n < size());
diff --git a/be/src/vec/columns/column_struct.cpp b/be/src/vec/columns/column_struct.cpp
index aea9db2be50dd8..d075b040e6d4d3 100644
---
a/be/src/vec/columns/column_struct.cpp
+++ b/be/src/vec/columns/column_struct.cpp
@@ -295,13 +295,22 @@ ColumnPtr ColumnStruct::replicate(const Offsets& offsets) const {
     return ColumnStruct::create(new_columns);
 }
 
+bool ColumnStruct::could_shrinked_column() {
+    const size_t tuple_size = columns.size();
+    for (size_t i = 0; i < tuple_size; ++i) {
+        if (columns[i]->could_shrinked_column()) {
+            return true;
+        }
+    }
+    return false;
+}
+
 MutableColumnPtr ColumnStruct::get_shrinked_column() {
     const size_t tuple_size = columns.size();
     MutableColumns new_columns(tuple_size);
 
     for (size_t i = 0; i < tuple_size; ++i) {
-        if (columns[i]->is_column_string() || columns[i]->is_column_array() ||
-            columns[i]->is_column_map() || columns[i]->is_column_struct()) {
+        if (columns[i]->could_shrinked_column()) {
             new_columns[i] = columns[i]->get_shrinked_column();
         } else {
             new_columns[i] = columns[i]->get_ptr();
diff --git a/be/src/vec/columns/column_struct.h b/be/src/vec/columns/column_struct.h
index d064c447dae067..34d1e3ecf0e3a4 100644
--- a/be/src/vec/columns/column_struct.h
+++ b/be/src/vec/columns/column_struct.h
@@ -156,6 +156,7 @@ class ColumnStruct final : public COWHelper {
     int compare_at(size_t n, size_t m, const IColumn& rhs_, int nan_direction_hint) const override;
 
     MutableColumnPtr get_shrinked_column() override;
+    bool could_shrinked_column() override;
 
     void reserve(size_t n) override;
     void resize(size_t n) override;