diff --git a/be/src/olap/rowset/segment_v2/segment_iterator.cpp b/be/src/olap/rowset/segment_v2/segment_iterator.cpp index bd2b9df914c8b4..ce04b05d4b4078 100644 --- a/be/src/olap/rowset/segment_v2/segment_iterator.cpp +++ b/be/src/olap/rowset/segment_v2/segment_iterator.cpp @@ -1498,24 +1498,38 @@ bool SegmentIterator::_can_evaluated_by_vectorized(ColumnPredicate* predicate) { } } +bool SegmentIterator::_has_char_type(const Field& column_desc) { + switch (column_desc.type()) { + case FieldType::OLAP_FIELD_TYPE_CHAR: + return true; + case FieldType::OLAP_FIELD_TYPE_ARRAY: + return _has_char_type(*column_desc.get_sub_field(0)); + case FieldType::OLAP_FIELD_TYPE_MAP: + return _has_char_type(*column_desc.get_sub_field(0)) || + _has_char_type(*column_desc.get_sub_field(1)); + case FieldType::OLAP_FIELD_TYPE_STRUCT: + for (int idx = 0; idx < column_desc.get_sub_field_count(); ++idx) { + if (_has_char_type(*column_desc.get_sub_field(idx))) { + return true; + } + } + return false; + default: + return false; + } +} + void SegmentIterator::_vec_init_char_column_id() { for (size_t i = 0; i < _schema->num_column_ids(); i++) { auto cid = _schema->column_id(i); - auto column_desc = _schema->column(cid); + const Field* column_desc = _schema->column(cid); - do { - if (column_desc->type() == FieldType::OLAP_FIELD_TYPE_CHAR) { - _char_type_idx.emplace_back(i); - if (i != 0) { - _char_type_idx_no_0.emplace_back(i); - } - break; - } else if (column_desc->type() != FieldType::OLAP_FIELD_TYPE_ARRAY) { - break; + if (_has_char_type(*column_desc)) { + _char_type_idx.emplace_back(i); + if (i != 0) { + _char_type_idx_no_0.emplace_back(i); } - // for Array or Array> - column_desc = column_desc->get_sub_field(0); - } while (column_desc != nullptr); + } } } diff --git a/be/src/olap/rowset/segment_v2/segment_iterator.h b/be/src/olap/rowset/segment_v2/segment_iterator.h index d53ad9d62b0571..33d3a3f5f9c9c9 100644 --- a/be/src/olap/rowset/segment_v2/segment_iterator.h +++ 
b/be/src/olap/rowset/segment_v2/segment_iterator.h @@ -201,6 +201,7 @@ class SegmentIterator : public RowwiseIterator { // CHAR type in storage layer padding the 0 in length. But query engine need ignore the padding 0. // so segment iterator need to shrink char column before output it. only use in vec query engine. void _vec_init_char_column_id(); + bool _has_char_type(const Field& column_desc); uint32_t segment_id() const { return _segment->id(); } uint32_t num_rows() const { return _segment->num_rows(); } diff --git a/be/src/vec/columns/column.h b/be/src/vec/columns/column.h index df4c66fa14c561..139a23f793595a 100644 --- a/be/src/vec/columns/column.h +++ b/be/src/vec/columns/column.h @@ -670,6 +670,8 @@ class IColumn : public COW { virtual bool is_column_map() const { return false; } + virtual bool is_column_struct() const { return false; } + /// If the only value column can contain is NULL. virtual bool only_null() const { return false; } diff --git a/be/src/vec/columns/column_map.cpp b/be/src/vec/columns/column_map.cpp index f10890b009364d..9a550097af9369 100644 --- a/be/src/vec/columns/column_map.cpp +++ b/be/src/vec/columns/column_map.cpp @@ -442,6 +442,27 @@ void ColumnMap::replicate(const uint32_t* indexs, size_t target_size, IColumn& c ->replicate(indexs, target_size, res.values_column->assume_mutable_ref()); } +MutableColumnPtr ColumnMap::get_shrinked_column() { + MutableColumns new_columns(2); + + if (keys_column->is_column_string() || keys_column->is_column_array() || + keys_column->is_column_map() || keys_column->is_column_struct()) { + new_columns[0] = keys_column->get_shrinked_column(); + } else { + new_columns[0] = keys_column->get_ptr(); + } + + if (values_column->is_column_string() || values_column->is_column_array() || + values_column->is_column_map() || values_column->is_column_struct()) { + new_columns[1] = values_column->get_shrinked_column(); + } else { + new_columns[1] = values_column->get_ptr(); + } + + return 
ColumnMap::create(new_columns[0]->assume_mutable(), new_columns[1]->assume_mutable(), + offsets_column->assume_mutable()); +} + void ColumnMap::reserve(size_t n) { get_offsets().reserve(n); keys_column->reserve(n); diff --git a/be/src/vec/columns/column_map.h b/be/src/vec/columns/column_map.h index 0c09f0a9dae38a..f8bc8a4812d3c2 100644 --- a/be/src/vec/columns/column_map.h +++ b/be/src/vec/columns/column_map.h @@ -36,6 +36,8 @@ #include "vec/columns/column.h" #include "vec/columns/column_array.h" #include "vec/columns/column_impl.h" +#include "vec/columns/column_nullable.h" +#include "vec/columns/column_struct.h" #include "vec/columns/column_vector.h" #include "vec/common/assert_cast.h" #include "vec/common/cow.h" @@ -110,7 +112,7 @@ class ColumnMap final : public COWHelper { const char* deserialize_and_insert_from_arena(const char* pos) override; void update_hash_with_value(size_t n, SipHash& hash) const override; - + MutableColumnPtr get_shrinked_column() override; ColumnPtr filter(const Filter& filt, ssize_t result_size_hint) const override; size_t filter(const Filter& filter) override; ColumnPtr permute(const Permutation& perm, size_t limit) const override; diff --git a/be/src/vec/columns/column_nullable.h b/be/src/vec/columns/column_nullable.h index 465e7dc2d96a75..4350bead62d131 100644 --- a/be/src/vec/columns/column_nullable.h +++ b/be/src/vec/columns/column_nullable.h @@ -262,6 +262,8 @@ class ColumnNullable final : public COWHelper { bool is_column_decimal() const override { return get_nested_column().is_column_decimal(); } bool is_column_string() const override { return get_nested_column().is_column_string(); } bool is_column_array() const override { return get_nested_column().is_column_array(); } + bool is_column_map() const override { return get_nested_column().is_column_map(); } + bool is_column_struct() const override { return get_nested_column().is_column_struct(); } bool is_fixed_and_contiguous() const override { return false; } bool 
values_have_fixed_size() const override { return nested_column->values_have_fixed_size(); } diff --git a/be/src/vec/columns/column_struct.cpp b/be/src/vec/columns/column_struct.cpp index 883913703980db..a2d1eae5709a7b 100644 --- a/be/src/vec/columns/column_struct.cpp +++ b/be/src/vec/columns/column_struct.cpp @@ -293,6 +293,21 @@ void ColumnStruct::replicate(const uint32_t* indexs, size_t target_size, IColumn } } +MutableColumnPtr ColumnStruct::get_shrinked_column() { + const size_t tuple_size = columns.size(); + MutableColumns new_columns(tuple_size); + + for (size_t i = 0; i < tuple_size; ++i) { + if (columns[i]->is_column_string() || columns[i]->is_column_array() || + columns[i]->is_column_map() || columns[i]->is_column_struct()) { + new_columns[i] = columns[i]->get_shrinked_column(); + } else { + new_columns[i] = columns[i]->get_ptr(); + } + } + return ColumnStruct::create(std::move(new_columns)); +} + MutableColumns ColumnStruct::scatter(ColumnIndex num_columns, const Selector& selector) const { const size_t tuple_size = columns.size(); std::vector scattered_tuple_elements(tuple_size); diff --git a/be/src/vec/columns/column_struct.h b/be/src/vec/columns/column_struct.h index 9d219c3c989c11..db9633f6032913 100644 --- a/be/src/vec/columns/column_struct.h +++ b/be/src/vec/columns/column_struct.h @@ -82,6 +82,7 @@ class ColumnStruct final : public COWHelper { } std::string get_name() const override; + bool is_column_struct() const override { return true; } const char* get_family_name() const override { return "Struct"; } bool can_be_inside_nullable() const override { return true; } MutableColumnPtr clone_empty() const override; @@ -160,6 +161,9 @@ class ColumnStruct final : public COWHelper { LOG(FATAL) << "compare_at not implemented"; } void get_extremes(Field& min, Field& max) const override; + + MutableColumnPtr get_shrinked_column() override; + void reserve(size_t n) override; void resize(size_t n) override; size_t byte_size() const override; diff --git 
a/regression-test/data/export/test_struct_export.out b/regression-test/data/export/test_struct_export.out index 0dc07fa7cb66ab..21141b417d302c 100644 Binary files a/regression-test/data/export/test_struct_export.out and b/regression-test/data/export/test_struct_export.out differ diff --git a/regression-test/data/insert_p0/test_struct_insert.out b/regression-test/data/insert_p0/test_struct_insert.out index 2c202a651e7171..7bd960b0e3dc30 100644 Binary files a/regression-test/data/insert_p0/test_struct_insert.out and b/regression-test/data/insert_p0/test_struct_insert.out differ diff --git a/regression-test/data/load_p0/stream_load/map_char_test.csv b/regression-test/data/load_p0/stream_load/map_char_test.csv new file mode 100644 index 00000000000000..4b545d1ea36b75 --- /dev/null +++ b/regression-test/data/load_p0/stream_load/map_char_test.csv @@ -0,0 +1,4 @@ +1 {1:"1", 2:"22", 3:"333", 4:"4444", 5:"55555", 6:"666666", 7:"7777777"} +3 {1:"11", 2:"22", 3:"33", 4:"44", 5:"55", 6:"66", 7:"77"} +2 {1:"1", 2:"2", 3:"3", 4:"4", 5:"5", 6:"6", 7:"7"} +4 {1:"111", 2:"22", 3:"333", 4:"444", 5:"55", 6:"66", 7:"777"} diff --git a/regression-test/data/load_p0/stream_load/test_stream_load.out b/regression-test/data/load_p0/stream_load/test_stream_load.out index 8816fce94b80ec..505b3110cf2253 100644 --- a/regression-test/data/load_p0/stream_load/test_stream_load.out +++ b/regression-test/data/load_p0/stream_load/test_stream_load.out @@ -6,6 +6,12 @@ -- !sql1 -- 2019 9 9 9 7.700 a 2019-09-09 1970-01-01T08:33:39 k7 9.0 9.0 +-- !map11 -- +1 {1:"1", 2:"22", 3:"333", 4:"4444", 5:"55555", 6:"666666", 7:"7777777"} +2 {1:"1", 2:"2", 3:"3", 4:"4", 5:"5", 6:"6", 7:"7"} +3 {1:"11", 2:"22", 3:"33", 4:"44", 5:"55", 6:"66", 7:"77"} +4 {1:"111", 2:"22", 3:"333", 4:"444", 5:"55", 6:"66", 7:"777"} + -- !all11 -- 2500 diff --git a/regression-test/data/query_p0/aggregate/aggregate_group_by_metric_type.out b/regression-test/data/query_p0/aggregate/aggregate_group_by_metric_type.out index 
32cdb5a79e9f91..d37e5e62bb036d 100644 Binary files a/regression-test/data/query_p0/aggregate/aggregate_group_by_metric_type.out and b/regression-test/data/query_p0/aggregate/aggregate_group_by_metric_type.out differ diff --git a/regression-test/data/query_p0/sql_functions/struct_functions/test_struct_functions.out b/regression-test/data/query_p0/sql_functions/struct_functions/test_struct_functions.out index ffce5c3e1968eb..fb744aec56d32d 100644 Binary files a/regression-test/data/query_p0/sql_functions/struct_functions/test_struct_functions.out and b/regression-test/data/query_p0/sql_functions/struct_functions/test_struct_functions.out differ diff --git a/regression-test/suites/load_p0/stream_load/test_stream_load.groovy b/regression-test/suites/load_p0/stream_load/test_stream_load.groovy index d9cf9c72938394..0884330f2d3b50 100644 --- a/regression-test/suites/load_p0/stream_load/test_stream_load.groovy +++ b/regression-test/suites/load_p0/stream_load/test_stream_load.groovy @@ -191,6 +191,8 @@ suite("test_stream_load", "p0") { def tableName7 = "test_unique_key_with_delete" def tableName8 = "test_array" def tableName10 = "test_struct" + def tableName11 = "test_map" + sql """ DROP TABLE IF EXISTS ${tableName3} """ sql """ DROP TABLE IF EXISTS ${tableName4} """ sql """ DROP TABLE IF EXISTS ${tableName5} """ @@ -198,6 +200,7 @@ suite("test_stream_load", "p0") { sql """ DROP TABLE IF EXISTS ${tableName7} """ sql """ DROP TABLE IF EXISTS ${tableName8} """ sql """ DROP TABLE IF EXISTS ${tableName10} """ + sql """ DROP TABLE IF EXISTS ${tableName11} """ sql """ CREATE TABLE IF NOT EXISTS ${tableName3} ( `k1` int(11) NULL, @@ -281,7 +284,7 @@ suite("test_stream_load", "p0") { `k4` ARRAY NULL COMMENT "", `k5` ARRAY NULL COMMENT "", `k6` ARRAY NULL COMMENT "", - `k7` ARRAY NULL COMMENT "", + `k7` ARRAY NULL COMMENT "", `k8` ARRAY NULL COMMENT "", `k9` ARRAY NULL COMMENT "", `k10` ARRAY NULL COMMENT "", @@ -316,6 +319,41 @@ suite("test_stream_load", "p0") { ); """ + sql """ + 
CREATE TABLE IF NOT EXISTS ${tableName11} ( + `k1` int(11) NULL, + `k2` map NULL + ) ENGINE=OLAP + DUPLICATE KEY(`k1`) + DISTRIBUTED BY HASH(`k1`) BUCKETS 3 + PROPERTIES ( + "replication_allocation" = "tag.location.default: 1" + ); + """ + + // load map with specific-length char with non-specific-length data + streamLoad { + table "${tableName11}" + + set 'column_separator', '\t' + + file 'map_char_test.csv' + time 10000 // limit inflight 10s + + check { result, exception, startTime, endTime -> + if (exception != null) { + throw exception + } + log.info("Stream load result: ${result}".toString()) + def json = parseJson(result) + assertEquals("success", json.Status.toLowerCase()) + assertEquals(4, json.NumberTotalRows) + assertEquals(0, json.NumberFilteredRows) + } + } + sql "sync" + order_qt_map11 "SELECT * FROM ${tableName11} order by k1" + // load all columns streamLoad { table "${tableName3}"