From a71b0bb270d0764b27954817c14cc21917a3932a Mon Sep 17 00:00:00 2001 From: amorynan Date: Mon, 25 Sep 2023 18:33:16 +0800 Subject: [PATCH 1/2] fix agg table with complex type --- be/src/vec/columns/column.h | 3 ++ be/src/vec/columns/column_array.h | 26 +++++++++++++++-- be/src/vec/columns/column_complex.h | 8 +++++ be/src/vec/columns/column_const.h | 8 +++++ be/src/vec/columns/column_decimal.h | 8 ++++- be/src/vec/columns/column_dictionary.h | 5 ++++ be/src/vec/columns/column_dummy.h | 5 ++++ .../vec/columns/column_fixed_length_object.h | 8 +++++ be/src/vec/columns/column_map.h | 29 +++++++++++++++++-- be/src/vec/columns/column_nullable.h | 19 ++++++++++++ be/src/vec/columns/column_object.h | 5 ++++ be/src/vec/columns/column_string.h | 7 +++++ be/src/vec/columns/column_struct.h | 27 +++++++++++++++-- be/src/vec/columns/column_vector.h | 9 ++++++ be/src/vec/columns/predicate_column.h | 5 ++++ ...ested_types_insert_into_with_agg_table.out | 15 ++++++++++ ...ed_types_insert_into_with_agg_table.groovy | 5 ++++ 17 files changed, 182 insertions(+), 10 deletions(-) diff --git a/be/src/vec/columns/column.h b/be/src/vec/columns/column.h index 9a4793a76fccf6..c52c9d4eeaccce 100644 --- a/be/src/vec/columns/column.h +++ b/be/src/vec/columns/column.h @@ -693,6 +693,9 @@ class IColumn : public COW { // only used in agg value replace // ColumnString should replace according to 0,1,2... ,size,0,1,2... virtual void replace_column_data(const IColumn&, size_t row, size_t self_row = 0) = 0; + // replace column data in batch to avoid virtual call in nested type like array/map + virtual void replace_batch_column_data(const IColumn& rhs, size_t num_rows, size_t row, + size_t self_row = 0) = 0; // only used in ColumnNullable replace_column_data virtual void replace_column_data_default(size_t self_row = 0) = 0; diff --git a/be/src/vec/columns/column_array.h b/be/src/vec/columns/column_array.h index b42800d5b2696a..48b93dd3ec79a0 100644 --- a/be/src/vec/columns/column_array.h +++ b/be/src/vec/columns/column_array.h @@ -227,12 +227,32 @@ class ColumnArray final : public COWHelper { void insert_indices_from(const IColumn& src, const int* indices_begin, const int* indices_end) override; - void replace_column_data(const IColumn&, size_t row, size_t self_row = 0) override { - LOG(FATAL) << "replace_column_data not implemented"; + void replace_column_data(const IColumn& rhs, size_t row, size_t self_row = 0) override { + DCHECK(size() > self_row); + const auto& r = assert_cast(rhs); + const size_t nested_row_size = r.size_at(row); + const size_t r_nested_start_off = r.offset_at(row); + const size_t l_nested_start_off = offset_at(self_row); + + get_offsets()[self_row] = get_offsets()[self_row - 1] + nested_row_size; + // here we use batch size to avoid many virtual call in nested column + data->replace_batch_column_data(r.get_data(), nested_row_size, r_nested_start_off, + l_nested_start_off); } + + void replace_batch_column_data(const IColumn& rhs, size_t num_rows, size_t row, + size_t self_row = 0) override { + DCHECK(size() > self_row + num_rows); + for (auto start_idx = 0; start_idx < num_rows; ++start_idx) { + replace_column_data(rhs, row + start_idx, self_row + start_idx); + } + } + void replace_column_data_default(size_t self_row = 0) override { - LOG(FATAL) << "replace_column_data_default not implemented"; + DCHECK(size() > self_row); + get_offsets()[self_row] = get_offsets()[self_row - 1]; } + void clear() override { data->clear(); offsets->clear(); diff --git a/be/src/vec/columns/column_complex.h b/be/src/vec/columns/column_complex.h index e63e206ac9f2c7..3fbf766d5e8deb 100644 --- a/be/src/vec/columns/column_complex.h +++ b/be/src/vec/columns/column_complex.h @@ -289,6 +289,14 @@ class ColumnComplexType final : public COWHelper> data[self_row] = assert_cast(rhs).data[row]; } + void replace_batch_column_data(const IColumn& rhs, size_t num_rows, size_t row, + size_t self_row = 0) override { + DCHECK(size() > self_row + num_rows); + for (size_t start_idx = 0; start_idx < num_rows; ++start_idx) { + data[self_row + start_idx] = assert_cast(rhs).data[row + start_idx]; + } + } + void replace_column_data_default(size_t self_row = 0) override { DCHECK(size() > self_row); data[self_row] = T(); diff --git a/be/src/vec/columns/column_const.h b/be/src/vec/columns/column_const.h index e06902972abcac..136e72f539792c 100644 --- a/be/src/vec/columns/column_const.h +++ b/be/src/vec/columns/column_const.h @@ -264,6 +264,14 @@ class ColumnConst final : public COWHelper { data->replace_column_data(rhs, row, self_row); } + void replace_batch_column_data(const IColumn& rhs, size_t num_rows, size_t row, + size_t self_row = 0) override { + DCHECK(size() > self_row + num_rows); + for (auto start_idx = 0; start_idx < num_rows; ++start_idx) { + replace_column_data(rhs, row + start_idx, self_row + start_idx); + } + } + void replace_column_data_default(size_t self_row = 0) override { DCHECK(size() > self_row); LOG(FATAL) << "should not call the method in column const"; diff --git a/be/src/vec/columns/column_decimal.h b/be/src/vec/columns/column_decimal.h index 87a8cddc99e9cd..315d3c4ed2a1ce 100644 --- a/be/src/vec/columns/column_decimal.h +++ b/be/src/vec/columns/column_decimal.h @@ -261,7 +261,13 @@ class ColumnDecimal final : public COWHelper self_row); data[self_row] = assert_cast(rhs).data[row]; } - + void replace_batch_column_data(const IColumn& rhs, size_t num_rows, size_t row, + size_t self_row = 0) override { + DCHECK(size() > self_row + num_rows); + for (size_t start_idx = 0; start_idx < num_rows; ++start_idx) { + data[self_row + start_idx] = assert_cast(rhs).data[row + start_idx]; + } + } void replace_column_data_default(size_t self_row = 0) override { DCHECK(size() > self_row); data[self_row] = T(); diff --git a/be/src/vec/columns/column_dictionary.h b/be/src/vec/columns/column_dictionary.h index b6db5af7e9edab..3d0a7af8e479f5 100644 --- a/be/src/vec/columns/column_dictionary.h +++ b/be/src/vec/columns/column_dictionary.h @@ -225,6 +225,11 @@ class ColumnDictionary final : public COWHelper> { LOG(FATAL) << "should not call replace_column_data in ColumnDictionary"; } + void replace_batch_column_data(const IColumn&, size_t num_rows, size_t row, + size_t self_row = 0) override { + LOG(FATAL) << "should not call replace_column_data in ColumnDictionary"; + } + void replace_column_data_default(size_t self_row = 0) override { LOG(FATAL) << "should not call replace_column_data_default in ColumnDictionary"; } diff --git a/be/src/vec/columns/column_dummy.h b/be/src/vec/columns/column_dummy.h index f6b36ae80aa4f9..8eb7f71263f30c 100644 --- a/be/src/vec/columns/column_dummy.h +++ b/be/src/vec/columns/column_dummy.h @@ -155,6 +155,11 @@ class IColumnDummy : public IColumn { LOG(FATAL) << "should not call the method in column dummy"; } + void replace_batch_column_data(const IColumn& rhs, size_t num_rows, size_t row, + size_t self_row = 0) override { + LOG(FATAL) << "should not call the method in column dummy"; + } + void replace_column_data_default(size_t self_row = 0) override { LOG(FATAL) << "should not call the method in column dummy"; } diff --git a/be/src/vec/columns/column_fixed_length_object.h b/be/src/vec/columns/column_fixed_length_object.h index 4f83e4308c8465..851e4f3e444435 100644 --- a/be/src/vec/columns/column_fixed_length_object.h +++ b/be/src/vec/columns/column_fixed_length_object.h @@ -270,6 +270,14 @@ class ColumnFixedLengthObject final : public COWHelper self_row + num_rows); + for (size_t start_idx = 0; start_idx < num_rows; ++start_idx) { + replace_column_data(rhs, row + start_idx, self_row + start_idx); + } + } + void replace_column_data_default(size_t self_row = 0) override { LOG(FATAL) << "replace_column_data_default not supported"; } diff --git a/be/src/vec/columns/column_map.h b/be/src/vec/columns/column_map.h index 9d4f1f927022de..fc2340b57e3803 100644 --- a/be/src/vec/columns/column_map.h +++ b/be/src/vec/columns/column_map.h @@ -140,11 +140,34 @@ class ColumnMap final : public COWHelper { return append_data_by_selector_impl(res, selector); } - void replace_column_data(const IColumn&, size_t row, size_t self_row = 0) override { - LOG(FATAL) << "replace_column_data not implemented"; + void replace_column_data(const IColumn& rhs, size_t row, size_t self_row = 0) override { + DCHECK(size() > self_row); + const auto& r = assert_cast(rhs); + const size_t nested_row_size = r.size_at(row); + const size_t r_key_nested_start_off = r.offset_at(row); + const size_t r_val_nested_start_off = r.offset_at(row); + const size_t l_key_nested_start_off = r.offset_at(self_row); + const size_t l_val_nested_start_off = r.offset_at(self_row); + + get_offsets()[self_row] = get_offsets()[self_row - 1] + nested_row_size; + // here we use batch size to avoid many virtual call in nested column + keys_column->replace_batch_column_data(r.get_keys(), nested_row_size, + r_key_nested_start_off, l_key_nested_start_off); + values_column->replace_batch_column_data(r.get_values(), nested_row_size, + r_val_nested_start_off, l_val_nested_start_off); } + + void replace_batch_column_data(const IColumn& rhs, size_t num_rows, size_t row, + size_t self_row = 0) override { + DCHECK(size() > self_row + num_rows); + for (auto start_idx = 0; start_idx < num_rows; ++start_idx) { + replace_column_data(rhs, row + start_idx, self_row + start_idx); + } + } + void replace_column_data_default(size_t self_row = 0) override { - LOG(FATAL) << "replace_column_data_default not implemented"; + DCHECK(size() > self_row); + get_offsets()[self_row] = get_offsets()[self_row - 1]; } ColumnArray::Offsets64& ALWAYS_INLINE get_offsets() { diff --git a/be/src/vec/columns/column_nullable.h b/be/src/vec/columns/column_nullable.h index dab4c66d63a76d..5f52c4359ffdc8 100644 --- a/be/src/vec/columns/column_nullable.h +++ b/be/src/vec/columns/column_nullable.h @@ -357,6 +357,25 @@ class ColumnNullable final : public COWHelper { } } + void replace_batch_column_data(const IColumn& rhs, size_t num_rows, size_t row, + size_t self_row = 0) override { + DCHECK(size() > self_row + num_rows); + const ColumnNullable& nullable_rhs = assert_cast(rhs); + + for (auto start_idx = 0; start_idx < num_rows; ++start_idx) { + size_t cur_row = row + start_idx; + size_t cur_self_row = self_row + start_idx; + null_map->replace_column_data(*nullable_rhs.null_map, cur_row, cur_self_row); + + if (!nullable_rhs.is_null_at(cur_row)) { + nested_column->replace_column_data(*nullable_rhs.nested_column, cur_row, + cur_self_row); + } else { + nested_column->replace_column_data_default(cur_self_row); + } + } + } + void replace_column_data_default(size_t self_row = 0) override { LOG(FATAL) << "should not call the method in column nullable"; } diff --git a/be/src/vec/columns/column_object.h b/be/src/vec/columns/column_object.h index 4febce3b52d30c..b69e1cb8b01d45 100644 --- a/be/src/vec/columns/column_object.h +++ b/be/src/vec/columns/column_object.h @@ -367,6 +367,11 @@ class ColumnObject final : public COWHelper { LOG(FATAL) << "should not call the method in column object"; } + void replace_batch_column_data(const IColumn& rhs, size_t rows_num, size_t row, + size_t self_row = 0) override { + LOG(FATAL) << "should not call the method in column object"; + } + void replace_column_data_default(size_t self_row) override { LOG(FATAL) << "should not call the method in column object"; } diff --git a/be/src/vec/columns/column_string.h b/be/src/vec/columns/column_string.h index 96d4a4f834c215..ffb59183825056 100644 --- a/be/src/vec/columns/column_string.h +++ b/be/src/vec/columns/column_string.h @@ -562,6 +562,13 @@ class ColumnString final : public COWHelper { chars.insert(data.data, data.data + data.size); } + void replace_batch_column_data(const IColumn& rhs, size_t num_rows, size_t row, + size_t self_row = 0) override { + for (auto start_idx = 0; start_idx < num_rows; ++start_idx) { + replace_column_data(rhs, row + start_idx, self_row + start_idx); + } + } + // should replace according to 0,1,2... ,size,0,1,2... void replace_column_data_default(size_t self_row = 0) override { DCHECK(size() > self_row); diff --git a/be/src/vec/columns/column_struct.h b/be/src/vec/columns/column_struct.h index 700b5801c37936..bf6a5c7d52abeb 100644 --- a/be/src/vec/columns/column_struct.h +++ b/be/src/vec/columns/column_struct.h @@ -34,6 +34,7 @@ #include "common/status.h" #include "vec/columns/column.h" #include "vec/columns/column_impl.h" +#include "vec/common/assert_cast.h" #include "vec/common/cow.h" #include "vec/common/sip_hash.h" #include "vec/common/string_ref.h" @@ -130,11 +131,31 @@ class ColumnStruct final : public COWHelper { void append_data_by_selector(MutableColumnPtr& res, const Selector& selector) const override { return append_data_by_selector_impl(res, selector); } - void replace_column_data(const IColumn&, size_t row, size_t self_row = 0) override { - LOG(FATAL) << "replace_column_data not implemented"; + void replace_column_data(const IColumn& rhs, size_t row, size_t self_row = 0) override { + DCHECK(size() > self_row); + const auto& r = assert_cast(rhs); + + for (size_t idx = 0; idx < columns.size(); ++idx) { + columns[idx]->replace_column_data(r.get_column(idx), row, self_row); + } } + + void replace_batch_column_data(const IColumn& rhs, size_t num_rows, size_t row, + size_t self_row = 0) override { + DCHECK(size() > self_row + num_rows); + const auto& r = assert_cast(rhs); + for (auto start_idx = 0; start_idx < num_rows; ++start_idx) { + for (size_t idx = 0; idx < columns.size(); ++idx) { + columns[idx]->replace_column_data(r.get_column(idx), row, self_row); + } + } + } + void replace_column_data_default(size_t self_row = 0) override { - LOG(FATAL) << "replace_column_data_default not implemented"; + DCHECK(size() > self_row); + for (size_t idx = 0; idx < columns.size(); ++idx) { + columns[idx]->replace_column_data_default(self_row); + } } void insert_range_from(const IColumn& src, size_t start, size_t length) override; diff --git a/be/src/vec/columns/column_vector.h b/be/src/vec/columns/column_vector.h index 04908d8711612b..087962972bc8ab 100644 --- a/be/src/vec/columns/column_vector.h +++ b/be/src/vec/columns/column_vector.h @@ -465,6 +465,15 @@ class ColumnVector final : public COWHelper> data[self_row] = assert_cast(rhs).data[row]; } + void replace_batch_column_data(const IColumn& rhs, size_t num_rows, size_t row, + size_t self_row = 0) override { + DCHECK(size() > self_row + num_rows); + + for (auto start_idx = 0; start_idx < num_rows; ++start_idx) { + data[self_row + start_idx] = assert_cast(rhs).data[row + start_idx]; + } + } + void replace_column_data_default(size_t self_row = 0) override { DCHECK(size() > self_row); data[self_row] = T(); diff --git a/be/src/vec/columns/predicate_column.h b/be/src/vec/columns/predicate_column.h index 21e503817cf3c3..47e6e83bf5af1c 100644 --- a/be/src/vec/columns/predicate_column.h +++ b/be/src/vec/columns/predicate_column.h @@ -502,6 +502,11 @@ class PredicateColumnType final : public COWHelper Date: Thu, 28 Sep 2023 09:18:03 +0800 Subject: [PATCH 2/2] fix array/map agg --- be/src/vec/columns/column.h | 3 --- be/src/vec/columns/column_array.h | 18 +++++---------- be/src/vec/columns/column_complex.h | 8 ------- be/src/vec/columns/column_const.h | 8 ------- be/src/vec/columns/column_decimal.h | 8 +------ be/src/vec/columns/column_dictionary.h | 5 ----- be/src/vec/columns/column_dummy.h | 5 ----- .../vec/columns/column_fixed_length_object.h | 8 ------- be/src/vec/columns/column_map.h | 20 +++++------------ be/src/vec/columns/column_nullable.h | 19 ---------------- be/src/vec/columns/column_object.h | 5 ----- be/src/vec/columns/column_string.h | 7 ------ be/src/vec/columns/column_struct.h | 11 ---------- be/src/vec/columns/column_vector.h | 9 -------- be/src/vec/columns/predicate_column.h | 5 ----- be/src/vec/olap/block_reader.cpp | 22 ++++++++++++++----- be/src/vec/olap/block_reader.h | 2 +- 17 files changed, 30 insertions(+), 133 deletions(-) diff --git a/be/src/vec/columns/column.h b/be/src/vec/columns/column.h index c52c9d4eeaccce..9a4793a76fccf6 100644 --- a/be/src/vec/columns/column.h +++ b/be/src/vec/columns/column.h @@ -693,9 +693,6 @@ class IColumn : public COW { // only used in agg value replace // ColumnString should replace according to 0,1,2... ,size,0,1,2... virtual void replace_column_data(const IColumn&, size_t row, size_t self_row = 0) = 0; - // replace column data in batch to avoid virtual call in nested type like array/map - virtual void replace_batch_column_data(const IColumn& rhs, size_t num_rows, size_t row, - size_t self_row = 0) = 0; // only used in ColumnNullable replace_column_data virtual void replace_column_data_default(size_t self_row = 0) = 0; diff --git a/be/src/vec/columns/column_array.h b/be/src/vec/columns/column_array.h index 48b93dd3ec79a0..bf89d74ba5d6a1 100644 --- a/be/src/vec/columns/column_array.h +++ b/be/src/vec/columns/column_array.h @@ -232,20 +232,14 @@ class ColumnArray final : public COWHelper { const auto& r = assert_cast(rhs); const size_t nested_row_size = r.size_at(row); const size_t r_nested_start_off = r.offset_at(row); - const size_t l_nested_start_off = offset_at(self_row); - get_offsets()[self_row] = get_offsets()[self_row - 1] + nested_row_size; - // here we use batch size to avoid many virtual call in nested column - data->replace_batch_column_data(r.get_data(), nested_row_size, r_nested_start_off, - l_nested_start_off); - } - - void replace_batch_column_data(const IColumn& rhs, size_t num_rows, size_t row, - size_t self_row = 0) override { - DCHECK(size() > self_row + num_rows); - for (auto start_idx = 0; start_idx < num_rows; ++start_idx) { - replace_column_data(rhs, row + start_idx, self_row + start_idx); + // we should clear data because we call resize() before replace_column_data() + if (self_row == 0) { + data->clear(); } + get_offsets()[self_row] = get_offsets()[self_row - 1] + nested_row_size; + // we make sure call replace_column_data() by order so, here we just insert data for nested + data->insert_range_from(r.get_data(), r_nested_start_off, nested_row_size); } void replace_column_data_default(size_t self_row = 0) override { diff --git a/be/src/vec/columns/column_complex.h b/be/src/vec/columns/column_complex.h index 3fbf766d5e8deb..e63e206ac9f2c7 100644 --- a/be/src/vec/columns/column_complex.h +++ b/be/src/vec/columns/column_complex.h @@ -289,14 +289,6 @@ class ColumnComplexType final : public COWHelper> data[self_row] = assert_cast(rhs).data[row]; } - void replace_batch_column_data(const IColumn& rhs, size_t num_rows, size_t row, - size_t self_row = 0) override { - DCHECK(size() > self_row + num_rows); - for (size_t start_idx = 0; start_idx < num_rows; ++start_idx) { - data[self_row + start_idx] = assert_cast(rhs).data[row + start_idx]; - } - } - void replace_column_data_default(size_t self_row = 0) override { DCHECK(size() > self_row); data[self_row] = T(); diff --git a/be/src/vec/columns/column_const.h b/be/src/vec/columns/column_const.h index 136e72f539792c..e06902972abcac 100644 --- a/be/src/vec/columns/column_const.h +++ b/be/src/vec/columns/column_const.h @@ -264,14 +264,6 @@ class ColumnConst final : public COWHelper { data->replace_column_data(rhs, row, self_row); } - void replace_batch_column_data(const IColumn& rhs, size_t num_rows, size_t row, - size_t self_row = 0) override { - DCHECK(size() > self_row + num_rows); - for (auto start_idx = 0; start_idx < num_rows; ++start_idx) { - replace_column_data(rhs, row + start_idx, self_row + start_idx); - } - } - void replace_column_data_default(size_t self_row = 0) override { DCHECK(size() > self_row); LOG(FATAL) << "should not call the method in column const"; diff --git a/be/src/vec/columns/column_decimal.h b/be/src/vec/columns/column_decimal.h index 315d3c4ed2a1ce..87a8cddc99e9cd 100644 --- a/be/src/vec/columns/column_decimal.h +++ b/be/src/vec/columns/column_decimal.h @@ -261,13 +261,7 @@ class ColumnDecimal final : public COWHelper self_row); data[self_row] = assert_cast(rhs).data[row]; } - void replace_batch_column_data(const IColumn& rhs, size_t num_rows, size_t row, - size_t self_row = 0) override { - DCHECK(size() > self_row + num_rows); - for (size_t start_idx = 0; start_idx < num_rows; ++start_idx) { - data[self_row + start_idx] = assert_cast(rhs).data[row + start_idx]; - } - } + void replace_column_data_default(size_t self_row = 0) override { DCHECK(size() > self_row); data[self_row] = T(); diff --git a/be/src/vec/columns/column_dictionary.h b/be/src/vec/columns/column_dictionary.h index 3d0a7af8e479f5..b6db5af7e9edab 100644 --- a/be/src/vec/columns/column_dictionary.h +++ b/be/src/vec/columns/column_dictionary.h @@ -225,11 +225,6 @@ class ColumnDictionary final : public COWHelper> { LOG(FATAL) << "should not call replace_column_data in ColumnDictionary"; } - void replace_batch_column_data(const IColumn&, size_t num_rows, size_t row, - size_t self_row = 0) override { - LOG(FATAL) << "should not call replace_column_data in ColumnDictionary"; - } - void replace_column_data_default(size_t self_row = 0) override { LOG(FATAL) << "should not call replace_column_data_default in ColumnDictionary"; } diff --git a/be/src/vec/columns/column_dummy.h b/be/src/vec/columns/column_dummy.h index 8eb7f71263f30c..f6b36ae80aa4f9 100644 --- a/be/src/vec/columns/column_dummy.h +++ b/be/src/vec/columns/column_dummy.h @@ -155,11 +155,6 @@ class IColumnDummy : public IColumn { LOG(FATAL) << "should not call the method in column dummy"; } - void replace_batch_column_data(const IColumn& rhs, size_t num_rows, size_t row, - size_t self_row = 0) override { - LOG(FATAL) << "should not call the method in column dummy"; - } - void replace_column_data_default(size_t self_row = 0) override { LOG(FATAL) << "should not call the method in column dummy"; } diff --git a/be/src/vec/columns/column_fixed_length_object.h b/be/src/vec/columns/column_fixed_length_object.h index 851e4f3e444435..4f83e4308c8465 100644 --- a/be/src/vec/columns/column_fixed_length_object.h +++ b/be/src/vec/columns/column_fixed_length_object.h @@ -270,14 +270,6 @@ class ColumnFixedLengthObject final : public COWHelper self_row + num_rows); - for (size_t start_idx = 0; start_idx < num_rows; ++start_idx) { - replace_column_data(rhs, row + start_idx, self_row + start_idx); - } - } - void replace_column_data_default(size_t self_row = 0) override { LOG(FATAL) << "replace_column_data_default not supported"; } diff --git a/be/src/vec/columns/column_map.h b/be/src/vec/columns/column_map.h index fc2340b57e3803..5dc1c22aefcacf 100644 --- a/be/src/vec/columns/column_map.h +++ b/be/src/vec/columns/column_map.h @@ -146,23 +146,15 @@ class ColumnMap final : public COWHelper { const size_t nested_row_size = r.size_at(row); const size_t r_key_nested_start_off = r.offset_at(row); const size_t r_val_nested_start_off = r.offset_at(row); - const size_t l_key_nested_start_off = r.offset_at(self_row); - const size_t l_val_nested_start_off = r.offset_at(self_row); + if (self_row == 0) { + keys_column->clear(); + values_column->clear(); + } get_offsets()[self_row] = get_offsets()[self_row - 1] + nested_row_size; // here we use batch size to avoid many virtual call in nested column - keys_column->replace_batch_column_data(r.get_keys(), nested_row_size, - r_key_nested_start_off, l_key_nested_start_off); - values_column->replace_batch_column_data(r.get_values(), nested_row_size, - r_val_nested_start_off, l_val_nested_start_off); - } - - void replace_batch_column_data(const IColumn& rhs, size_t num_rows, size_t row, - size_t self_row = 0) override { - DCHECK(size() > self_row + num_rows); - for (auto start_idx = 0; start_idx < num_rows; ++start_idx) { - replace_column_data(rhs, row + start_idx, self_row + start_idx); - } + keys_column->insert_range_from(r.get_keys(), r_key_nested_start_off, nested_row_size); + values_column->insert_range_from(r.get_values(), r_val_nested_start_off, nested_row_size); } void replace_column_data_default(size_t self_row = 0) override { diff --git a/be/src/vec/columns/column_nullable.h b/be/src/vec/columns/column_nullable.h index 5f52c4359ffdc8..dab4c66d63a76d 100644 --- a/be/src/vec/columns/column_nullable.h +++ b/be/src/vec/columns/column_nullable.h @@ -357,25 +357,6 @@ class ColumnNullable final : public COWHelper { } } - void replace_batch_column_data(const IColumn& rhs, size_t num_rows, size_t row, - size_t self_row = 0) override { - DCHECK(size() > self_row + num_rows); - const ColumnNullable& nullable_rhs = assert_cast(rhs); - - for (auto start_idx = 0; start_idx < num_rows; ++start_idx) { - size_t cur_row = row + start_idx; - size_t cur_self_row = self_row + start_idx; - null_map->replace_column_data(*nullable_rhs.null_map, cur_row, cur_self_row); - - if (!nullable_rhs.is_null_at(cur_row)) { - nested_column->replace_column_data(*nullable_rhs.nested_column, cur_row, - cur_self_row); - } else { - nested_column->replace_column_data_default(cur_self_row); - } - } - } - void replace_column_data_default(size_t self_row = 0) override { LOG(FATAL) << "should not call the method in column nullable"; } diff --git a/be/src/vec/columns/column_object.h b/be/src/vec/columns/column_object.h index b69e1cb8b01d45..4febce3b52d30c 100644 --- a/be/src/vec/columns/column_object.h +++ b/be/src/vec/columns/column_object.h @@ -367,11 +367,6 @@ class ColumnObject final : public COWHelper { LOG(FATAL) << "should not call the method in column object"; } - void replace_batch_column_data(const IColumn& rhs, size_t rows_num, size_t row, - size_t self_row = 0) override { - LOG(FATAL) << "should not call the method in column object"; - } - void replace_column_data_default(size_t self_row) override { LOG(FATAL) << "should not call the method in column object"; } diff --git a/be/src/vec/columns/column_string.h b/be/src/vec/columns/column_string.h index ffb59183825056..96d4a4f834c215 100644 --- a/be/src/vec/columns/column_string.h +++ b/be/src/vec/columns/column_string.h @@ -562,13 +562,6 @@ class ColumnString final : public COWHelper { chars.insert(data.data, data.data + data.size); } - void replace_batch_column_data(const IColumn& rhs, size_t num_rows, size_t row, - size_t self_row = 0) override { - for (auto start_idx = 0; start_idx < num_rows; ++start_idx) { - replace_column_data(rhs, row + start_idx, self_row + start_idx); - } - } - // should replace according to 0,1,2... ,size,0,1,2... void replace_column_data_default(size_t self_row = 0) override { DCHECK(size() > self_row); diff --git a/be/src/vec/columns/column_struct.h b/be/src/vec/columns/column_struct.h index bf6a5c7d52abeb..820c151a941c56 100644 --- a/be/src/vec/columns/column_struct.h +++ b/be/src/vec/columns/column_struct.h @@ -140,17 +140,6 @@ class ColumnStruct final : public COWHelper { } } - void replace_batch_column_data(const IColumn& rhs, size_t num_rows, size_t row, - size_t self_row = 0) override { - DCHECK(size() > self_row + num_rows); - const auto& r = assert_cast(rhs); - for (auto start_idx = 0; start_idx < num_rows; ++start_idx) { - for (size_t idx = 0; idx < columns.size(); ++idx) { - columns[idx]->replace_column_data(r.get_column(idx), row, self_row); - } - } - } - void replace_column_data_default(size_t self_row = 0) override { DCHECK(size() > self_row); for (size_t idx = 0; idx < columns.size(); ++idx) { diff --git a/be/src/vec/columns/column_vector.h b/be/src/vec/columns/column_vector.h index 087962972bc8ab..04908d8711612b 100644 --- a/be/src/vec/columns/column_vector.h +++ b/be/src/vec/columns/column_vector.h @@ -465,15 +465,6 @@ class ColumnVector final : public COWHelper> data[self_row] = assert_cast(rhs).data[row]; } - void replace_batch_column_data(const IColumn& rhs, size_t num_rows, size_t row, - size_t self_row = 0) override { - DCHECK(size() > self_row + num_rows); - - for (auto start_idx = 0; start_idx < num_rows; ++start_idx) { - data[self_row + start_idx] = assert_cast(rhs).data[row + start_idx]; - } - } - void replace_column_data_default(size_t self_row = 0) override { DCHECK(size() > self_row); data[self_row] = T(); diff --git a/be/src/vec/columns/predicate_column.h b/be/src/vec/columns/predicate_column.h index 47e6e83bf5af1c..21e503817cf3c3 100644 --- a/be/src/vec/columns/predicate_column.h +++ b/be/src/vec/columns/predicate_column.h @@ -502,11 +502,6 @@ class PredicateColumnType final : public COWHelpercreate_same_struct_block(_reader_context.batch_size)->mutate_columns(); _stored_has_null_tag.resize(_stored_data_columns.size()); - _stored_has_string_tag.resize(_stored_data_columns.size()); + _stored_has_variable_length_tag.resize(_stored_data_columns.size()); auto& tablet_schema = *_tablet_schema; for (auto idx : _agg_columns_idx) { @@ -182,13 +182,23 @@ void BlockReader::_init_agg_state(const ReaderParams& read_params) { }); _agg_places.push_back(place); - // calculate `has_string` tag. - _stored_has_string_tag[idx] = + // calculate `_has_variable_length_tag` tag. like string, array, map + _stored_has_variable_length_tag[idx] = _stored_data_columns[idx]->is_column_string() || (_stored_data_columns[idx]->is_nullable() && reinterpret_cast(_stored_data_columns[idx].get()) ->get_nested_column_ptr() - ->is_column_string()); + ->is_column_string()) || + _stored_data_columns[idx]->is_column_array() || + (_stored_data_columns[idx]->is_nullable() && + reinterpret_cast(_stored_data_columns[idx].get()) + ->get_nested_column_ptr() + ->is_column_array()) || + _stored_data_columns[idx]->is_column_map() || + (_stored_data_columns[idx]->is_nullable() && + reinterpret_cast(_stored_data_columns[idx].get()) + ->get_nested_column_ptr() + ->is_column_map()); } } @@ -461,8 +471,8 @@ size_t BlockReader::_copy_agg_data() { for (auto idx : _agg_columns_idx) { auto& dst_column = _stored_data_columns[idx]; - if (_stored_has_string_tag[idx]) { - //string type should replace ordered + if (_stored_has_variable_length_tag[idx]) { + //variable length type should replace ordered for (size_t i = 0; i < copy_size; i++) { auto& ref = _stored_row_ref[i]; dst_column->replace_column_data(*ref.block->get_by_position(idx).column, diff --git a/be/src/vec/olap/block_reader.h b/be/src/vec/olap/block_reader.h index b573e1066945b3..273c21899d4de8 100644 --- a/be/src/vec/olap/block_reader.h +++ b/be/src/vec/olap/block_reader.h @@ -111,7 +111,7 @@ class BlockReader final : public TabletReader { std::vector _stored_row_ref; std::vector _stored_has_null_tag; - std::vector _stored_has_string_tag; + std::vector _stored_has_variable_length_tag; phmap::flat_hash_map>> _temp_ref_map;