From 45c6d9ca5229896003b22b147815c560008f2184 Mon Sep 17 00:00:00 2001 From: xy720 Date: Tue, 21 Feb 2023 18:49:59 +0800 Subject: [PATCH 1/5] save --- be/src/olap/rowset/segment_v2/column_writer.cpp | 5 ----- be/src/olap/schema.cpp | 3 ++- be/src/vec/columns/column_map.cpp | 12 ++++++++++++ be/src/vec/columns/column_map.h | 3 +-- be/src/vec/columns/column_struct.cpp | 10 ++++++++++ be/src/vec/columns/column_struct.h | 5 ++--- 6 files changed, 27 insertions(+), 11 deletions(-) diff --git a/be/src/olap/rowset/segment_v2/column_writer.cpp b/be/src/olap/rowset/segment_v2/column_writer.cpp index a35c840eb5bf1b..f4ade0b3472abf 100644 --- a/be/src/olap/rowset/segment_v2/column_writer.cpp +++ b/be/src/olap/rowset/segment_v2/column_writer.cpp @@ -737,11 +737,6 @@ Status StructColumnWriter::append_data(const uint8_t** ptr, size_t num_rows) { reinterpret_cast(data), num_rows)); } - if (is_nullable()) { - std::vector null_signs(num_rows, 0); - const uint8_t* null_sign_ptr = null_signs.data(); - RETURN_IF_ERROR(_null_writer->append_data(&null_sign_ptr, num_rows)); - } return Status::OK(); } diff --git a/be/src/olap/schema.cpp b/be/src/olap/schema.cpp index cb4b9036aab3bf..8bc18fbe1847a3 100644 --- a/be/src/olap/schema.cpp +++ b/be/src/olap/schema.cpp @@ -118,7 +118,8 @@ vectorized::IColumn::MutablePtr Schema::get_column_by_field(const Field& field) } vectorized::IColumn::MutablePtr Schema::get_predicate_column_ptr(const Field& field) { - if (UNLIKELY(field.type() == OLAP_FIELD_TYPE_ARRAY)) { + if (UNLIKELY(field.type() == OLAP_FIELD_TYPE_ARRAY || field.type() == OLAP_FIELD_TYPE_STRUCT || + field.type() == OLAP_FIELD_TYPE_MAP)) { return get_data_type_ptr(field)->create_column(); } diff --git a/be/src/vec/columns/column_map.cpp b/be/src/vec/columns/column_map.cpp index 9febda957074e1..14879b7bdf577d 100644 --- a/be/src/vec/columns/column_map.cpp +++ b/be/src/vec/columns/column_map.cpp @@ -164,6 +164,18 @@ size_t ColumnMap::filter(const Filter& filter) { return value_result_size; } +Status ColumnMap::filter_by_selector(const uint16_t* sel, size_t sel_size, IColumn* col_ptr) { + auto to = reinterpret_cast(col_ptr); + + auto& array_keys = assert_cast(*keys); + array_keys.filter_by_selector(sel, sel_size, &to->get_keys()); + + auto& array_values = assert_cast(*values); + array_values.filter_by_selector(sel, sel_size, &to->get_values()); + + return Status::OK(); +} + ColumnPtr ColumnMap::permute(const Permutation& perm, size_t limit) const { return ColumnMap::create(keys->permute(perm, limit), values->permute(perm, limit)); } diff --git a/be/src/vec/columns/column_map.h b/be/src/vec/columns/column_map.h index 946dbd537dd7ad..9f014a813ae89e 100644 --- a/be/src/vec/columns/column_map.h +++ b/be/src/vec/columns/column_map.h @@ -84,9 +84,8 @@ class ColumnMap final : public COWHelper { void update_hash_with_value(size_t n, SipHash& hash) const override; ColumnPtr filter(const Filter& filt, ssize_t result_size_hint) const override; - size_t filter(const Filter& filter) override; - + Status filter_by_selector(const uint16_t* sel, size_t sel_size, IColumn* col_ptr) override; ColumnPtr permute(const Permutation& perm, size_t limit) const override; ColumnPtr replicate(const Offsets& offsets) const override; MutableColumns scatter(ColumnIndex num_columns, const Selector& selector) const override { diff --git a/be/src/vec/columns/column_struct.cpp b/be/src/vec/columns/column_struct.cpp index 66eb57500c1654..89335f01cb8bb4 100644 --- a/be/src/vec/columns/column_struct.cpp +++ b/be/src/vec/columns/column_struct.cpp @@ -260,6 +260,16 @@ size_t ColumnStruct::filter(const Filter& filter) { return result_size; } +Status ColumnStruct::filter_by_selector(const uint16_t* sel, size_t sel_size, IColumn* col_ptr) { + auto to = reinterpret_cast(col_ptr); + const size_t tuple_size = columns.size(); + DCHECK_EQ(to->tuple_size(), tuple_size); + for (size_t i = 0; i < tuple_size; ++i) { + columns[i]->filter_by_selector(sel, sel_size, &to->get_column(i)); + } + return Status::OK(); +} + ColumnPtr ColumnStruct::permute(const Permutation& perm, size_t limit) const { const size_t tuple_size = columns.size(); Columns new_columns(tuple_size); diff --git a/be/src/vec/columns/column_struct.h b/be/src/vec/columns/column_struct.h index 6d0f951d056ed2..1ac85de308d841 100644 --- a/be/src/vec/columns/column_struct.h +++ b/be/src/vec/columns/column_struct.h @@ -146,9 +146,8 @@ class ColumnStruct final : public COWHelper { void insert_range_from(const IColumn& src, size_t start, size_t length) override; ColumnPtr filter(const Filter& filt, ssize_t result_size_hint) const override; - size_t filter(const Filter& filter) override; - + Status filter_by_selector(const uint16_t* sel, size_t sel_size, IColumn* col_ptr) override; ColumnPtr permute(const Permutation& perm, size_t limit) const override; ColumnPtr replicate(const Offsets& offsets) const override; MutableColumns scatter(ColumnIndex num_columns, const Selector& selector) const override; @@ -240,4 +239,4 @@ class ColumnStruct final : public COWHelper { // const Collator* collator = nullptr) const; }; -} // namespace doris::vectorized \ No newline at end of file +} // namespace doris::vectorized From c15db661b8608d95b8f237f10ab6517ff0decf3d Mon Sep 17 00:00:00 2001 From: xy720 Date: Wed, 22 Feb 2023 13:25:17 +0800 Subject: [PATCH 2/5] save --- be/src/olap/schema.cpp | 31 ++++++++++++++----- be/src/olap/schema.h | 3 +- be/src/vec/columns/column_map.h | 2 +- be/src/vec/data_types/data_type_map.cpp | 8 ----- be/src/vec/data_types/data_type_map.h | 7 +++++ .../data/delete_p0/test_map_column_delete.out | 4 +++ .../delete_p0/test_struct_column_delete.out | 3 ++ .../delete_p0/test_map_column_delete.groovy | 27 ++++++++++++++++ .../test_struct_column_delete.groovy | 27 ++++++++++++++++ 9 files changed, 95 insertions(+), 17 deletions(-) create mode 100644 regression-test/data/delete_p0/test_map_column_delete.out create mode 100644 regression-test/data/delete_p0/test_struct_column_delete.out create mode 100644 regression-test/suites/delete_p0/test_map_column_delete.groovy create mode 100644 regression-test/suites/delete_p0/test_struct_column_delete.groovy diff --git a/be/src/olap/schema.cpp b/be/src/olap/schema.cpp index 8bc18fbe1847a3..59ce7922ed66ec 100644 --- a/be/src/olap/schema.cpp +++ b/be/src/olap/schema.cpp @@ -117,12 +117,8 @@ vectorized::IColumn::MutablePtr Schema::get_column_by_field(const Field& field) return get_data_type_ptr(field)->create_column(); } -vectorized::IColumn::MutablePtr Schema::get_predicate_column_ptr(const Field& field) { - if (UNLIKELY(field.type() == OLAP_FIELD_TYPE_ARRAY || field.type() == OLAP_FIELD_TYPE_STRUCT || - field.type() == OLAP_FIELD_TYPE_MAP)) { - return get_data_type_ptr(field)->create_column(); - } - +vectorized::IColumn::MutablePtr Schema::get_predicate_column_ptr(const Field& field, + bool is_nullable) { vectorized::IColumn::MutablePtr ptr = nullptr; switch (field.type()) { case OLAP_FIELD_TYPE_BOOL: @@ -183,11 +179,32 @@ vectorized::IColumn::MutablePtr Schema::get_predicate_column_ptr(const Field& fi case OLAP_FIELD_TYPE_DECIMAL128I: ptr = doris::vectorized::PredicateColumnType::create(); break; + case OLAP_FIELD_TYPE_ARRAY: + ptr = doris::vectorized::ColumnArray::create( + get_predicate_column_ptr(*field.get_sub_field(0)), + doris::vectorized::ColumnArray::ColumnOffsets::create()); + break; + case OLAP_FIELD_TYPE_STRUCT: { + size_t field_size = field.get_sub_field_count(); + doris::vectorized::MutableColumns columns(field_size); + for (size_t i = 0; i < field_size; i++) { + columns[i] = get_predicate_column_ptr(*field.get_sub_field(i)); + } + ptr = doris::vectorized::ColumnStruct::create(std::move(columns)); + break; + } + case OLAP_FIELD_TYPE_MAP: + ptr = doris::vectorized::ColumnMap::create( + doris::vectorized::ColumnArray::create( + get_predicate_column_ptr(*field.get_sub_field(0), true)), + doris::vectorized::ColumnArray::create( + get_predicate_column_ptr(*field.get_sub_field(1), true))); + break; default: LOG(FATAL) << "Unexpected type when choosing predicate column, type=" << field.type(); } - if (field.is_nullable()) { + if (field.is_nullable() || is_nullable) { return doris::vectorized::ColumnNullable::create(std::move(ptr), doris::vectorized::ColumnUInt8::create()); } diff --git a/be/src/olap/schema.h b/be/src/olap/schema.h index bbe912718f3156..41061ae43e9d46 100644 --- a/be/src/olap/schema.h +++ b/be/src/olap/schema.h @@ -122,7 +122,8 @@ class Schema { static vectorized::IColumn::MutablePtr get_column_by_field(const Field& field); - static vectorized::IColumn::MutablePtr get_predicate_column_ptr(const Field& field); + static vectorized::IColumn::MutablePtr get_predicate_column_ptr(const Field& field, + bool is_nullable = false); const std::vector& columns() const { return _cols; } diff --git a/be/src/vec/columns/column_map.h b/be/src/vec/columns/column_map.h index 9f014a813ae89e..1c7974e3f4e8bb 100644 --- a/be/src/vec/columns/column_map.h +++ b/be/src/vec/columns/column_map.h @@ -147,7 +147,7 @@ class ColumnMap final : public COWHelper { return get_offsets()[i] - get_offsets()[i - 1]; } - explicit ColumnMap(MutableColumnPtr&& keys, MutableColumnPtr&& values); + ColumnMap(MutableColumnPtr&& keys, MutableColumnPtr&& values); ColumnMap(const ColumnMap&) = default; }; diff --git a/be/src/vec/data_types/data_type_map.cpp b/be/src/vec/data_types/data_type_map.cpp index daf683c3abc11b..4895a48c4c4de4 100644 --- a/be/src/vec/data_types/data_type_map.cpp +++ b/be/src/vec/data_types/data_type_map.cpp @@ -17,14 +17,6 @@ #include "data_type_map.h" -#include "gen_cpp/data.pb.h" -#include "vec/columns/column_array.h" -#include "vec/columns/column_map.h" -#include "vec/columns/column_nullable.h" -#include "vec/common/assert_cast.h" -#include "vec/data_types/data_type_array.h" -#include "vec/data_types/data_type_nullable.h" - namespace doris::vectorized { DataTypeMap::DataTypeMap(const DataTypePtr& keys_, const DataTypePtr& values_) { diff --git a/be/src/vec/data_types/data_type_map.h b/be/src/vec/data_types/data_type_map.h index 58261b0b3d30ce..2d260e0254f7cc 100644 --- a/be/src/vec/data_types/data_type_map.h +++ b/be/src/vec/data_types/data_type_map.h @@ -20,7 +20,14 @@ #pragma once +#include "gen_cpp/data.pb.h" +#include "util/stack_util.h" +#include "vec/columns/column_array.h" +#include "vec/columns/column_map.h" +#include "vec/columns/column_nullable.h" #include "vec/data_types/data_type.h" +#include "vec/data_types/data_type_array.h" +#include "vec/data_types/data_type_nullable.h" namespace doris::vectorized { /** Map data type. diff --git a/regression-test/data/delete_p0/test_map_column_delete.out b/regression-test/data/delete_p0/test_map_column_delete.out new file mode 100644 index 00000000000000..f41a73d4892bb8 --- /dev/null +++ b/regression-test/data/delete_p0/test_map_column_delete.out @@ -0,0 +1,4 @@ +-- This file is automatically generated. You should know what you did if you want to edit this +-- !sql -- +1 {1:'a', 2:'doris'} +2 {} diff --git a/regression-test/data/delete_p0/test_struct_column_delete.out b/regression-test/data/delete_p0/test_struct_column_delete.out new file mode 100644 index 00000000000000..e953717a94d365 --- /dev/null +++ b/regression-test/data/delete_p0/test_struct_column_delete.out @@ -0,0 +1,3 @@ +-- This file is automatically generated. You should know what you did if you want to edit this +-- !sql -- +1 {1, 'a'} diff --git a/regression-test/suites/delete_p0/test_map_column_delete.groovy b/regression-test/suites/delete_p0/test_map_column_delete.groovy new file mode 100644 index 00000000000000..8f1b1a9c90b60f --- /dev/null +++ b/regression-test/suites/delete_p0/test_map_column_delete.groovy @@ -0,0 +1,27 @@ +2023-02-22 13:20:33.367 INFO [main] (RegressionTest.groovy:74) - Test finished +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +suite("test_map_column_delete") { + def tableName = "test_map_column_delete" + + sql """ DROP TABLE IF EXISTS ${tableName}; """ + sql """ CREATE TABLE IF NOT EXISTS ${tableName} (id INT NULL, m_map MAP NULL) ENGINE=OLAP DUPLICATE KEY(id) DISTRIBUTED BY HASH(id) BUCKETS 4 PROPERTIES ( "replication_allocation" = "tag.location.default: 1","in_memory" = "false","storage_format" = "V2") """ + sql """ insert into ${tableName} values(1, {1:'a', 2:"doris"}),(2,{}),(3,NULL),(4,NULL),(5,NULL) """ + sql """ DELETE FROM ${tableName} WHERE m_map is NULL """ + qt_sql """ SELECT * FROM ${tableName} order by id """ +} diff --git a/regression-test/suites/delete_p0/test_struct_column_delete.groovy b/regression-test/suites/delete_p0/test_struct_column_delete.groovy new file mode 100644 index 00000000000000..e3c129a5eb1e6e --- /dev/null +++ b/regression-test/suites/delete_p0/test_struct_column_delete.groovy @@ -0,0 +1,27 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +suite("test_struct_column_delete") { + def tableName = "test_struct_column_delete" + + sql """ DROP TABLE IF EXISTS ${tableName}; """ + sql "ADMIN SET FRONTEND CONFIG ('enable_struct_type' = 'true')" + sql """ CREATE TABLE IF NOT EXISTS ${tableName} (id INT NULL, s_struct STRUCT NULL) ENGINE=OLAP DUPLICATE KEY(id) DISTRIBUTED BY HASH(id) BUCKETS 4 PROPERTIES ( "replication_allocation" = "tag.location.default: 1","in_memory" = "false","storage_format" = "V2") """ + sql """ insert into ${tableName} values(1, {1, 'a'}),(2,NULL),(3,NULL),(4,NULL),(5,NULL) """ + sql """ DELETE FROM ${tableName} WHERE s_struct is NULL """ + qt_sql """ SELECT * FROM ${tableName} order by id """ +} From 625da97fc370de3bdf6a645b9ed7fd3494136821 Mon Sep 17 00:00:00 2001 From: xy720 Date: Wed, 22 Feb 2023 13:45:45 +0800 Subject: [PATCH 3/5] format --- be/src/vec/columns/column_map.cpp | 2 +- regression-test/suites/delete_p0/test_map_column_delete.groovy | 1 - 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/be/src/vec/columns/column_map.cpp b/be/src/vec/columns/column_map.cpp index 14879b7bdf577d..2e26d5cedfecaa 100644 --- a/be/src/vec/columns/column_map.cpp +++ b/be/src/vec/columns/column_map.cpp @@ -172,7 +172,7 @@ Status ColumnMap::filter_by_selector(const uint16_t* sel, size_t sel_size, IColu auto& array_values = assert_cast(*values); array_values.filter_by_selector(sel, sel_size, &to->get_values()); - + return Status::OK(); } diff --git a/regression-test/suites/delete_p0/test_map_column_delete.groovy b/regression-test/suites/delete_p0/test_map_column_delete.groovy index 8f1b1a9c90b60f..69f651b26785e0 100644 --- a/regression-test/suites/delete_p0/test_map_column_delete.groovy +++ b/regression-test/suites/delete_p0/test_map_column_delete.groovy @@ -1,4 +1,3 @@ -2023-02-22 13:20:33.367 INFO [main] (RegressionTest.groovy:74) - Test finished // Licensed to the Apache Software Foundation (ASF) under one // or more contributor license agreements. See the NOTICE file // distributed with this work for additional information From 914507ff7200eef62dcb070df36007c64e5b3500 Mon Sep 17 00:00:00 2001 From: xy720 Date: Wed, 22 Feb 2023 13:56:59 +0800 Subject: [PATCH 4/5] fix test --- regression-test/suites/delete_p0/test_map_column_delete.groovy | 1 + 1 file changed, 1 insertion(+) diff --git a/regression-test/suites/delete_p0/test_map_column_delete.groovy b/regression-test/suites/delete_p0/test_map_column_delete.groovy index 69f651b26785e0..47d98181d6da82 100644 --- a/regression-test/suites/delete_p0/test_map_column_delete.groovy +++ b/regression-test/suites/delete_p0/test_map_column_delete.groovy @@ -19,6 +19,7 @@ suite("test_map_column_delete") { def tableName = "test_map_column_delete" sql """ DROP TABLE IF EXISTS ${tableName}; """ + sql "ADMIN SET FRONTEND CONFIG ('enable_struct_type' = 'true')" sql """ CREATE TABLE IF NOT EXISTS ${tableName} (id INT NULL, m_map MAP NULL) ENGINE=OLAP DUPLICATE KEY(id) DISTRIBUTED BY HASH(id) BUCKETS 4 PROPERTIES ( "replication_allocation" = "tag.location.default: 1","in_memory" = "false","storage_format" = "V2") """ sql """ insert into ${tableName} values(1, {1:'a', 2:"doris"}),(2,{}),(3,NULL),(4,NULL),(5,NULL) """ sql """ DELETE FROM ${tableName} WHERE m_map is NULL """ From 04132684d24e525fb328fed849779afde31389db Mon Sep 17 00:00:00 2001 From: xy720 Date: Thu, 23 Feb 2023 11:15:37 +0800 Subject: [PATCH 5/5] fix test --- regression-test/data/delete_p0/test_map_column_delete.out | 4 ++-- regression-test/data/delete_p0/test_struct_column_delete.out | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/regression-test/data/delete_p0/test_map_column_delete.out b/regression-test/data/delete_p0/test_map_column_delete.out index f41a73d4892bb8..4aad1884b8a3b3 100644 --- a/regression-test/data/delete_p0/test_map_column_delete.out +++ b/regression-test/data/delete_p0/test_map_column_delete.out @@ -1,4 +1,4 @@ -- This file is automatically generated. You should know what you did if you want to edit this -- !sql -- -1 {1:'a', 2:'doris'} -2 {} +1 {1:'a', 2:'doris'} +2 {} diff --git a/regression-test/data/delete_p0/test_struct_column_delete.out b/regression-test/data/delete_p0/test_struct_column_delete.out index e953717a94d365..bfc0e40d1e21fa 100644 --- a/regression-test/data/delete_p0/test_struct_column_delete.out +++ b/regression-test/data/delete_p0/test_struct_column_delete.out @@ -1,3 +1,3 @@ -- This file is automatically generated. You should know what you did if you want to edit this -- !sql -- -1 {1, 'a'} +1 {1, 'a'}