From 611cf10c1b38a6cb0065c6d2d9990332f14d126f Mon Sep 17 00:00:00 2001 From: Kang Date: Thu, 21 Mar 2024 12:26:47 +0800 Subject: [PATCH 1/2] skip read index column data only for DUP and MOW table --- be/src/olap/iterators.h | 1 + be/src/olap/rowset/beta_rowset_reader.cpp | 2 + .../rowset/segment_v2/segment_iterator.cpp | 6 + .../test_index_skip_read_data.out | 83 +++++++++++ .../test_index_skip_read_data.groovy | 131 ++++++++++++++++++ 5 files changed, 223 insertions(+) create mode 100644 regression-test/data/inverted_index_p0/test_index_skip_read_data.out create mode 100644 regression-test/suites/inverted_index_p0/test_index_skip_read_data.groovy diff --git a/be/src/olap/iterators.h b/be/src/olap/iterators.h index f1b195f8f98d26..3b9d205e83d468 100644 --- a/be/src/olap/iterators.h +++ b/be/src/olap/iterators.h @@ -92,6 +92,7 @@ class StorageReadOptions { int block_row_max = 4096 - 32; // see https://github.com/apache/doris/pull/11816 TabletSchemaSPtr tablet_schema = nullptr; + bool enable_unique_key_merge_on_write = false; bool record_rowids = false; // flag for enable topn opt bool use_topn_opt = false; diff --git a/be/src/olap/rowset/beta_rowset_reader.cpp b/be/src/olap/rowset/beta_rowset_reader.cpp index 0f8ee1715621db..ee19ef6dde41b9 100644 --- a/be/src/olap/rowset/beta_rowset_reader.cpp +++ b/be/src/olap/rowset/beta_rowset_reader.cpp @@ -215,6 +215,8 @@ Status BetaRowsetReader::get_segment_iterators(RowsetReaderContext* read_context } _read_options.use_page_cache = _read_context->use_page_cache; _read_options.tablet_schema = _read_context->tablet_schema; + _read_options.enable_unique_key_merge_on_write = + _read_context->enable_unique_key_merge_on_write; _read_options.record_rowids = _read_context->record_rowids; _read_options.use_topn_opt = _read_context->use_topn_opt; _read_options.read_orderby_key_reverse = _read_context->read_orderby_key_reverse; diff --git a/be/src/olap/rowset/segment_v2/segment_iterator.cpp b/be/src/olap/rowset/segment_v2/segment_iterator.cpp index 984d118ddf5ea5..80fb6f7856c635 100644 --- a/be/src/olap/rowset/segment_v2/segment_iterator.cpp +++ b/be/src/olap/rowset/segment_v2/segment_iterator.cpp @@ -1044,6 +1044,12 @@ Status SegmentIterator::_apply_inverted_index_on_block_column_predicate( } bool SegmentIterator::_need_read_data(ColumnId cid) { + // only support DUP_KEYS and UNIQUE_KEYS with MOW + if (!((_opts.tablet_schema->keys_type() == KeysType::DUP_KEYS || + (_opts.tablet_schema->keys_type() == KeysType::UNIQUE_KEYS && + _opts.enable_unique_key_merge_on_write)))) { + return true; + } // if there is delete predicate, we always need to read data if (_opts.delete_condition_predicates->num_of_column_predicate() > 0) { return true; diff --git a/regression-test/data/inverted_index_p0/test_index_skip_read_data.out b/regression-test/data/inverted_index_p0/test_index_skip_read_data.out new file mode 100644 index 00000000000000..bb79d437b2a492 --- /dev/null +++ b/regression-test/data/inverted_index_p0/test_index_skip_read_data.out @@ -0,0 +1,83 @@ +-- This file is automatically generated. You should know what you did if you want to edit this +-- !sql10 -- +1 20 300 +1 20 400 + +-- !sql11 -- +1 20 300 +1 20 400 + +-- !sql12 -- +1 20 300 +1 20 400 + +-- !sql13 -- +1 20 300 + +-- !sql14 -- +1 20 400 + +-- !sql15 -- +20 +20 + +-- !sql16 -- +1 +1 + +-- !sql17 -- +1 20 + +-- !sql18 -- +1 20 + +-- !sql20 -- +1 20 400 + +-- !sql21 -- +1 20 400 + +-- !sql22 -- +1 20 400 + +-- !sql23 -- + +-- !sql24 -- +1 20 400 + +-- !sql25 -- +20 + +-- !sql26 -- +1 + +-- !sql27 -- + +-- !sql28 -- +1 20 + +-- !sql30 -- +1 20 400 + +-- !sql31 -- +1 20 400 + +-- !sql32 -- +1 20 400 + +-- !sql33 -- + +-- !sql34 -- +1 20 400 + +-- !sql35 -- +20 + +-- !sql36 -- +1 + +-- !sql37 -- + +-- !sql38 -- +1 20 + diff --git a/regression-test/suites/inverted_index_p0/test_index_skip_read_data.groovy b/regression-test/suites/inverted_index_p0/test_index_skip_read_data.groovy new file mode 100644 index 00000000000000..70213910934d3a --- /dev/null +++ b/regression-test/suites/inverted_index_p0/test_index_skip_read_data.groovy @@ -0,0 +1,131 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + + +suite("test_index_skip_read_data", "p0"){ + def indexTbName1 = "test_index_skip_read_data_dup" + def indexTbName2 = "test_index_skip_read_data_mow" + def indexTbName3 = "test_index_skip_read_data_mor" + + + // dup + sql "DROP TABLE IF EXISTS ${indexTbName1}" + + sql """ + CREATE TABLE ${indexTbName1} ( + `k1` int(11) NULL COMMENT "", + `k2` varchar(20) NULL COMMENT "", + `data` text NULL COMMENT "", + INDEX idx_k1 (`k1`) USING INVERTED COMMENT '', + INDEX idx_k2 (`k2`) USING BITMAP COMMENT '', + INDEX idx_data (`data`) USING INVERTED COMMENT '' + ) ENGINE=OLAP + DUPLICATE KEY(`k1`, `k2`) + COMMENT "OLAP" + DISTRIBUTED BY HASH(`k1`, `k2`) BUCKETS 1 + PROPERTIES ( + "replication_allocation" = "tag.location.default: 1", + "disable_auto_compaction" = "true" + ); + """ + + sql """ INSERT INTO ${indexTbName1} VALUES (1, 20, 300); """ + sql """ INSERT INTO ${indexTbName1} VALUES (1, 20, 400); """ + + qt_sql10 """ SELECT * FROM ${indexTbName1} ORDER BY k1,k2,data; """ + qt_sql11 """ SELECT * FROM ${indexTbName1} WHERE k1 = 1 ORDER BY k1,k2,data; """ + qt_sql12 """ SELECT * FROM ${indexTbName1} WHERE k2 = 20 ORDER BY k1,k2,data; """ + qt_sql13 """ SELECT * FROM ${indexTbName1} WHERE data = 300 ORDER BY k1,k2,data; """ + qt_sql14 """ SELECT * FROM ${indexTbName1} WHERE data = 400 ORDER BY k1,k2,data; """ + qt_sql15 """ SELECT k2 FROM ${indexTbName1} WHERE k1 = 1 ORDER BY k1,k2,data; """ + qt_sql16 """ SELECT k1 FROM ${indexTbName1} WHERE k2 = 20 ORDER BY k1,k2,data; """ + qt_sql17 """ SELECT k1, k2 FROM ${indexTbName1} WHERE data = 300 ORDER BY k1,k2,data; """ + qt_sql18 """ SELECT k1, k2 FROM ${indexTbName1} WHERE data = 400 ORDER BY k1,k2,data; """ + + + + // mow + sql "DROP TABLE IF EXISTS ${indexTbName2}" + + sql """ + CREATE TABLE ${indexTbName2} ( + `k1` int(11) NULL COMMENT "", + `k2` varchar(20) NULL COMMENT "", + `data` text NULL COMMENT "", + INDEX idx_k1 (`k1`) USING INVERTED COMMENT '', + INDEX idx_k2 (`k2`) USING BITMAP COMMENT '', + INDEX idx_data (`data`) USING INVERTED COMMENT '' + ) ENGINE=OLAP + UNIQUE KEY(`k1`, `k2`) + COMMENT "OLAP" + DISTRIBUTED BY HASH(`k1`, `k2`) BUCKETS 1 + PROPERTIES ( + "replication_allocation" = "tag.location.default: 1", + "enable_unique_key_merge_on_write" = "true", + "disable_auto_compaction" = "true" + ); + """ + + sql """ INSERT INTO ${indexTbName2} VALUES (1, 20, 300); """ + sql """ INSERT INTO ${indexTbName2} VALUES (1, 20, 400); """ + + qt_sql20 """ SELECT * FROM ${indexTbName2} ORDER BY k1,k2,data; """ + qt_sql21 """ SELECT * FROM ${indexTbName2} WHERE k1 = 1 ORDER BY k1,k2,data; """ + qt_sql22 """ SELECT * FROM ${indexTbName2} WHERE k2 = 20 ORDER BY k1,k2,data; """ + qt_sql23 """ SELECT * FROM ${indexTbName2} WHERE data = 300 ORDER BY k1,k2,data; """ + qt_sql24 """ SELECT * FROM ${indexTbName2} WHERE data = 400 ORDER BY k1,k2,data; """ + qt_sql25 """ SELECT k2 FROM ${indexTbName2} WHERE k1 = 1 ORDER BY k1,k2,data; """ + qt_sql26 """ SELECT k1 FROM ${indexTbName2} WHERE k2 = 20 ORDER BY k1,k2,data; """ + qt_sql27 """ SELECT k1, k2 FROM ${indexTbName2} WHERE data = 300 ORDER BY k1,k2,data; """ + qt_sql28 """ SELECT k1, k2 FROM ${indexTbName2} WHERE data = 400 ORDER BY k1,k2,data; """ + + + // mor + sql "DROP TABLE IF EXISTS ${indexTbName3}" + + sql """ + CREATE TABLE ${indexTbName3} ( + `k1` int(11) NULL COMMENT "", + `k2` varchar(20) NULL COMMENT "", + `data` text NULL COMMENT "", + INDEX idx_k1 (`k1`) USING INVERTED COMMENT '', + INDEX idx_k2 (`k2`) USING BITMAP COMMENT '', + INDEX idx_data (`data`) USING INVERTED COMMENT '' + ) ENGINE=OLAP + UNIQUE KEY(`k1`, `k2`) + COMMENT "OLAP" + DISTRIBUTED BY HASH(`k1`, `k2`) BUCKETS 1 + PROPERTIES ( + "replication_allocation" = "tag.location.default: 1", + "enable_unique_key_merge_on_write" = "false", + "disable_auto_compaction" = "true" + ); + """ + + sql """ INSERT INTO ${indexTbName3} VALUES (1, 20, 300); """ + sql """ INSERT INTO ${indexTbName3} VALUES (1, 20, 400); """ + + qt_sql30 """ SELECT * FROM ${indexTbName3} ORDER BY k1,k2,data; """ + qt_sql31 """ SELECT * FROM ${indexTbName3} WHERE k1 = 1 ORDER BY k1,k2,data; """ + qt_sql32 """ SELECT * FROM ${indexTbName3} WHERE k2 = 20 ORDER BY k1,k2,data; """ + qt_sql33 """ SELECT * FROM ${indexTbName3} WHERE data = 300 ORDER BY k1,k2,data; """ + qt_sql34 """ SELECT * FROM ${indexTbName3} WHERE data = 400 ORDER BY k1,k2,data; """ + qt_sql35 """ SELECT k2 FROM ${indexTbName3} WHERE k1 = 1 ORDER BY k1,k2,data; """ + qt_sql36 """ SELECT k1 FROM ${indexTbName3} WHERE k2 = 20 ORDER BY k1,k2,data; """ + qt_sql37 """ SELECT k1, k2 FROM ${indexTbName3} WHERE data = 300 ORDER BY k1,k2,data; """ + qt_sql38 """ SELECT k1, k2 FROM ${indexTbName3} WHERE data = 400 ORDER BY k1,k2,data; """ +} \ No newline at end of file From 069255032a43b3f7648eaa08e0c112b54791286c Mon Sep 17 00:00:00 2001 From: Kang Date: Thu, 21 Mar 2024 12:30:13 +0800 Subject: [PATCH 2/2] clang format --- be/src/olap/rowset/beta_rowset_reader.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/be/src/olap/rowset/beta_rowset_reader.cpp b/be/src/olap/rowset/beta_rowset_reader.cpp index ee19ef6dde41b9..bb11347990c5cb 100644 --- a/be/src/olap/rowset/beta_rowset_reader.cpp +++ b/be/src/olap/rowset/beta_rowset_reader.cpp @@ -216,7 +216,7 @@ Status BetaRowsetReader::get_segment_iterators(RowsetReaderContext* read_context _read_options.use_page_cache = _read_context->use_page_cache; _read_options.tablet_schema = _read_context->tablet_schema; _read_options.enable_unique_key_merge_on_write = - _read_context->enable_unique_key_merge_on_write; + _read_context->enable_unique_key_merge_on_write; _read_options.record_rowids = _read_context->record_rowids; _read_options.use_topn_opt = _read_context->use_topn_opt; _read_options.read_orderby_key_reverse = _read_context->read_orderby_key_reverse;