From 61cbffe494b24192eab9816dd292b2c3ffc5544e Mon Sep 17 00:00:00 2001 From: chaoyli Date: Tue, 17 Sep 2019 14:56:40 +0800 Subject: [PATCH 1/4] Check file descriptor number is larger than 65536 upon start --- patch | 42 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 42 insertions(+) create mode 100644 patch diff --git a/patch b/patch new file mode 100644 index 00000000000000..198682bff261c6 --- /dev/null +++ b/patch @@ -0,0 +1,42 @@ +diff --git a/be/src/olap/storage_engine.cpp b/be/src/olap/storage_engine.cpp +index 6d7bd20a..fcf12aa8 100644 +--- a/be/src/olap/storage_engine.cpp ++++ b/be/src/olap/storage_engine.cpp +@@ -231,7 +231,6 @@ void StorageEngine::_update_storage_medium_type_count() { + } + } + +- + OLAPStatus StorageEngine::_judge_and_update_effective_cluster_id(int32_t cluster_id) { + OLAPStatus res = OLAP_SUCCESS; + +@@ -265,16 +264,6 @@ void StorageEngine::set_store_used_flag(const string& path, bool is_used) { + _update_storage_medium_type_count(); + } + +-void StorageEngine::get_all_available_root_path(std::vector* available_paths) { +- available_paths->clear(); +- std::lock_guard l(_store_lock); +- for (auto& it : _store_map) { +- if (it.second->is_used()) { +- available_paths->push_back(it.first); +- } +- } +-} +- + template + std::vector StorageEngine::get_stores() { + std::vector stores; +diff --git a/be/src/olap/storage_engine.h b/be/src/olap/storage_engine.h +index 59e3555c..e4e46a7b 100644 +--- a/be/src/olap/storage_engine.h ++++ b/be/src/olap/storage_engine.h +@@ -122,8 +122,6 @@ public: + // @brief 获取所有root_path信息 + OLAPStatus get_all_data_dir_info(std::vector* data_dir_infos); + +- void get_all_available_root_path(std::vector* available_paths); +- + // 磁盘状态监测。监测unused_flag路劲新的对应root_path unused标识位, + // 当检测到有unused标识时,从内存中删除对应表信息,磁盘数据不动。 + // 当磁盘状态为不可用,但未检测到unused标识时,需要从root_path上 From 0f59e22846eca291e35c376b677d08dee8567f16 Mon Sep 17 00:00:00 2001 From: chaoyli Date: Thu, 19 Sep 2019 12:49:22 +0800 Subject: [PATCH 2/4] Seek 
block when a ScanKey starts. In Doris, one block has 1024 rows. 1. The previous ScanKey scanned multiple blocks, and its final block held exactly 1024 rows. 2. The current ScanKey scans fewer rows than one block. When both conditions hold and the block is not sought again, the position of the prefix short-key columns is wrong. --- be/src/olap/rowset/segment_reader.cpp | 15 ++++++++-- be/src/olap/rowset/segment_reader.h | 2 +- patch | 42 --------------------------- 3 files changed, 13 insertions(+), 46 deletions(-) delete mode 100644 patch diff --git a/be/src/olap/rowset/segment_reader.cpp b/be/src/olap/rowset/segment_reader.cpp index 5e847801f916d4..fa5b1c4bedaf9e 100644 --- a/be/src/olap/rowset/segment_reader.cpp +++ b/be/src/olap/rowset/segment_reader.cpp @@ -289,6 +289,15 @@ OLAPStatus SegmentReader::seek_to_block( *next_block_id = _next_block_id; *eof = _eof; + // Must seek block when starts a ScanKey. + // In Doris, one block has 1024 rows. + // 1. If the previous ScanKey scan rows multiple blocks, + // and also the final block has 1024 rows just right. + // 2. The current ScanKey scan rows with number less than one block. + // Under the two conditions, if not seek block, the position + // of prefix shortkey columns is wrong.
+ _seek_block = true; + return OLAP_SUCCESS; } @@ -832,7 +841,7 @@ OLAPStatus SegmentReader::_create_reader(size_t* buffer_size) { OLAPStatus SegmentReader::_seek_to_block_directly( int64_t block_id, const std::vector& cids) { - if (!config::block_seek_position && _at_block_start && block_id == _current_block_id) { + if (!_seek_block && block_id == _current_block_id) { // no need to execute seek return OLAP_SUCCESS; } @@ -861,7 +870,7 @@ OLAPStatus SegmentReader::_seek_to_block_directly( } } _current_block_id = block_id; - _at_block_start = true; + _seek_block = false; return OLAP_SUCCESS; } @@ -933,7 +942,7 @@ OLAPStatus SegmentReader::_load_to_vectorized_row_batch( if (size == _num_rows_in_block) { _current_block_id++; } else { - _at_block_start = false; + _seek_block = true; } _stats->blocks_load++; diff --git a/be/src/olap/rowset/segment_reader.h b/be/src/olap/rowset/segment_reader.h index 8bdf961ce59fc1..6180a9c234259b 100644 --- a/be/src/olap/rowset/segment_reader.h +++ b/be/src/olap/rowset/segment_reader.h @@ -308,7 +308,7 @@ class SegmentReader { // If this field is false, client must to call seek_to_block before // calling get_block. 
- bool _at_block_start = false; + bool _seek_block = true; int64_t _end_block; // 本次读取的结束块 int64_t _current_block_id = 0; // 当前读取到的块 diff --git a/patch b/patch deleted file mode 100644 index 198682bff261c6..00000000000000 --- a/patch +++ /dev/null @@ -1,42 +0,0 @@ -diff --git a/be/src/olap/storage_engine.cpp b/be/src/olap/storage_engine.cpp -index 6d7bd20a..fcf12aa8 100644 ---- a/be/src/olap/storage_engine.cpp -+++ b/be/src/olap/storage_engine.cpp -@@ -231,7 +231,6 @@ void StorageEngine::_update_storage_medium_type_count() { - } - } - -- - OLAPStatus StorageEngine::_judge_and_update_effective_cluster_id(int32_t cluster_id) { - OLAPStatus res = OLAP_SUCCESS; - -@@ -265,16 +264,6 @@ void StorageEngine::set_store_used_flag(const string& path, bool is_used) { - _update_storage_medium_type_count(); - } - --void StorageEngine::get_all_available_root_path(std::vector* available_paths) { -- available_paths->clear(); -- std::lock_guard l(_store_lock); -- for (auto& it : _store_map) { -- if (it.second->is_used()) { -- available_paths->push_back(it.first); -- } -- } --} -- - template - std::vector StorageEngine::get_stores() { - std::vector stores; -diff --git a/be/src/olap/storage_engine.h b/be/src/olap/storage_engine.h -index 59e3555c..e4e46a7b 100644 ---- a/be/src/olap/storage_engine.h -+++ b/be/src/olap/storage_engine.h -@@ -122,8 +122,6 @@ public: - // @brief 获取所有root_path信息 - OLAPStatus get_all_data_dir_info(std::vector* data_dir_infos); - -- void get_all_available_root_path(std::vector* available_paths); -- - // 磁盘状态监测。监测unused_flag路劲新的对应root_path unused标识位, - // 当检测到有unused标识时,从内存中删除对应表信息,磁盘数据不动。 - // 当磁盘状态为不可用,但未检测到unused标识时,需要从root_path上 From e3fa13d435a22049327a23ab5e8388de93102dcf Mon Sep 17 00:00:00 2001 From: chaoyli Date: Thu, 19 Sep 2019 15:09:53 +0800 Subject: [PATCH 3/4] Seek block when starts a ScanKey --- be/src/olap/rowset/segment_reader.cpp | 8 ++++---- be/src/olap/rowset/segment_reader.h | 2 +- be/test/olap/delete_handler_test.cpp | 3 +++ 3 files 
changed, 8 insertions(+), 5 deletions(-) diff --git a/be/src/olap/rowset/segment_reader.cpp b/be/src/olap/rowset/segment_reader.cpp index fa5b1c4bedaf9e..19939e476c109a 100644 --- a/be/src/olap/rowset/segment_reader.cpp +++ b/be/src/olap/rowset/segment_reader.cpp @@ -296,7 +296,7 @@ OLAPStatus SegmentReader::seek_to_block( // 2. The current ScanKey scan rows with number less than one block. // Under the two conditions, if not seek block, the position // of prefix shortkey columns is wrong. - _seek_block = true; + _need_to_seek_block = true; return OLAP_SUCCESS; } @@ -841,7 +841,7 @@ OLAPStatus SegmentReader::_create_reader(size_t* buffer_size) { OLAPStatus SegmentReader::_seek_to_block_directly( int64_t block_id, const std::vector& cids) { - if (!_seek_block && block_id == _current_block_id) { + if (!_need_to_seek_block && block_id == _current_block_id) { // no need to execute seek return OLAP_SUCCESS; } @@ -870,7 +870,7 @@ OLAPStatus SegmentReader::_seek_to_block_directly( } } _current_block_id = block_id; - _seek_block = false; + _need_to_seek_block = false; return OLAP_SUCCESS; } @@ -942,7 +942,7 @@ OLAPStatus SegmentReader::_load_to_vectorized_row_batch( if (size == _num_rows_in_block) { _current_block_id++; } else { - _seek_block = true; + _need_to_seek_block = true; } _stats->blocks_load++; diff --git a/be/src/olap/rowset/segment_reader.h b/be/src/olap/rowset/segment_reader.h index 6180a9c234259b..259093df4f33e0 100644 --- a/be/src/olap/rowset/segment_reader.h +++ b/be/src/olap/rowset/segment_reader.h @@ -308,7 +308,7 @@ class SegmentReader { // If this field is false, client must to call seek_to_block before // calling get_block. 
- bool _seek_block = true; + bool _need_to_seek_block = true; int64_t _end_block; // 本次读取的结束块 int64_t _current_block_id = 0; // 当前读取到的块 diff --git a/be/test/olap/delete_handler_test.cpp b/be/test/olap/delete_handler_test.cpp index ef840bd32ccef5..fa062c0cb3fafc 100644 --- a/be/test/olap/delete_handler_test.cpp +++ b/be/test/olap/delete_handler_test.cpp @@ -54,6 +54,9 @@ void set_up() { std::vector paths; paths.emplace_back(config::storage_root_path, -1); + config::min_file_descriptor_number = 65536; + config::max_file_descriptor_number = 131072; + doris::EngineOptions options; options.store_paths = paths; doris::StorageEngine::open(options, &k_engine); From 53495348071a843d0263edf35721b198c52b0c47 Mon Sep 17 00:00:00 2001 From: chaoyli Date: Thu, 19 Sep 2019 16:26:00 +0800 Subject: [PATCH 4/4] Seek block when starts a ScanKey --- be/src/common/config.h | 4 ---- be/src/olap/rowset/segment_reader.h | 2 +- 2 files changed, 1 insertion(+), 5 deletions(-) diff --git a/be/src/common/config.h b/be/src/common/config.h index 540df3a3a8b6dd..daa36d4f55641c 100644 --- a/be/src/common/config.h +++ b/be/src/common/config.h @@ -431,10 +431,6 @@ namespace config { // and the tablet will be marked as bad, so that FE will try to repair it. CONF_Bool(auto_recover_index_loading_failure, "false"); - // This configuration is used to recover compaction under the corner case. - // If this configuration is set to true, block will seek position. 
- CONF_Bool(block_seek_position, "false"); - // max external scan cache batch count, means cache max_memory_cache_batch_count * batch_size row // default is 10, batch_size's defualt value is 1024 means 10 * 1024 rows will be cached CONF_Int32(max_memory_sink_batch_count, "20"); diff --git a/be/src/olap/rowset/segment_reader.h b/be/src/olap/rowset/segment_reader.h index 259093df4f33e0..b52f19aea1d977 100644 --- a/be/src/olap/rowset/segment_reader.h +++ b/be/src/olap/rowset/segment_reader.h @@ -306,7 +306,7 @@ class SegmentReader { bool _eof; // eof标志 - // If this field is false, client must to call seek_to_block before + // If this field is true, client must call seek_to_block before // calling get_block. bool _need_to_seek_block = true;