From 7d2fcaf6ec08b637722adf4a741285bbcb0c98e0 Mon Sep 17 00:00:00 2001 From: daidai <2017501503@qq.com> Date: Thu, 17 Oct 2024 11:27:43 +0800 Subject: [PATCH 01/17] org merge io --- be/src/vec/exec/format/orc/vorc_reader.cpp | 105 +++++++++++++++------ be/src/vec/exec/format/orc/vorc_reader.h | 23 ++++- 2 files changed, 97 insertions(+), 31 deletions(-) diff --git a/be/src/vec/exec/format/orc/vorc_reader.cpp b/be/src/vec/exec/format/orc/vorc_reader.cpp index 6b6639f2feb244..91f0cf0bc3000a 100644 --- a/be/src/vec/exec/format/orc/vorc_reader.cpp +++ b/be/src/vec/exec/format/orc/vorc_reader.cpp @@ -891,6 +891,72 @@ Status OrcReader::set_fill_columns( _remaining_rows = _row_reader->getNumberOfRows(); + // if xxx + + vector select_column = _row_reader->getSelectedColumns(); + // for(auto col : select_column) {//include nested inner column + // std::cout << col <<" "; + // } + // std::cout <<"\n"; + + // std::cout <<" _reader->getNumberOfStripes() = " << _reader->getNumberOfStripes() <<"\n"; + uint64_t number_of_stripes = _reader->getNumberOfStripes(); + auto allStripesNeeded = _row_reader->getAllStripesNeeded(); + + // for(auto stn : allStripesNeeded) { + // std::cout <<"stn = " << stn<<" "; + // } + // std::cout <<"\n"; + + std::vector prefetch_ranges; + size_t total_io_size = 0; + + //_range_start_offset _range_size + // int64_t range_end_offset = _range_start_offset + _range_size; + for (uint64_t i = 0; i < number_of_stripes; i++) { + std::unique_ptr strip_info = _reader->getStripe(i); + uint64_t strip_start_offset = strip_info->getOffset(); + uint64_t strip_end_offset = strip_start_offset + strip_info->getLength(); + + if (strip_end_offset < _range_start_offset || !allStripesNeeded[i]) { + continue; + } + if (strip_info->getLength() < 4096) { + prefetch_ranges.emplace_back(strip_start_offset, strip_end_offset - 1); + continue; + } + for (uint64_t stream_id = 0; stream_id < strip_info->getNumberOfStreams(); + ++stream_id) { + std::unique_ptr stream = + strip_info->getStreamInformation(stream_id); + + uint32_t column_id = stream->getColumnId(); + uint64_t stream_offset = stream->getOffset(); + uint64_t stream_length = stream->getLength(); + if (select_column[column_id]) { + total_io_size += stream_length; + // doris::io::PrefetchRange prefetch_range = {stream_offset, stream_offset + stream_length}; + // prefetch_ranges.emplace_back(prefetch_range) + prefetch_ranges.emplace_back(stream_offset, stream_offset + stream_length - 1); + } + } + } + // for (auto range : prefetch_ranges) { + // std::cout << range.start_offset <<" , " << range.end_offset <<"\n"; + // } + + orc::InputStream* inputStreamPtr = _reader->getStream(); + auto* orcInputStreamPtr = static_cast(inputStreamPtr); + + if (prefetch_ranges.size() != 0 && + total_io_size / prefetch_ranges.size() < io::MergeRangeFileReader::SMALL_IO) { + orcInputStreamPtr->_file_reader->collect_profile_before_close(); + + orcInputStreamPtr->_file_reader.reset(new io::MergeRangeFileReader( + orcInputStreamPtr->_profile, orcInputStreamPtr->_inner_reader, + prefetch_ranges)); + } + } catch (std::exception& e) { std::string _err_msg = e.what(); // ignore stop exception @@ -2415,34 +2481,17 @@ MutableColumnPtr OrcReader::_convert_dict_column_to_string_column( void ORCFileInputStream::beforeReadStripe( std::unique_ptr current_strip_information, std::vector selected_columns) { - if (_file_reader != nullptr) { - _file_reader->collect_profile_before_close(); - } - // Generate prefetch ranges, build stripe file reader. - uint64_t offset = current_strip_information->getOffset(); - std::vector prefetch_ranges; - size_t total_io_size = 0; - for (uint64_t stream_id = 0; stream_id < current_strip_information->getNumberOfStreams(); - ++stream_id) { - std::unique_ptr stream = - current_strip_information->getStreamInformation(stream_id); - uint32_t columnId = stream->getColumnId(); - uint64_t length = stream->getLength(); - if (selected_columns[columnId]) { - total_io_size += length; - doris::io::PrefetchRange prefetch_range = {offset, offset + length}; - prefetch_ranges.emplace_back(std::move(prefetch_range)); - } - offset += length; - } - size_t num_columns = std::count_if(selected_columns.begin(), selected_columns.end(), - [](bool selected) { return selected; }); - if (total_io_size / num_columns < io::MergeRangeFileReader::SMALL_IO) { - // The underlying page reader will prefetch data in column. - _file_reader.reset(new io::MergeRangeFileReader(_profile, _inner_reader, prefetch_ranges)); - } else { - _file_reader = _inner_reader; - } + // auto x = assert_cast(_file_reader.get()); + // std::cout <<" x->range_cached_data().size() = " << x->range_cached_data().size() <<"\n"; + // if (_file_reader != nullptr) { + // _file_reader->collect_profile_before_close(); + // } + // if (total_io_size / num_columns < io::MergeRangeFileReader::SMALL_IO) { + // // The underlying page reader will prefetch data in column. + // _file_reader.reset(new io::MergeRangeFileReader(_profile, _inner_reader, prefetch_ranges)); + // } else { + // _file_reader = _inner_reader; + // } } void ORCFileInputStream::_collect_profile_before_close() { diff --git a/be/src/vec/exec/format/orc/vorc_reader.h b/be/src/vec/exec/format/orc/vorc_reader.h index 4aad5637ef544e..66ca3edc962602 100644 --- a/be/src/vec/exec/format/orc/vorc_reader.h +++ b/be/src/vec/exec/format/orc/vorc_reader.h @@ -34,6 +34,7 @@ #include "common/status.h" #include "exec/olap_common.h" #include "io/file_factory.h" +#include "io/fs/buffered_reader.h" #include "io/fs/file_reader.h" #include "io/fs/file_reader_writer_fwd.h" #include "olap/olap_common.h" @@ -642,7 +643,11 @@ class ORCFileInputStream : public orc::InputStream, public ProfileCollector { _io_ctx(io_ctx), _profile(profile) {} - ~ORCFileInputStream() override = default; + ~ORCFileInputStream() override { + if (_file_reader != nullptr) { + _file_reader->collect_profile_before_close(); + } + } uint64_t getLength() const override { return _file_reader->size(); } @@ -655,11 +660,23 @@ class ORCFileInputStream : public orc::InputStream, public ProfileCollector { void beforeReadStripe(std::unique_ptr current_strip_information, std::vector selected_columns) override; + void setPreFetchRanges(std::vector prefetch_ranges, size_t total_io_size) { + // !prefetch_ranges.empty() To prevent the entire orc file be filtered. + if (!prefetch_ranges.empty() && + total_io_size / prefetch_ranges.size() < io::MergeRangeFileReader::SMALL_IO) { + // The underlying page reader will prefetch data in column. + _file_reader.reset( + new io::MergeRangeFileReader(_profile, _inner_reader, prefetch_ranges)); + } else { + _file_reader = _inner_reader; + } + } + protected: void _collect_profile_at_runtime() override {}; void _collect_profile_before_close() override; -private: +public: const std::string& _file_name; io::FileReaderSPtr _inner_reader; io::FileReaderSPtr _file_reader; @@ -667,6 +684,6 @@ class ORCFileInputStream : public orc::InputStream, public ProfileCollector { OrcReader::Statistics* _statistics = nullptr; const io::IOContext* _io_ctx = nullptr; RuntimeProfile* _profile = nullptr; + // std::vector _prefetch_ranges; }; - } // namespace doris::vectorized From d96bd9f016c920777872e4237f519f9fdf600e75 Mon Sep 17 00:00:00 2001 From: daidai <2017501503@qq.com> Date: Fri, 18 Oct 2024 01:21:53 +0800 Subject: [PATCH 02/17] use orc cache xxx --- be/src/vec/exec/format/orc/vorc_reader.cpp | 173 ++++++++++++++------- be/src/vec/exec/format/orc/vorc_reader.h | 120 +++++++++++--- 2 files changed, 216 insertions(+), 77 deletions(-) diff --git a/be/src/vec/exec/format/orc/vorc_reader.cpp b/be/src/vec/exec/format/orc/vorc_reader.cpp index 91f0cf0bc3000a..b658cc61620e4a 100644 --- a/be/src/vec/exec/format/orc/vorc_reader.cpp +++ b/be/src/vec/exec/format/orc/vorc_reader.cpp @@ -84,6 +84,7 @@ namespace doris { class RuntimeState; +class ORCCacheFileInputStream; namespace io { struct IOContext; @@ -114,11 +115,13 @@ static constexpr int decimal_scale_for_hive11 = 10; M(TypeIndex::Float64, Float64, orc::DoubleVectorBatch) void ORCFileInputStream::read(void* buf, uint64_t length, uint64_t offset) { + read_impl(reinterpret_cast(buf), length, offset); +} +void ORCFileInputStream::read_impl(char* out, uint64_t length, uint64_t offset) { _statistics->fs_read_calls++; _statistics->fs_read_bytes += length; SCOPED_RAW_TIMER(&_statistics->fs_read_time); uint64_t has_read = 0; - char* out = reinterpret_cast(buf); while (has_read < length) { if (UNLIKELY(_io_ctx && _io_ctx->should_stop)) { throw orc::ParseError("stop"); @@ -891,70 +894,73 @@ Status OrcReader::set_fill_columns( _remaining_rows = _row_reader->getNumberOfRows(); - // if xxx - - vector select_column = _row_reader->getSelectedColumns(); - // for(auto col : select_column) {//include nested inner column - // std::cout << col <<" "; - // } - // std::cout <<"\n"; - - // std::cout <<" _reader->getNumberOfStripes() = " << _reader->getNumberOfStripes() <<"\n"; + // vector select_column = _row_reader->getSelectedColumns(); uint64_t number_of_stripes = _reader->getNumberOfStripes(); auto allStripesNeeded = _row_reader->getAllStripesNeeded(); - // for(auto stn : allStripesNeeded) { - // std::cout <<"stn = " << stn<<" "; - // } - // std::cout <<"\n"; - std::vector prefetch_ranges; - size_t total_io_size = 0; - //_range_start_offset _range_size - // int64_t range_end_offset = _range_start_offset + _range_size; + int64_t range_end_offset = _range_start_offset + _range_size; + + // 三个参数 todo + int tiny_stripe_size = 4096 * 5; + int big_io_size = tiny_stripe_size * 5; + int big_hole = big_io_size / 3 * 2; + + bool all_tiny_stripe = true; for (uint64_t i = 0; i < number_of_stripes; i++) { std::unique_ptr strip_info = _reader->getStripe(i); uint64_t strip_start_offset = strip_info->getOffset(); uint64_t strip_end_offset = strip_start_offset + strip_info->getLength(); - if (strip_end_offset < _range_start_offset || !allStripesNeeded[i]) { - continue; - } - if (strip_info->getLength() < 4096) { - prefetch_ranges.emplace_back(strip_start_offset, strip_end_offset - 1); + if (strip_start_offset > range_end_offset || strip_end_offset < _range_start_offset) { continue; } - for (uint64_t stream_id = 0; stream_id < strip_info->getNumberOfStreams(); - ++stream_id) { - std::unique_ptr stream = - strip_info->getStreamInformation(stream_id); - - uint32_t column_id = stream->getColumnId(); - uint64_t stream_offset = stream->getOffset(); - uint64_t stream_length = stream->getLength(); - if (select_column[column_id]) { - total_io_size += stream_length; - // doris::io::PrefetchRange prefetch_range = {stream_offset, stream_offset + stream_length}; - // prefetch_ranges.emplace_back(prefetch_range) - prefetch_ranges.emplace_back(stream_offset, stream_offset + stream_length - 1); - } + if (strip_info->getLength() > tiny_stripe_size) { + all_tiny_stripe = false; + break; } + strip_info->getFooterLength(); } - // for (auto range : prefetch_ranges) { - // std::cout << range.start_offset <<" , " << range.end_offset <<"\n"; - // } + all_tiny_stripe = 1; // force use cache xxx todo: fix + + if (all_tiny_stripe) { + std::vector ranges; + + uint64_t max_range_size = big_io_size; + for (uint64_t i = 0; i < number_of_stripes; i++) { + std::unique_ptr strip_info = _reader->getStripe(i); + uint64_t strip_start_offset = strip_info->getOffset(); + uint64_t strip_end_offset = strip_start_offset + strip_info->getLength(); - orc::InputStream* inputStreamPtr = _reader->getStream(); - auto* orcInputStreamPtr = static_cast(inputStreamPtr); + if (strip_start_offset >= range_end_offset || + strip_end_offset < _range_start_offset || !allStripesNeeded[i]) { + continue; + } + if (ranges.empty()) { + ranges.emplace_back(strip_start_offset, strip_end_offset); + } else if (strip_end_offset > ranges.back().start_offset + + big_io_size // big io, will read a big block + || strip_start_offset > + ranges.back().end_offset + + big_hole //big hole,will many read useless bytes + ) { // not merge + ranges.emplace_back(strip_start_offset, strip_end_offset); + } else { // merge + ranges.back().end_offset = strip_end_offset; + } + } + orc::InputStream* inputStreamPtr = _reader->getStream(); + auto* orcInputStreamPtr = static_cast(inputStreamPtr); - if (prefetch_ranges.size() != 0 && - total_io_size / prefetch_ranges.size() < io::MergeRangeFileReader::SMALL_IO) { - orcInputStreamPtr->_file_reader->collect_profile_before_close(); + for (auto x : ranges) { + max_range_size = max(max_range_size, x.end_offset - x.start_offset); + } + auto buf = std::make_unique(max_range_size); //todo: try catch bad_alloc ??? - orcInputStreamPtr->_file_reader.reset(new io::MergeRangeFileReader( - orcInputStreamPtr->_profile, orcInputStreamPtr->_inner_reader, - prefetch_ranges)); + auto cache_stream = std::make_unique( + *orcInputStreamPtr, std::move(ranges), std::move(buf)); + _reader->setStream(std::move(cache_stream)); } } catch (std::exception& e) { @@ -2481,17 +2487,34 @@ MutableColumnPtr OrcReader::_convert_dict_column_to_string_column( void ORCFileInputStream::beforeReadStripe( std::unique_ptr current_strip_information, std::vector selected_columns) { - // auto x = assert_cast(_file_reader.get()); - // std::cout <<" x->range_cached_data().size() = " << x->range_cached_data().size() <<"\n"; - // if (_file_reader != nullptr) { - // _file_reader->collect_profile_before_close(); - // } - // if (total_io_size / num_columns < io::MergeRangeFileReader::SMALL_IO) { - // // The underlying page reader will prefetch data in column. - // _file_reader.reset(new io::MergeRangeFileReader(_profile, _inner_reader, prefetch_ranges)); - // } else { - // _file_reader = _inner_reader; - // } + if (_file_reader != nullptr) { + _file_reader->collect_profile_before_close(); + } + // Generate prefetch ranges, build stripe file reader. + uint64_t offset = current_strip_information->getOffset(); + std::vector prefetch_ranges; + size_t total_io_size = 0; + for (uint64_t stream_id = 0; stream_id < current_strip_information->getNumberOfStreams(); + ++stream_id) { + std::unique_ptr stream = + current_strip_information->getStreamInformation(stream_id); + uint32_t columnId = stream->getColumnId(); + uint64_t length = stream->getLength(); + if (selected_columns[columnId]) { + total_io_size += length; + doris::io::PrefetchRange prefetch_range = {offset, offset + length}; + prefetch_ranges.emplace_back(std::move(prefetch_range)); + } + offset += length; + } + size_t num_columns = std::count_if(selected_columns.begin(), selected_columns.end(), + [](bool selected) { return selected; }); + if (total_io_size / num_columns < io::MergeRangeFileReader::SMALL_IO) { + // The underlying page reader will prefetch data in column. + _file_reader.reset(new io::MergeRangeFileReader(_profile, _inner_reader, prefetch_ranges)); + } else { + _file_reader = _inner_reader; + } } void ORCFileInputStream::_collect_profile_before_close() { @@ -2514,4 +2537,36 @@ void OrcReader::_execute_filter_position_delete_rowids(IColumn::Filter& filter) } } +void ORCCacheFileInputStream::read(void* buf, uint64_t length, uint64_t offset) { + _orc_cache_statistics.request_io++; + _orc_cache_statistics.request_bytes += length; + SCOPED_RAW_TIMER(&_orc_cache_statistics.request_time); + + if (_current_ranges != -1 && _current_ranges < _prefetch_ranges.size() && + _prefetch_ranges[_current_ranges].end_offset > offset) [[likely]] { + // Because of apache-orc seq read, + // so I think just check `_prefetch_ranges[_current_ranges].end_offset > offset` is ok. + // To be more absolute, I don’t even think this check is needed here. + int64_t buffer_offset = offset - _prefetch_ranges[_current_ranges].start_offset; + memcpy_inlined(buf, _buf.get() + buffer_offset, length); + } else { + // not in cache. + _orc_cache_statistics.miss_cache_io++; + _orc_cache_statistics.miss_cache_bytes += length; + SCOPED_RAW_TIMER(&_orc_cache_statistics.read_miss_cache_time); + read_impl(reinterpret_cast(buf), length, offset); + } +} +void ORCCacheFileInputStream::beforeReadStripe( + std::unique_ptr current_strip_information, + std::vector selected_columns) { + if (_current_ranges == -1) { + read_range_to_cache(); + return; + } + uint64_t current_strip_offset = current_strip_information->getOffset(); + if (current_strip_offset >= _prefetch_ranges[_current_ranges].end_offset) { + read_range_to_cache(); + } +} } // namespace doris::vectorized diff --git a/be/src/vec/exec/format/orc/vorc_reader.h b/be/src/vec/exec/format/orc/vorc_reader.h index 66ca3edc962602..2e6a0d1bbb1eff 100644 --- a/be/src/vec/exec/format/orc/vorc_reader.h +++ b/be/src/vec/exec/format/orc/vorc_reader.h @@ -642,12 +642,15 @@ class ORCFileInputStream : public orc::InputStream, public ProfileCollector { _statistics(statistics), _io_ctx(io_ctx), _profile(profile) {} + ORCFileInputStream(const ORCFileInputStream& orcFileInputStream) + : _file_name(orcFileInputStream._file_name), + _inner_reader(orcFileInputStream._inner_reader), + _file_reader(orcFileInputStream._inner_reader), + _statistics(orcFileInputStream._statistics), + _io_ctx(orcFileInputStream._io_ctx), + _profile(orcFileInputStream._profile) {} - ~ORCFileInputStream() override { - if (_file_reader != nullptr) { - _file_reader->collect_profile_before_close(); - } - } + ~ORCFileInputStream() override = default; uint64_t getLength() const override { return _file_reader->size(); } @@ -655,23 +658,13 @@ class ORCFileInputStream : public orc::InputStream, public ProfileCollector { void read(void* buf, uint64_t length, uint64_t offset) override; + void read_impl(char*, uint64_t length, uint64_t offset); + const std::string& getName() const override { return _file_name; } void beforeReadStripe(std::unique_ptr current_strip_information, std::vector selected_columns) override; - void setPreFetchRanges(std::vector prefetch_ranges, size_t total_io_size) { - // !prefetch_ranges.empty() To prevent the entire orc file be filtered. - if (!prefetch_ranges.empty() && - total_io_size / prefetch_ranges.size() < io::MergeRangeFileReader::SMALL_IO) { - // The underlying page reader will prefetch data in column. - _file_reader.reset( - new io::MergeRangeFileReader(_profile, _inner_reader, prefetch_ranges)); - } else { - _file_reader = _inner_reader; - } - } - protected: void _collect_profile_at_runtime() override {}; void _collect_profile_before_close() override; @@ -684,6 +677,97 @@ class ORCFileInputStream : public orc::InputStream, public ProfileCollector { OrcReader::Statistics* _statistics = nullptr; const io::IOContext* _io_ctx = nullptr; RuntimeProfile* _profile = nullptr; - // std::vector _prefetch_ranges; }; + +class ORCCacheFileInputStream : public ORCFileInputStream { + struct ORCCacheStatistics { + int64_t request_io = 0; + int64_t request_bytes = 0; + int64_t request_time = 0; + int64_t miss_cache_io = 0; + int64_t miss_cache_bytes = 0; + int64_t read_miss_cache_time = 0; + int64_t read_to_cache_time = 0; + int64_t cache_refresh_count = 0; + int64_t read_to_cache_bytes = 0; + }; + +public: + ORCCacheFileInputStream(ORCFileInputStream orcFileInputStream, + std::vector prefetch_ranges, + std::unique_ptr&& buf) + : ORCFileInputStream(orcFileInputStream), + _prefetch_ranges(prefetch_ranges), + _buf(std::move(buf)) { + if (_profile != nullptr) { + const char* random_profile = "OrcMergedStripeIO"; + ADD_TIMER_WITH_LEVEL(_profile, random_profile, 1); + _request_io = ADD_CHILD_COUNTER_WITH_LEVEL(_profile, "RequestIO", TUnit::UNIT, + random_profile, 1); + _request_bytes = ADD_CHILD_COUNTER_WITH_LEVEL(_profile, "RequestBytes", TUnit::BYTES, + random_profile, 1); + _request_time = ADD_CHILD_TIMER_WITH_LEVEL(_profile, "RequestTime", random_profile, 1); + _miss_cache_io = ADD_CHILD_COUNTER_WITH_LEVEL(_profile, "MissCacheIO", TUnit::UNIT, + random_profile, 1); + _miss_cache_bytes = ADD_CHILD_COUNTER_WITH_LEVEL(_profile, "MissCacheBytes", + TUnit::BYTES, random_profile, 1); + _read_miss_cache_time = + ADD_CHILD_TIMER_WITH_LEVEL(_profile, "ReadMissCacheTime", random_profile, 1); + + _read_to_cache_time = + ADD_CHILD_TIMER_WITH_LEVEL(_profile, "ReadToCacheTime", random_profile, 1); + _cache_refresh_count = ADD_CHILD_COUNTER_WITH_LEVEL(_profile, "CacheRefreshCount", + TUnit::UNIT, random_profile, 1); + _read_to_cache_bytes = ADD_CHILD_COUNTER_WITH_LEVEL(_profile, "ReadToCacheBytes", + TUnit::BYTES, random_profile, 1); + } + } + + void read(void* buf, uint64_t length, uint64_t offset) override; + + void read_range_to_cache() { + _current_ranges++; + uint64_t offset = _prefetch_ranges[_current_ranges].start_offset; + uint64_t length = _prefetch_ranges[_current_ranges].end_offset - + _prefetch_ranges[_current_ranges].start_offset; + _orc_cache_statistics.cache_refresh_count++; + _orc_cache_statistics.read_to_cache_bytes += length; + SCOPED_RAW_TIMER(&_orc_cache_statistics.read_to_cache_time); + read_impl(_buf.get(), length, offset); + } + + void beforeReadStripe(std::unique_ptr current_strip_information, + std::vector selected_columns) override; + + ~ORCCacheFileInputStream() override { + if (_profile != nullptr) { + COUNTER_UPDATE(_request_io, _orc_cache_statistics.request_io); + COUNTER_UPDATE(_request_bytes, _orc_cache_statistics.request_bytes); + COUNTER_UPDATE(_request_time, _orc_cache_statistics.request_time); + COUNTER_UPDATE(_miss_cache_io, _orc_cache_statistics.miss_cache_io); + COUNTER_UPDATE(_miss_cache_bytes, _orc_cache_statistics.miss_cache_bytes); + COUNTER_UPDATE(_read_miss_cache_time, _orc_cache_statistics.read_miss_cache_time); + COUNTER_UPDATE(_read_to_cache_time, _orc_cache_statistics.read_to_cache_time); + COUNTER_UPDATE(_cache_refresh_count, _orc_cache_statistics.cache_refresh_count); + COUNTER_UPDATE(_read_to_cache_bytes, _orc_cache_statistics.read_to_cache_bytes); + } + } + +private: + RuntimeProfile::Counter* _request_io = nullptr; + RuntimeProfile::Counter* _request_bytes = nullptr; + RuntimeProfile::Counter* _request_time = nullptr; + RuntimeProfile::Counter* _miss_cache_io = nullptr; + RuntimeProfile::Counter* _miss_cache_bytes = nullptr; + RuntimeProfile::Counter* _read_miss_cache_time = nullptr; + RuntimeProfile::Counter* _read_to_cache_time = nullptr; + RuntimeProfile::Counter* _cache_refresh_count = nullptr; + RuntimeProfile::Counter* _read_to_cache_bytes = nullptr; + ORCCacheStatistics _orc_cache_statistics; + + std::vector _prefetch_ranges; + int64_t _current_ranges = -1; + std::unique_ptr _buf; +}; + } // namespace doris::vectorized From e6330e99a877a53742ce9efbbcffb091c8e2b0f6 Mon Sep 17 00:00:00 2001 From: daidai <2017501503@qq.com> Date: Fri, 18 Oct 2024 02:18:20 +0800 Subject: [PATCH 03/17] fix where in xxx --- be/src/vec/exec/format/orc/vorc_reader.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/be/src/vec/exec/format/orc/vorc_reader.cpp b/be/src/vec/exec/format/orc/vorc_reader.cpp index b658cc61620e4a..711bf985e045f1 100644 --- a/be/src/vec/exec/format/orc/vorc_reader.cpp +++ b/be/src/vec/exec/format/orc/vorc_reader.cpp @@ -934,7 +934,8 @@ Status OrcReader::set_fill_columns( uint64_t strip_end_offset = strip_start_offset + strip_info->getLength(); if (strip_start_offset >= range_end_offset || - strip_end_offset < _range_start_offset || !allStripesNeeded[i]) { + strip_end_offset < _range_start_offset) { + //|| !allStripesNeeded[i] continue; } if (ranges.empty()) { From 51dffa06c1f0389be0d814a9bee11888bfa31477 Mon Sep 17 00:00:00 2001 From: daidai <2017501503@qq.com> Date: Fri, 18 Oct 2024 12:11:00 +0800 Subject: [PATCH 04/17] append filter --- be/src/vec/exec/format/orc/vorc_reader.cpp | 41 ++++++++++++++-------- 1 file changed, 26 insertions(+), 15 deletions(-) diff --git a/be/src/vec/exec/format/orc/vorc_reader.cpp b/be/src/vec/exec/format/orc/vorc_reader.cpp index 711bf985e045f1..e516d82466209c 100644 --- a/be/src/vec/exec/format/orc/vorc_reader.cpp +++ b/be/src/vec/exec/format/orc/vorc_reader.cpp @@ -934,8 +934,7 @@ Status OrcReader::set_fill_columns( uint64_t strip_end_offset = strip_start_offset + strip_info->getLength(); if (strip_start_offset >= range_end_offset || - strip_end_offset < _range_start_offset) { - //|| !allStripesNeeded[i] + strip_end_offset < _range_start_offset || !allStripesNeeded[i]) { continue; } if (ranges.empty()) { @@ -2543,20 +2542,32 @@ void ORCCacheFileInputStream::read(void* buf, uint64_t length, uint64_t offset) _orc_cache_statistics.request_bytes += length; SCOPED_RAW_TIMER(&_orc_cache_statistics.request_time); - if (_current_ranges != -1 && _current_ranges < _prefetch_ranges.size() && - _prefetch_ranges[_current_ranges].end_offset > offset) [[likely]] { - // Because of apache-orc seq read, - // so I think just check `_prefetch_ranges[_current_ranges].end_offset > offset` is ok. - // To be more absolute, I don’t even think this check is needed here. - int64_t buffer_offset = offset - _prefetch_ranges[_current_ranges].start_offset; - memcpy_inlined(buf, _buf.get() + buffer_offset, length); - } else { - // not in cache. - _orc_cache_statistics.miss_cache_io++; - _orc_cache_statistics.miss_cache_bytes += length; - SCOPED_RAW_TIMER(&_orc_cache_statistics.read_miss_cache_time); - read_impl(reinterpret_cast(buf), length, offset); + if (_current_ranges == -1 || _prefetch_ranges[_current_ranges].end_offset <= offset) + [[unlikely]] { + read_range_to_cache(); } + // Because of apache-orc seq read, + // so I think just check `_prefetch_ranges[_current_ranges].end_offset > offset` is ok. + // To be more absolute, I don’t even think this check is needed here. + int64_t buffer_offset = offset - _prefetch_ranges[_current_ranges].start_offset; + memcpy_inlined(buf, _buf.get() + buffer_offset, length); + + // if (_current_ranges != -1 && _current_ranges < _prefetch_ranges.size()) [[likely]] { + // if (_prefetch_ranges[_current_ranges].end_offset <= offset) [[unlikely]] { + // read_range_to_cache(); + // } + // // Because of apache-orc seq read, + // // so I think just check `_prefetch_ranges[_current_ranges].end_offset > offset` is ok. + // // To be more absolute, I don’t even think this check is needed here. + // int64_t buffer_offset = offset - _prefetch_ranges[_current_ranges].start_offset; + // memcpy_inlined(buf, _buf.get() + buffer_offset, length); + // } else { + // // not in cache and not read to cache. + // _orc_cache_statistics.miss_cache_io ++; + // _orc_cache_statistics.miss_cache_bytes += length; + // SCOPED_RAW_TIMER(&_orc_cache_statistics.read_miss_cache_time); + // read_impl(reinterpret_cast(buf),length,offset); + // } } void ORCCacheFileInputStream::beforeReadStripe( std::unique_ptr current_strip_information, From b595035aa4f14b9ec3250e82cdb748bebd1c3e95 Mon Sep 17 00:00:00 2001 From: daidai <2017501503@qq.com> Date: Fri, 18 Oct 2024 21:12:14 +0800 Subject: [PATCH 05/17] change --- be/src/io/fs/buffered_reader.cpp | 101 ++++++++++++ be/src/io/fs/buffered_reader.h | 115 +++++++++++++ be/src/vec/exec/format/orc/vorc_reader.cpp | 180 +++++++-------------- be/src/vec/exec/format/orc/vorc_reader.h | 114 ++----------- 4 files changed, 285 insertions(+), 225 deletions(-) diff --git a/be/src/io/fs/buffered_reader.cpp b/be/src/io/fs/buffered_reader.cpp index 20d5684734e2d4..eb992882986d3a 100644 --- a/be/src/io/fs/buffered_reader.cpp +++ b/be/src/io/fs/buffered_reader.cpp @@ -869,5 +869,106 @@ Result DelegateReader::create_file_reader( return reader; }); } + +Status LinearProbeRangeFinder::get_range_for(int64_t desiredOffset, + io::PrefetchRange& result_range) { + while (index < _ranges.size()) { + io::PrefetchRange& range = _ranges[index]; + if (range.end_offset > desiredOffset) { + if (range.start_offset > desiredOffset) [[unlikely]] { + return Status::InvalidArgument("Invalid desiredOffset"); + } + result_range = range; + return Status::OK(); + } + ++index; + } + return Status::InvalidArgument("Invalid desiredOffset"); +} + +RangeCacheFileReader::RangeCacheFileReader(RuntimeProfile* profile, io::FileReaderSPtr inner_reader, + std::shared_ptr range_finder) + : _profile(profile), + _inner_reader(std::move(inner_reader)), + _range_finder(std::move(range_finder)) { + _size = _inner_reader->size(); + uint64_t max_cache_size = + std::max((uint64_t)4096, (uint64_t)_range_finder->get_max_range_size()); + _cache = std::make_unique(max_cache_size); + + if (_profile != nullptr) { + const char* random_profile = "RangeCacheFileReader"; + ADD_TIMER_WITH_LEVEL(_profile, random_profile, 1); + _request_io = + ADD_CHILD_COUNTER_WITH_LEVEL(_profile, "RequestIO", TUnit::UNIT, random_profile, 1); + _request_bytes = ADD_CHILD_COUNTER_WITH_LEVEL(_profile, "RequestBytes", TUnit::BYTES, + random_profile, 1); + _request_time = ADD_CHILD_TIMER_WITH_LEVEL(_profile, "RequestTime", random_profile, 1); + _read_to_cache_time = + ADD_CHILD_TIMER_WITH_LEVEL(_profile, "ReadToCacheTime", random_profile, 1); + _cache_refresh_count = ADD_CHILD_COUNTER_WITH_LEVEL(_profile, "CacheRefreshCount", + TUnit::UNIT, random_profile, 1); + _read_to_cache_bytes = ADD_CHILD_COUNTER_WITH_LEVEL(_profile, "ReadToCacheBytes", + TUnit::BYTES, random_profile, 1); + } +} + +Status RangeCacheFileReader::read_at_impl(size_t offset, Slice result, size_t* bytes_read, + const IOContext* io_ctx) { + auto request_size = result.size; + + _cache_statistics.request_io++; + _cache_statistics.request_bytes += request_size; + SCOPED_RAW_TIMER(&_cache_statistics.request_time); + + PrefetchRange range; + if (_range_finder->get_range_for(offset, range)) [[likely]] { + if (_current_start_offset != range.start_offset) { // need read new range to cache. + auto range_size = range.end_offset - range.start_offset; + + _cache_statistics.cache_refresh_count++; + _cache_statistics.read_to_cache_bytes += range_size; + SCOPED_RAW_TIMER(&_cache_statistics.read_to_cache_time); + + Slice cache_slice = {_cache.get(), range_size}; + RETURN_IF_ERROR( + _inner_reader->read_at(range.start_offset, cache_slice, bytes_read, io_ctx)); + + if (*bytes_read != range_size) [[unlikely]] { + return Status::InternalError( + "RangeCacheFileReader use inner reader read bytes {} not eq expect size {}", + *bytes_read, range_size); + } + + _current_start_offset = range.start_offset; + } + + int64_t buffer_offset = offset - _current_start_offset; + memcpy(result.data, _cache.get() + buffer_offset, request_size); //todo inline. + *bytes_read = request_size; + + return Status::OK(); + } else { + return Status::InternalError("RangeCacheFileReader read not in Ranges. Offset = {}", + offset); + // RETURN_IF_ERROR(_inner_reader->read_at(offset, result , bytes_read, io_ctx)); + // return Status::OK(); + } +} + +void RangeCacheFileReader::_collect_profile_before_close() { + if (_profile != nullptr) { + COUNTER_UPDATE(_request_io, _cache_statistics.request_io); + COUNTER_UPDATE(_request_bytes, _cache_statistics.request_bytes); + COUNTER_UPDATE(_request_time, _cache_statistics.request_time); + COUNTER_UPDATE(_read_to_cache_time, _cache_statistics.read_to_cache_time); + COUNTER_UPDATE(_cache_refresh_count, _cache_statistics.cache_refresh_count); + COUNTER_UPDATE(_read_to_cache_bytes, _cache_statistics.read_to_cache_bytes); + if (_inner_reader != nullptr) { + _inner_reader->collect_profile_before_close(); + } + } +} + } // namespace io } // namespace doris diff --git a/be/src/io/fs/buffered_reader.h b/be/src/io/fs/buffered_reader.h index 907ea11b216ac4..7ba9777c9d1f3e 100644 --- a/be/src/io/fs/buffered_reader.h +++ b/be/src/io/fs/buffered_reader.h @@ -53,6 +53,121 @@ struct PrefetchRange { : start_offset(start_offset), end_offset(end_offset) {} PrefetchRange() : start_offset(0), end_offset(0) {} + + PrefetchRange span(const PrefetchRange& other) const { + return {std::min(start_offset, other.end_offset), std::max(start_offset, other.end_offset)}; + } + PrefetchRange seq_span(const PrefetchRange& other) const { + return {start_offset, other.end_offset}; + } + + //Range needs to be sorted. + static std::vector mergeAdjacentSeqRanges( + const std::vector& seq_ranges, int64_t max_merge_distance_bytes, + int64_t max_read_size_bytes) { + if (seq_ranges.empty()) { + return {}; + } + // Merge overlapping ranges + std::vector result; + PrefetchRange last = seq_ranges.front(); + for (size_t i = 1; i < seq_ranges.size(); ++i) { + PrefetchRange current = seq_ranges[i]; + PrefetchRange merged = last.seq_span(current); + if (merged.end_offset <= max_read_size_bytes + merged.start_offset && + last.end_offset + max_merge_distance_bytes >= current.start_offset) { + last = merged; + } else { + result.push_back(last); + last = current; + } + } + result.push_back(last); + return result; + } +}; + +class RangeFinder { +public: + virtual ~RangeFinder() = default; + virtual Status get_range_for(int64_t desiredOffset, io::PrefetchRange& result_range) = 0; + virtual size_t get_max_range_size() const = 0; +}; + +class LinearProbeRangeFinder : public RangeFinder { +public: + LinearProbeRangeFinder(std::vector&& ranges) : _ranges(std::move(ranges)) {} + + Status get_range_for(int64_t desiredOffset, io::PrefetchRange& result_range) override; + + size_t get_max_range_size() const override { + size_t max_range_size = 0; + for (const auto& range : _ranges) { + max_range_size = std::max(max_range_size, range.end_offset - range.start_offset); + } + return max_range_size; + } + + ~LinearProbeRangeFinder() override = default; + +private: + std::vector _ranges; + size_t index {0}; +}; + +class RangeCacheFileReader : public io::FileReader { + struct RangeCacheReaderStatistics { + int64_t request_io = 0; + int64_t request_bytes = 0; + int64_t request_time = 0; + int64_t read_to_cache_time = 0; + int64_t cache_refresh_count = 0; + int64_t read_to_cache_bytes = 0; + }; + +public: + RangeCacheFileReader(RuntimeProfile* profile, io::FileReaderSPtr inner_reader, + std::shared_ptr range_finder); + + ~RangeCacheFileReader() override = default; + + Status close() override { + if (!_closed) { + _closed = true; + } + return Status::OK(); + } + + const io::Path& path() const override { return _inner_reader->path(); } + + size_t size() const override { return _size; } + + bool closed() const override { return _closed; } + +protected: + Status read_at_impl(size_t offset, Slice result, size_t* bytes_read, + const IOContext* io_ctx) override; + + void _collect_profile_before_close() override; + +private: + RuntimeProfile* _profile = nullptr; + io::FileReaderSPtr _inner_reader; + std::shared_ptr _range_finder; + + std::unique_ptr _cache; + size_t _current_start_offset; + + size_t _size; + bool _closed = false; + + RuntimeProfile::Counter* _request_io = nullptr; + RuntimeProfile::Counter* _request_bytes = nullptr; + RuntimeProfile::Counter* _request_time = nullptr; + RuntimeProfile::Counter* _read_to_cache_time = nullptr; + RuntimeProfile::Counter* _cache_refresh_count = nullptr; + RuntimeProfile::Counter* _read_to_cache_bytes = nullptr; + RangeCacheReaderStatistics _cache_statistics; }; /** diff --git a/be/src/vec/exec/format/orc/vorc_reader.cpp b/be/src/vec/exec/format/orc/vorc_reader.cpp index e516d82466209c..a2cc90a06c6c92 100644 --- a/be/src/vec/exec/format/orc/vorc_reader.cpp +++ b/be/src/vec/exec/format/orc/vorc_reader.cpp @@ -84,7 +84,6 @@ namespace doris { class RuntimeState; -class ORCCacheFileInputStream; namespace io { struct IOContext; @@ -115,13 +114,11 @@ static constexpr int decimal_scale_for_hive11 = 10; M(TypeIndex::Float64, Float64, orc::DoubleVectorBatch) void ORCFileInputStream::read(void* buf, uint64_t length, uint64_t offset) { - read_impl(reinterpret_cast(buf), length, offset); -} -void ORCFileInputStream::read_impl(char* out, uint64_t length, uint64_t offset) { _statistics->fs_read_calls++; _statistics->fs_read_bytes += length; SCOPED_RAW_TIMER(&_statistics->fs_read_time); uint64_t has_read = 0; + char* out = reinterpret_cast(buf); while (has_read < length) { if (UNLIKELY(_io_ctx && _io_ctx->should_stop)) { throw orc::ParseError("stop"); @@ -861,6 +858,58 @@ Status OrcReader::set_fill_columns( _lazy_read_ctx.can_lazy_read = false; } + _row_reader_options.range(_range_start_offset, _range_size); + _row_reader_options.setTimezoneName(_ctz == "CST" ? "Asia/Shanghai" : _ctz); + _row_reader_options.include(_read_cols); + _row_reader_options.setEnableLazyDecoding(true); + + uint64_t number_of_stripes = _reader->getNumberOfStripes(); + auto allStripesNeeded = _reader->getNeedReadStripes(_row_reader_options); + + int64_t range_end_offset = _range_start_offset + _range_size; + + // 三个参数 todo + int orc_tiny_stripe_threshold = 8L * 1024L * 1024L; + int orc_once_max_read_size = 8L * 1024L * 1024L; + int orc_max_merge_distance = 1L * 1024L * 1024L; + + bool all_tiny_stripes = true; + std::vector tiny_stripe_ranges; + + for (uint64_t i = 0; i < number_of_stripes; i++) { + std::unique_ptr strip_info = _reader->getStripe(i); + uint64_t strip_start_offset = strip_info->getOffset(); + uint64_t strip_end_offset = strip_start_offset + strip_info->getLength(); + + if (strip_start_offset >= range_end_offset || strip_end_offset < _range_start_offset || + !allStripesNeeded[i]) { + continue; + } + if (strip_info->getLength() > orc_tiny_stripe_threshold) { + all_tiny_stripes = false; + break; + } + + tiny_stripe_ranges.emplace_back(strip_start_offset, strip_end_offset); + } + if (all_tiny_stripes && number_of_stripes > 0) { + std::vector prefetch_merge_ranges = + io::PrefetchRange::mergeAdjacentSeqRanges( + tiny_stripe_ranges, orc_max_merge_distance, orc_once_max_read_size); + + auto range_finder = + std::make_shared(std::move(prefetch_merge_ranges)); + + auto* orcInputStreamPtr = static_cast(_reader->getStream()); + orcInputStreamPtr->set_all_tiny_stripes(); + auto& orc_file_reader = orcInputStreamPtr->get_file_reader(); + orc_file_reader->collect_profile_before_close(); + auto orc_inner_reader = orcInputStreamPtr->get_inner_reader(); + orc_file_reader = std::make_shared(_profile, orc_inner_reader, + range_finder); + _lazy_read_ctx.can_lazy_read = false; + } + if (!_lazy_read_ctx.can_lazy_read) { for (auto& kv : _lazy_read_ctx.predicate_partition_columns) { _lazy_read_ctx.partition_columns.emplace(kv.first, kv.second); @@ -871,17 +920,12 @@ Status OrcReader::set_fill_columns( } _fill_all_columns = true; - - // create orc row reader try { - _row_reader_options.range(_range_start_offset, _range_size); - _row_reader_options.setTimezoneName(_ctz == "CST" ? "Asia/Shanghai" : _ctz); - _row_reader_options.include(_read_cols); + // create orc row reader if (_lazy_read_ctx.can_lazy_read) { _row_reader_options.filter(_lazy_read_ctx.predicate_orc_columns); _orc_filter = std::unique_ptr(new ORCFilterImpl(this)); } - _row_reader_options.setEnableLazyDecoding(true); if (!_lazy_read_ctx.conjuncts.empty()) { _string_dict_filter = std::make_unique(this); } @@ -894,75 +938,6 @@ Status OrcReader::set_fill_columns( _remaining_rows = _row_reader->getNumberOfRows(); - // vector select_column = _row_reader->getSelectedColumns(); - uint64_t number_of_stripes = _reader->getNumberOfStripes(); - auto allStripesNeeded = _row_reader->getAllStripesNeeded(); - - std::vector prefetch_ranges; - - int64_t range_end_offset = _range_start_offset + _range_size; - - // 三个参数 todo - int tiny_stripe_size = 4096 * 5; - int big_io_size = tiny_stripe_size * 5; - int big_hole = big_io_size / 3 * 2; - - bool all_tiny_stripe = true; - for (uint64_t i = 0; i < number_of_stripes; i++) { - std::unique_ptr strip_info = _reader->getStripe(i); - uint64_t strip_start_offset = strip_info->getOffset(); - uint64_t strip_end_offset = strip_start_offset + strip_info->getLength(); - - if (strip_start_offset > range_end_offset || strip_end_offset < _range_start_offset) { - continue; - } - if (strip_info->getLength() > tiny_stripe_size) { - all_tiny_stripe = false; - break; - } - strip_info->getFooterLength(); - } - all_tiny_stripe = 1; // force use cache xxx todo: fix - - if (all_tiny_stripe) { - std::vector ranges; - - uint64_t max_range_size = big_io_size; - for (uint64_t i = 0; i < number_of_stripes; i++) { - std::unique_ptr strip_info = _reader->getStripe(i); - uint64_t strip_start_offset = strip_info->getOffset(); - uint64_t strip_end_offset = strip_start_offset + strip_info->getLength(); - - if (strip_start_offset >= range_end_offset || - strip_end_offset < _range_start_offset || !allStripesNeeded[i]) { - continue; - } - if (ranges.empty()) { - ranges.emplace_back(strip_start_offset, strip_end_offset); - } else if (strip_end_offset > ranges.back().start_offset + - big_io_size // big io, will read a big block - || strip_start_offset > - ranges.back().end_offset + - big_hole //big hole,will many read useless bytes - ) { // not merge - ranges.emplace_back(strip_start_offset, strip_end_offset); - } else { // merge - ranges.back().end_offset = strip_end_offset; - } - } - orc::InputStream* inputStreamPtr = _reader->getStream(); - auto* orcInputStreamPtr = static_cast(inputStreamPtr); - - for (auto x : ranges) { - max_range_size = max(max_range_size, x.end_offset - x.start_offset); - } - auto buf = std::make_unique(max_range_size); //todo: try catch bad_alloc ??? - - auto cache_stream = std::make_unique( - *orcInputStreamPtr, std::move(ranges), std::move(buf)); - _reader->setStream(std::move(cache_stream)); - } - } catch (std::exception& e) { std::string _err_msg = e.what(); // ignore stop exception @@ -2487,6 +2462,9 @@ MutableColumnPtr OrcReader::_convert_dict_column_to_string_column( void ORCFileInputStream::beforeReadStripe( std::unique_ptr current_strip_information, std::vector selected_columns) { + if (_is_all_tiny_stripes) { + return; + } if (_file_reader != nullptr) { _file_reader->collect_profile_before_close(); } @@ -2537,48 +2515,4 @@ void OrcReader::_execute_filter_position_delete_rowids(IColumn::Filter& filter) } } -void ORCCacheFileInputStream::read(void* buf, uint64_t length, uint64_t offset) { - _orc_cache_statistics.request_io++; - _orc_cache_statistics.request_bytes += length; - SCOPED_RAW_TIMER(&_orc_cache_statistics.request_time); - - if (_current_ranges == -1 || _prefetch_ranges[_current_ranges].end_offset <= offset) - [[unlikely]] { - read_range_to_cache(); - } - // Because of apache-orc seq read, - // so I think just check `_prefetch_ranges[_current_ranges].end_offset > offset` is ok. - // To be more absolute, I don’t even think this check is needed here. - int64_t buffer_offset = offset - _prefetch_ranges[_current_ranges].start_offset; - memcpy_inlined(buf, _buf.get() + buffer_offset, length); - - // if (_current_ranges != -1 && _current_ranges < _prefetch_ranges.size()) [[likely]] { - // if (_prefetch_ranges[_current_ranges].end_offset <= offset) [[unlikely]] { - // read_range_to_cache(); - // } - // // Because of apache-orc seq read, - // // so I think just check `_prefetch_ranges[_current_ranges].end_offset > offset` is ok. - // // To be more absolute, I don’t even think this check is needed here. - // int64_t buffer_offset = offset - _prefetch_ranges[_current_ranges].start_offset; - // memcpy_inlined(buf, _buf.get() + buffer_offset, length); - // } else { - // // not in cache and not read to cache. - // _orc_cache_statistics.miss_cache_io ++; - // _orc_cache_statistics.miss_cache_bytes += length; - // SCOPED_RAW_TIMER(&_orc_cache_statistics.read_miss_cache_time); - // read_impl(reinterpret_cast(buf),length,offset); - // } -} -void ORCCacheFileInputStream::beforeReadStripe( - std::unique_ptr current_strip_information, - std::vector selected_columns) { - if (_current_ranges == -1) { - read_range_to_cache(); - return; - } - uint64_t current_strip_offset = current_strip_information->getOffset(); - if (current_strip_offset >= _prefetch_ranges[_current_ranges].end_offset) { - read_range_to_cache(); - } -} } // namespace doris::vectorized diff --git a/be/src/vec/exec/format/orc/vorc_reader.h b/be/src/vec/exec/format/orc/vorc_reader.h index 2e6a0d1bbb1eff..13b5a0ed26c67f 100644 --- a/be/src/vec/exec/format/orc/vorc_reader.h +++ b/be/src/vec/exec/format/orc/vorc_reader.h @@ -642,15 +642,12 @@ class ORCFileInputStream : public orc::InputStream, public ProfileCollector { _statistics(statistics), _io_ctx(io_ctx), _profile(profile) {} - ORCFileInputStream(const ORCFileInputStream& orcFileInputStream) - : _file_name(orcFileInputStream._file_name), - _inner_reader(orcFileInputStream._inner_reader), - _file_reader(orcFileInputStream._inner_reader), - _statistics(orcFileInputStream._statistics), - _io_ctx(orcFileInputStream._io_ctx), - _profile(orcFileInputStream._profile) {} - ~ORCFileInputStream() override = default; + ~ORCFileInputStream() override { + if (_file_reader != nullptr) { + _file_reader->collect_profile_before_close(); + } + } uint64_t getLength() const override { return _file_reader->size(); } @@ -658,13 +655,17 @@ class ORCFileInputStream : public orc::InputStream, public ProfileCollector { void read(void* buf, uint64_t length, uint64_t offset) override; - void read_impl(char*, uint64_t length, uint64_t offset); - const std::string& getName() const override { return _file_name; } void beforeReadStripe(std::unique_ptr current_strip_information, std::vector selected_columns) override; + void set_all_tiny_stripes() { _is_all_tiny_stripes = true; } + + io::FileReaderSPtr& get_file_reader() { return _file_reader; } + + io::FileReaderSPtr& get_inner_reader() { return _inner_reader; } + protected: void _collect_profile_at_runtime() override {}; void _collect_profile_before_close() override; @@ -673,101 +674,10 @@ class ORCFileInputStream : public orc::InputStream, public ProfileCollector { const std::string& _file_name; io::FileReaderSPtr _inner_reader; io::FileReaderSPtr _file_reader; + bool _is_all_tiny_stripes = false; // Owned by OrcReader OrcReader::Statistics* _statistics = nullptr; const io::IOContext* _io_ctx = nullptr; RuntimeProfile* _profile = nullptr; }; - -class ORCCacheFileInputStream : public ORCFileInputStream { - struct ORCCacheStatistics { - int64_t request_io = 0; - int64_t request_bytes = 0; - int64_t request_time = 0; - int64_t miss_cache_io = 0; - int64_t miss_cache_bytes = 0; - int64_t read_miss_cache_time = 0; - int64_t read_to_cache_time = 0; - int64_t cache_refresh_count = 0; - int64_t read_to_cache_bytes = 0; - }; - -public: - ORCCacheFileInputStream(ORCFileInputStream orcFileInputStream, - std::vector prefetch_ranges, - std::unique_ptr&& buf) - : ORCFileInputStream(orcFileInputStream), - _prefetch_ranges(prefetch_ranges), - _buf(std::move(buf)) { - if (_profile != nullptr) { - const char* random_profile = "OrcMergedStripeIO"; - ADD_TIMER_WITH_LEVEL(_profile, random_profile, 1); - _request_io = ADD_CHILD_COUNTER_WITH_LEVEL(_profile, "RequestIO", TUnit::UNIT, - random_profile, 1); - _request_bytes = ADD_CHILD_COUNTER_WITH_LEVEL(_profile, "RequestBytes", TUnit::BYTES, - random_profile, 1); - _request_time = ADD_CHILD_TIMER_WITH_LEVEL(_profile, "RequestTime", random_profile, 1); - _miss_cache_io = ADD_CHILD_COUNTER_WITH_LEVEL(_profile, "MissCacheIO", TUnit::UNIT, - random_profile, 1); - _miss_cache_bytes = ADD_CHILD_COUNTER_WITH_LEVEL(_profile, "MissCacheBytes", - TUnit::BYTES, random_profile, 1); - _read_miss_cache_time = - ADD_CHILD_TIMER_WITH_LEVEL(_profile, "ReadMissCacheTime", random_profile, 1); - - _read_to_cache_time = - ADD_CHILD_TIMER_WITH_LEVEL(_profile, "ReadToCacheTime", random_profile, 1); - _cache_refresh_count = ADD_CHILD_COUNTER_WITH_LEVEL(_profile, "CacheRefreshCount", - TUnit::UNIT, random_profile, 1); - _read_to_cache_bytes = ADD_CHILD_COUNTER_WITH_LEVEL(_profile, "ReadToCacheBytes", - TUnit::BYTES, random_profile, 1); - } - } - - void read(void* buf, uint64_t length, uint64_t offset) override; - - void read_range_to_cache() { - _current_ranges++; - uint64_t offset = _prefetch_ranges[_current_ranges].start_offset; - uint64_t length = _prefetch_ranges[_current_ranges].end_offset - - _prefetch_ranges[_current_ranges].start_offset; - _orc_cache_statistics.cache_refresh_count++; - _orc_cache_statistics.read_to_cache_bytes += length; - SCOPED_RAW_TIMER(&_orc_cache_statistics.read_to_cache_time); - read_impl(_buf.get(), length, offset); - } - - void beforeReadStripe(std::unique_ptr current_strip_information, - std::vector selected_columns) override; - - ~ORCCacheFileInputStream() override { - if (_profile != nullptr) { - COUNTER_UPDATE(_request_io, _orc_cache_statistics.request_io); - COUNTER_UPDATE(_request_bytes, _orc_cache_statistics.request_bytes); - COUNTER_UPDATE(_request_time, _orc_cache_statistics.request_time); - COUNTER_UPDATE(_miss_cache_io, _orc_cache_statistics.miss_cache_io); - COUNTER_UPDATE(_miss_cache_bytes, _orc_cache_statistics.miss_cache_bytes); - COUNTER_UPDATE(_read_miss_cache_time, _orc_cache_statistics.read_miss_cache_time); - COUNTER_UPDATE(_read_to_cache_time, _orc_cache_statistics.read_to_cache_time); - COUNTER_UPDATE(_cache_refresh_count, _orc_cache_statistics.cache_refresh_count); - COUNTER_UPDATE(_read_to_cache_bytes, _orc_cache_statistics.read_to_cache_bytes); - } - } - -private: - RuntimeProfile::Counter* _request_io = nullptr; - RuntimeProfile::Counter* _request_bytes = nullptr; - RuntimeProfile::Counter* _request_time = nullptr; - RuntimeProfile::Counter* _miss_cache_io = nullptr; - RuntimeProfile::Counter* _miss_cache_bytes = nullptr; - RuntimeProfile::Counter* _read_miss_cache_time = nullptr; - RuntimeProfile::Counter* _read_to_cache_time = nullptr; - RuntimeProfile::Counter* _cache_refresh_count = nullptr; - RuntimeProfile::Counter* _read_to_cache_bytes = nullptr; - ORCCacheStatistics _orc_cache_statistics; - - std::vector _prefetch_ranges; - int64_t _current_ranges = -1; - std::unique_ptr _buf; -}; - } // namespace doris::vectorized From 5c1cafa2c505cb19a3a73107afcbb4d819293eb9 Mon Sep 17 00:00:00 2001 From: daidai <2017501503@qq.com> Date: Fri, 18 Oct 2024 23:19:50 +0800 Subject: [PATCH 06/17] fix init --- be/src/io/fs/buffered_reader.h | 2 +- be/src/vec/exec/format/orc/vorc_reader.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/be/src/io/fs/buffered_reader.h b/be/src/io/fs/buffered_reader.h index 7ba9777c9d1f3e..45ed2d416265f3 100644 --- a/be/src/io/fs/buffered_reader.h +++ b/be/src/io/fs/buffered_reader.h @@ -156,7 +156,7 @@ class RangeCacheFileReader : public io::FileReader { std::shared_ptr _range_finder; std::unique_ptr _cache; - size_t _current_start_offset; + int64_t _current_start_offset = -1; size_t _size; bool _closed = false; diff --git a/be/src/vec/exec/format/orc/vorc_reader.h b/be/src/vec/exec/format/orc/vorc_reader.h index 13b5a0ed26c67f..0807f4949e5850 100644 --- a/be/src/vec/exec/format/orc/vorc_reader.h +++ b/be/src/vec/exec/format/orc/vorc_reader.h @@ -670,7 +670,7 @@ class ORCFileInputStream : public orc::InputStream, public ProfileCollector { void _collect_profile_at_runtime() override {}; void _collect_profile_before_close() override; -public: +private: const std::string& _file_name; io::FileReaderSPtr _inner_reader; io::FileReaderSPtr _file_reader; From 1cdf7aec821db5a56783f7a3b6ea244b956bc52b Mon Sep 17 00:00:00 2001 From: daidai <2017501503@qq.com> Date: Fri, 18 Oct 2024 23:32:38 +0800 Subject: [PATCH 07/17] build --- be/src/apache-orc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/be/src/apache-orc b/be/src/apache-orc index 903ea6ccdc463b..db01184f765c03 160000 --- a/be/src/apache-orc +++ b/be/src/apache-orc @@ -1 +1 @@ -Subproject commit 903ea6ccdc463b8a17af2604975107ba7d895380 +Subproject commit db01184f765c03496e4107bd3ac37c077ac4bc5f From ece971b10e799d1178811cf62ff35f047d682e4e Mon Sep 17 00:00:00 2001 From: daidai <2017501503@qq.com> Date: Sat, 19 Oct 2024 02:21:17 +0800 Subject: [PATCH 08/17] add fe. --- be/src/io/fs/buffered_reader.h | 2 +- be/src/vec/exec/format/orc/vorc_reader.cpp | 14 ++-- .../org/apache/doris/qe/SessionVariable.java | 77 +++++++++++++++++++ gensrc/thrift/PaloInternalService.thrift | 3 + 4 files changed, 90 insertions(+), 6 deletions(-) diff --git a/be/src/io/fs/buffered_reader.h b/be/src/io/fs/buffered_reader.h index 45ed2d416265f3..fa3be668971d1a 100644 --- a/be/src/io/fs/buffered_reader.h +++ b/be/src/io/fs/buffered_reader.h @@ -61,7 +61,7 @@ struct PrefetchRange { return {start_offset, other.end_offset}; } - //Range needs to be sorted. + //Ranges needs to be sorted. static std::vector mergeAdjacentSeqRanges( const std::vector& seq_ranges, int64_t max_merge_distance_bytes, int64_t max_read_size_bytes) { diff --git a/be/src/vec/exec/format/orc/vorc_reader.cpp b/be/src/vec/exec/format/orc/vorc_reader.cpp index a2cc90a06c6c92..c9c9edb2da2772 100644 --- a/be/src/vec/exec/format/orc/vorc_reader.cpp +++ b/be/src/vec/exec/format/orc/vorc_reader.cpp @@ -868,10 +868,15 @@ Status OrcReader::set_fill_columns( int64_t range_end_offset = _range_start_offset + _range_size; - // 三个参数 todo - int orc_tiny_stripe_threshold = 8L * 1024L * 1024L; - int orc_once_max_read_size = 8L * 1024L * 1024L; - int orc_max_merge_distance = 1L * 1024L * 1024L; + int64_t orc_tiny_stripe_threshold = 8L * 1024L * 1024L; + int64_t orc_once_max_read_size = 8L * 1024L * 1024L; + int64_t orc_max_merge_distance = 1L * 1024L * 1024L; + + if (_state != nullptr) { + orc_tiny_stripe_threshold = _state->query_options().orc_tiny_stripe_threshold; + orc_once_max_read_size = _state->query_options().orc_once_max_read_size; + orc_max_merge_distance = _state->query_options().orc_max_merge_distance; + } bool all_tiny_stripes = true; std::vector tiny_stripe_ranges; @@ -896,7 +901,6 @@ Status OrcReader::set_fill_columns( std::vector prefetch_merge_ranges = io::PrefetchRange::mergeAdjacentSeqRanges( tiny_stripe_ranges, orc_max_merge_distance, orc_once_max_read_size); - auto range_finder = std::make_shared(std::move(prefetch_merge_ranges)); diff --git a/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java b/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java index 88919c290d8407..55687a30fa4f6a 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java +++ b/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java @@ -470,6 +470,12 @@ public class SessionVariable implements Serializable, Writable { public static final String ENABLE_ORC_LAZY_MAT = "enable_orc_lazy_materialization"; + public static final String ORC_TINY_STRIPE_THRESHOLD = "orc_tiny_stripe_threshold"; + + public static final String ORC_ONCE_MAX_READ_SIZE = "orc_once_max_read_size"; + + public static final String ORC_MAX_MERGE_DISTANCE = "orc_max_merge_distance"; + public static final String ENABLE_PARQUET_FILTER_BY_MIN_MAX = "enable_parquet_filter_by_min_max"; public static final String ENABLE_ORC_FILTER_BY_MIN_MAX = "enable_orc_filter_by_min_max"; @@ -1719,6 +1725,46 @@ public void setEnableLeftZigZag(boolean enableLeftZigZag) { public boolean enableOrcLazyMat = true; + @VariableMgr.VarAttr( + name = ORC_TINY_STRIPE_THRESHOLD, + description = {"在orc文件中如果一个stripe的字节大小小于`orc_tiny_stripe_threshold`," + + "我们认为该stripe为 tiny stripe。对于多个连续的tiny stripe我们会进行读取优化,即一次性读多个tiny stripe." + + "如果你不想使用该优化,可以将该值设置为0。默认为 8M。", + "In an orc file, if the byte size of a stripe is less than `orc_tiny_stripe_threshold`," + + "we consider the stripe to be a tiny stripe. For multiple consecutive tiny stripes," + + "we will perform read optimization, that is, read multiple tiny stripes at a time." + + "If you do not want to use this optimization, you can set this value to 0." + + "The default is 8M."}, + needForward = true, + setter = "setOrcTinyStripeThreshold") + public long orcTinyStripeThreshold = 8L * 1024L * 1024L; + + + @VariableMgr.VarAttr( + name = ORC_ONCE_MAX_READ_SIZE, + description = {"在使用tiny stripe读取优化的时候,会对多个tiny stripe合并成一次IO," + + "该参数用来控制每次IO请求的最大字节大小。你不应该将值设置的小于`orc_tiny_stripe_threshold`。默认为 8M。", + "When using tiny stripe read optimization, multiple tiny stripes will be merged into one IO." + + "This parameter is used to control the maximum byte size of each IO request." + + "You should not set the value less than `orc_tiny_stripe_threshold`." + + "The default is 8M."}, + needForward = true, + setter = "setOrcOnceMaxReadSize") + public long orcOnceMaxReadSize = 8L * 1024L * 1024L; + + + @VariableMgr.VarAttr( + name = ORC_MAX_MERGE_DISTANCE, + description = {"在使用tiny stripe读取优化的时候,由于tiny stripe并不一定连续。" + + "当两个tiny stripe之间距离大于该参数时,我们不会将其合并成一次IO。默认为 1M。", + "When using tiny stripe read optimization, since tiny stripes are not necessarily continuous," + + "when the distance between two tiny stripes is greater than this parameter," + + "we will not merge them into one IO. The default value is 1M."}, + needForward = true, + setter = "setOrcMaxMergeDistance") + public long orcMaxMergeDistance = 1024L * 1024L; + + @VariableMgr.VarAttr( name = ENABLE_PARQUET_FILTER_BY_MIN_MAX, description = {"控制 parquet reader 是否启用 min-max 值过滤。默认为 true。", @@ -2799,6 +2845,32 @@ public void setFragmentInstanceNum(String value) throws Exception { this.parallelExecInstanceNum = val; } + public void setOrcTinyStripeThreshold(String value) throws Exception { + long val = checkFieldLongValue(ORC_TINY_STRIPE_THRESHOLD, 0, value); + this.orcTinyStripeThreshold = val; + } + + public void setOrcOnceMaxReadSize(String value) throws Exception { + long val = checkFieldLongValue(ORC_ONCE_MAX_READ_SIZE, 0, value); + this.orcOnceMaxReadSize = val; + } + + public void setOrcMaxMergeDistance(String value) throws Exception { + long val = checkFieldLongValue(ORC_MAX_MERGE_DISTANCE, 0, value); + this.orcMaxMergeDistance = val; + } + + private long checkFieldLongValue(String variableName, long minValue, String value) throws Exception { + long val = Long.parseLong(value); + if (val < minValue) { + throw new Exception( + variableName + " value should greater than or equal " + String.valueOf(minValue) + + ", you set value is: " + value); + } + return val; + } + + private int checkFieldValue(String variableName, int minValue, String value) throws Exception { int val = Integer.valueOf(value); if (val < minValue) { @@ -3867,6 +3939,11 @@ public TQueryOptions toThrift() { tResult.setInListValueCountThreshold(inListValueCountThreshold); tResult.setEnablePhraseQuerySequentialOpt(enablePhraseQuerySequentialOpt); tResult.setEnableAutoCreateWhenOverwrite(enableAutoCreateWhenOverwrite); + + tResult.setOrcTinyStripeThreshold(orcTinyStripeThreshold); + tResult.setOrcMaxMergeDistance(orcMaxMergeDistance); + tResult.setOrcOnceMaxReadSize(orcOnceMaxReadSize); + return tResult; } diff --git a/gensrc/thrift/PaloInternalService.thrift b/gensrc/thrift/PaloInternalService.thrift index f531db3028224a..485d4c930d4a22 100644 --- a/gensrc/thrift/PaloInternalService.thrift +++ b/gensrc/thrift/PaloInternalService.thrift @@ -350,6 +350,9 @@ struct TQueryOptions { 135: optional bool enable_parallel_outfile = false; 136: optional bool enable_phrase_query_sequential_opt = true; + 137: optional i64 orc_tiny_stripe_threshold = 8388608; + 138: optional i64 orc_once_max_read_size = 8388608; + 139: optional i64 orc_max_merge_distance = 1048576; 137: optional bool enable_auto_create_when_overwrite = false; // For cloud, to control if the content would be written into file cache From d02131430ed02137f0bf027f17f5c31843256f81 Mon Sep 17 00:00:00 2001 From: daidai <2017501503@qq.com> Date: Mon, 21 Oct 2024 21:36:44 +0800 Subject: [PATCH 09/17] fix comment and add case Co-authored-by: kaka11chen --- be/src/io/fs/buffered_reader.cpp | 9 +- be/src/io/fs/buffered_reader.h | 36 +- be/src/vec/exec/format/orc/vorc_reader.cpp | 112 +- be/test/io/fs/buffered_reader_test.cpp | 108 + .../orc/orc_tiny_stripes/output_60_3.orc | Bin 0 -> 19688 bytes .../orc/orc_tiny_stripes/output_6_1.orc | Bin 0 -> 1902 bytes .../orc_tiny_stripes/random_output_60_3.orc | Bin 0 -> 24248 bytes .../orc_tiny_stripes/random_output_6_1.orc | Bin 0 -> 2293 bytes .../org/apache/doris/qe/SessionVariable.java | 48 +- gensrc/thrift/PaloInternalService.thrift | 6 +- .../hive/test_orc_tiny_stripes.out | 2311 +++++++++++++++++ .../hive/test_orc_tiny_stripes.groovy | 203 ++ 12 files changed, 2742 insertions(+), 91 deletions(-) create mode 100644 docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/orc/orc_tiny_stripes/output_60_3.orc create mode 100644 docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/orc/orc_tiny_stripes/output_6_1.orc create mode 100644 docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/orc/orc_tiny_stripes/random_output_60_3.orc create mode 100644 docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/orc/orc_tiny_stripes/random_output_6_1.orc create mode 100644 regression-test/data/external_table_p0/hive/test_orc_tiny_stripes.out create mode 100644 regression-test/suites/external_table_p0/hive/test_orc_tiny_stripes.groovy diff --git a/be/src/io/fs/buffered_reader.cpp b/be/src/io/fs/buffered_reader.cpp index eb992882986d3a..2d0ef7edcaba93 100644 --- a/be/src/io/fs/buffered_reader.cpp +++ b/be/src/io/fs/buffered_reader.cpp @@ -870,12 +870,12 @@ Result DelegateReader::create_file_reader( }); } -Status LinearProbeRangeFinder::get_range_for(int64_t desiredOffset, +Status LinearProbeRangeFinder::get_range_for(int64_t desired_offset, io::PrefetchRange& result_range) { while (index < _ranges.size()) { io::PrefetchRange& range = _ranges[index]; - if (range.end_offset > desiredOffset) { - if (range.start_offset > desiredOffset) [[unlikely]] { + if (range.end_offset > desired_offset) { + if (range.start_offset > desired_offset) [[unlikely]] { return Status::InvalidArgument("Invalid desiredOffset"); } result_range = range; @@ -944,7 +944,7 @@ Status RangeCacheFileReader::read_at_impl(size_t offset, Slice result, size_t* b } int64_t buffer_offset = offset - _current_start_offset; - memcpy(result.data, _cache.get() + buffer_offset, request_size); //todo inline. + memcpy(result.data, _cache.get() + buffer_offset, request_size); *bytes_read = request_size; return Status::OK(); @@ -953,6 +953,7 @@ Status RangeCacheFileReader::read_at_impl(size_t offset, Slice result, size_t* b offset); // RETURN_IF_ERROR(_inner_reader->read_at(offset, result , bytes_read, io_ctx)); // return Status::OK(); + // think return error is ok,otherwise it will cover up the error. } } diff --git a/be/src/io/fs/buffered_reader.h b/be/src/io/fs/buffered_reader.h index fa3be668971d1a..06fbd4bc83f397 100644 --- a/be/src/io/fs/buffered_reader.h +++ b/be/src/io/fs/buffered_reader.h @@ -54,6 +54,12 @@ struct PrefetchRange { PrefetchRange() : start_offset(0), end_offset(0) {} + bool operator==(const PrefetchRange& other) const { + return (start_offset == other.start_offset) && (end_offset == other.end_offset); + } + + bool operator!=(const PrefetchRange& other) const { return !(*this == other); } + PrefetchRange span(const PrefetchRange& other) const { return {std::min(start_offset, other.end_offset), std::max(start_offset, other.end_offset)}; } @@ -62,9 +68,9 @@ struct PrefetchRange { } //Ranges needs to be sorted. - static std::vector mergeAdjacentSeqRanges( + static std::vector merge_adjacent_seq_ranges( const std::vector& seq_ranges, int64_t max_merge_distance_bytes, - int64_t max_read_size_bytes) { + int64_t once_max_read_bytes) { if (seq_ranges.empty()) { return {}; } @@ -74,7 +80,7 @@ struct PrefetchRange { for (size_t i = 1; i < seq_ranges.size(); ++i) { PrefetchRange current = seq_ranges[i]; PrefetchRange merged = last.seq_span(current); - if (merged.end_offset <= max_read_size_bytes + merged.start_offset && + if (merged.end_offset <= once_max_read_bytes + merged.start_offset && last.end_offset + max_merge_distance_bytes >= current.start_offset) { last = merged; } else { @@ -90,7 +96,7 @@ struct PrefetchRange { class RangeFinder { public: virtual ~RangeFinder() = default; - virtual Status get_range_for(int64_t desiredOffset, io::PrefetchRange& result_range) = 0; + virtual Status get_range_for(int64_t desired_offset, io::PrefetchRange& result_range) = 0; virtual size_t get_max_range_size() const = 0; }; @@ -98,7 +104,7 @@ class LinearProbeRangeFinder : public RangeFinder { public: LinearProbeRangeFinder(std::vector&& ranges) : _ranges(std::move(ranges)) {} - Status get_range_for(int64_t desiredOffset, io::PrefetchRange& result_range) override; + Status get_range_for(int64_t desired_offset, io::PrefetchRange& result_range) override; size_t get_max_range_size() const override { size_t max_range_size = 0; @@ -115,6 +121,13 @@ class LinearProbeRangeFinder : public RangeFinder { size_t index {0}; }; +/** + * The reader provides a solution to read one range at a time. You can customize RangeFinder to meet your scenario. + * For me, since there will be tiny stripes when reading orc files, in order to reduce the requests to hdfs, + * I first merge the access to the orc files to be read (of course there is a problem of read amplification, + * but in my scenario, compared with reading hdfs multiple times, it is faster to read more data on hdfs at one time), + * and then because the actual reading of orc files is in order from front to back, I provide LinearProbeRangeFinder. + */ class RangeCacheFileReader : public io::FileReader { struct RangeCacheReaderStatistics { int64_t request_io = 0; @@ -168,6 +181,19 @@ class RangeCacheFileReader : public io::FileReader { RuntimeProfile::Counter* _cache_refresh_count = nullptr; RuntimeProfile::Counter* _read_to_cache_bytes = nullptr; RangeCacheReaderStatistics _cache_statistics; + /** + * `RangeCacheFileReader`: + * 1. `CacheRefreshCount`: how many IOs are merged + * 2. `ReadToCacheBytes`: how much data is actually read after merging + * 3. `ReadToCacheTime`: how long it takes to read data after merging + * 4. `RequestBytes`: how many bytes does the apache-orc library actually need to read the orc file + * 5. `RequestIO`: how many times the apache-orc library calls this read interface + * 6. `RequestTime`: how long it takes the apache-orc library to call this read interface + * + * It should be noted that `RangeCacheFileReader` is a wrapper of the reader that actually reads data,such as + * the hdfs reader, so strictly speaking, `CacheRefreshCount` is not equal to how many IOs are initiated to hdfs, + * because each time the hdfs reader is requested, the hdfs reader may not be able to read all the data at once. + */ }; /** diff --git a/be/src/vec/exec/format/orc/vorc_reader.cpp b/be/src/vec/exec/format/orc/vorc_reader.cpp index c9c9edb2da2772..1c309470bed4da 100644 --- a/be/src/vec/exec/format/orc/vorc_reader.cpp +++ b/be/src/vec/exec/format/orc/vorc_reader.cpp @@ -857,74 +857,76 @@ Status OrcReader::set_fill_columns( if (_colname_to_value_range == nullptr || !_init_search_argument(_colname_to_value_range)) { _lazy_read_ctx.can_lazy_read = false; } + try { + _row_reader_options.range(_range_start_offset, _range_size); + _row_reader_options.setTimezoneName(_ctz == "CST" ? "Asia/Shanghai" : _ctz); + _row_reader_options.include(_read_cols); + _row_reader_options.setEnableLazyDecoding(true); - _row_reader_options.range(_range_start_offset, _range_size); - _row_reader_options.setTimezoneName(_ctz == "CST" ? "Asia/Shanghai" : _ctz); - _row_reader_options.include(_read_cols); - _row_reader_options.setEnableLazyDecoding(true); + uint64_t number_of_stripes = _reader->getNumberOfStripes(); + auto all_stripes_needed = _reader->getNeedReadStripes(_row_reader_options); - uint64_t number_of_stripes = _reader->getNumberOfStripes(); - auto allStripesNeeded = _reader->getNeedReadStripes(_row_reader_options); + int64_t range_end_offset = _range_start_offset + _range_size; - int64_t range_end_offset = _range_start_offset + _range_size; + // If you set "orc_tiny_stripe_threshold_bytes" = 0, the use tiny stripes merge io optimization will not be used. + int64_t orc_tiny_stripe_threshold_bytes = 8L * 1024L * 1024L; + int64_t orc_once_max_read_bytes = 8L * 1024L * 1024L; + int64_t orc_max_merge_distance_bytes = 1L * 1024L * 1024L; - int64_t orc_tiny_stripe_threshold = 8L * 1024L * 1024L; - int64_t orc_once_max_read_size = 8L * 1024L * 1024L; - int64_t orc_max_merge_distance = 1L * 1024L * 1024L; + if (_state != nullptr) { + orc_tiny_stripe_threshold_bytes = + _state->query_options().orc_tiny_stripe_threshold_bytes; + orc_once_max_read_bytes = _state->query_options().orc_once_max_read_bytes; + orc_max_merge_distance_bytes = _state->query_options().orc_max_merge_distance_bytes; + } - if (_state != nullptr) { - orc_tiny_stripe_threshold = _state->query_options().orc_tiny_stripe_threshold; - orc_once_max_read_size = _state->query_options().orc_once_max_read_size; - orc_max_merge_distance = _state->query_options().orc_max_merge_distance; - } + bool all_tiny_stripes = true; + std::vector tiny_stripe_ranges; - bool all_tiny_stripes = true; - std::vector tiny_stripe_ranges; + for (uint64_t i = 0; i < number_of_stripes; i++) { + std::unique_ptr strip_info = _reader->getStripe(i); + uint64_t strip_start_offset = strip_info->getOffset(); + uint64_t strip_end_offset = strip_start_offset + strip_info->getLength(); - for (uint64_t i = 0; i < number_of_stripes; i++) { - std::unique_ptr strip_info = _reader->getStripe(i); - uint64_t strip_start_offset = strip_info->getOffset(); - uint64_t strip_end_offset = strip_start_offset + strip_info->getLength(); + if (strip_start_offset >= range_end_offset || strip_end_offset < _range_start_offset || + !all_stripes_needed[i]) { + continue; + } + if (strip_info->getLength() > orc_tiny_stripe_threshold_bytes) { + all_tiny_stripes = false; + break; + } - if (strip_start_offset >= range_end_offset || strip_end_offset < _range_start_offset || - !allStripesNeeded[i]) { - continue; + tiny_stripe_ranges.emplace_back(strip_start_offset, strip_end_offset); } - if (strip_info->getLength() > orc_tiny_stripe_threshold) { - all_tiny_stripes = false; - break; - } - - tiny_stripe_ranges.emplace_back(strip_start_offset, strip_end_offset); - } - if (all_tiny_stripes && number_of_stripes > 0) { - std::vector prefetch_merge_ranges = - io::PrefetchRange::mergeAdjacentSeqRanges( - tiny_stripe_ranges, orc_max_merge_distance, orc_once_max_read_size); - auto range_finder = - std::make_shared(std::move(prefetch_merge_ranges)); - - auto* orcInputStreamPtr = static_cast(_reader->getStream()); - orcInputStreamPtr->set_all_tiny_stripes(); - auto& orc_file_reader = orcInputStreamPtr->get_file_reader(); - orc_file_reader->collect_profile_before_close(); - auto orc_inner_reader = orcInputStreamPtr->get_inner_reader(); - orc_file_reader = std::make_shared(_profile, orc_inner_reader, - range_finder); - _lazy_read_ctx.can_lazy_read = false; - } + if (all_tiny_stripes && number_of_stripes > 0) { + std::vector prefetch_merge_ranges = + io::PrefetchRange::merge_adjacent_seq_ranges(tiny_stripe_ranges, + orc_max_merge_distance_bytes, + orc_once_max_read_bytes); + auto range_finder = + std::make_shared(std::move(prefetch_merge_ranges)); - if (!_lazy_read_ctx.can_lazy_read) { - for (auto& kv : _lazy_read_ctx.predicate_partition_columns) { - _lazy_read_ctx.partition_columns.emplace(kv.first, kv.second); + auto* orc_input_stream_ptr = static_cast(_reader->getStream()); + orc_input_stream_ptr->set_all_tiny_stripes(); + auto& orc_file_reader = orc_input_stream_ptr->get_file_reader(); + orc_file_reader->collect_profile_before_close(); + auto orc_inner_reader = orc_input_stream_ptr->get_inner_reader(); + orc_file_reader = std::make_shared(_profile, orc_inner_reader, + range_finder); + _lazy_read_ctx.can_lazy_read = false; } - for (auto& kv : _lazy_read_ctx.predicate_missing_columns) { - _lazy_read_ctx.missing_columns.emplace(kv.first, kv.second); + + if (!_lazy_read_ctx.can_lazy_read) { + for (auto& kv : _lazy_read_ctx.predicate_partition_columns) { + _lazy_read_ctx.partition_columns.emplace(kv.first, kv.second); + } + for (auto& kv : _lazy_read_ctx.predicate_missing_columns) { + _lazy_read_ctx.missing_columns.emplace(kv.first, kv.second); + } } - } - _fill_all_columns = true; - try { + _fill_all_columns = true; // create orc row reader if (_lazy_read_ctx.can_lazy_read) { _row_reader_options.filter(_lazy_read_ctx.predicate_orc_columns); diff --git a/be/test/io/fs/buffered_reader_test.cpp b/be/test/io/fs/buffered_reader_test.cpp index d0a504162d3bb7..658c98ba51465b 100644 --- a/be/test/io/fs/buffered_reader_test.cpp +++ b/be/test/io/fs/buffered_reader_test.cpp @@ -114,6 +114,34 @@ class MockOffsetFileReader : public io::FileReader { io::Path _path = "/tmp/mock"; }; +class TestingRangeCacheFileReader : public io::FileReader { +public: + TestingRangeCacheFileReader(std::shared_ptr delegate) : _delegate(delegate) {}; + + ~TestingRangeCacheFileReader() override = default; + + Status close() override { return _delegate->close(); } + + const io::Path& path() const override { return _delegate->path(); } + + size_t size() const override { return _delegate->size(); } + + bool closed() const override { return _delegate->closed(); } + + const io::PrefetchRange& last_read_range() const { return *_last_read_range; } + +protected: + Status read_at_impl(size_t offset, Slice result, size_t* bytes_read, + const io::IOContext* io_ctx) override { + _last_read_range = std::make_unique(offset, offset + result.size); + return _delegate->read_at_impl(offset, result, bytes_read, io_ctx); + } + +private: + std::shared_ptr _delegate; + std::unique_ptr _last_read_range; +}; + TEST_F(BufferedReaderTest, normal_use) { // buffered_reader_test_file 950 bytes io::FileReaderSPtr local_reader; @@ -398,4 +426,84 @@ TEST_F(BufferedReaderTest, test_merged_io) { } } +TEST_F(BufferedReaderTest, test_range_cache_file_reader) { + io::FileReaderSPtr offset_reader = std::make_shared(128 * 1024 * 1024); + auto testing_reader = std::make_shared(offset_reader); + + int64_t orc_max_merge_distance = 1L * 1024L * 1024L; + int64_t orc_once_max_read_size = 8L * 1024L * 1024L; + + { + std::vector tiny_stripe_ranges = { + io::PrefetchRange(3, 33), + io::PrefetchRange(33, 63), + io::PrefetchRange(63, 8L * 1024L * 1024L + 63), + }; + std::vector prefetch_merge_ranges = + io::PrefetchRange::merge_adjacent_seq_ranges( + tiny_stripe_ranges, orc_max_merge_distance, orc_once_max_read_size); + auto range_finder = + std::make_shared(std::move(prefetch_merge_ranges)); + io::RangeCacheFileReader range_cache_file_reader(nullptr, testing_reader, range_finder); + char data[1]; + Slice result(data, 1); + size_t bytes_read; + EXPECT_TRUE(range_cache_file_reader.read_at(3, result, &bytes_read, nullptr).ok()); + EXPECT_EQ(io::PrefetchRange(3, 63), testing_reader->last_read_range()); + + EXPECT_TRUE(range_cache_file_reader.read_at(63, result, &bytes_read, nullptr).ok()); + EXPECT_EQ(io::PrefetchRange(63, 8 * 1024L * 1024L + 63), testing_reader->last_read_range()); + EXPECT_TRUE(range_cache_file_reader.close().ok()); + } + + { + std::vector tiny_stripe_ranges = { + io::PrefetchRange(3, 33), + io::PrefetchRange(33, 63), + io::PrefetchRange(63, 8L * 1024L * 1024L + 63), + }; + std::vector prefetch_merge_ranges = + io::PrefetchRange::merge_adjacent_seq_ranges( + tiny_stripe_ranges, orc_max_merge_distance, orc_once_max_read_size); + auto range_finder = + std::make_shared(std::move(prefetch_merge_ranges)); + io::RangeCacheFileReader range_cache_file_reader(nullptr, testing_reader, range_finder); + char data[1]; + Slice result(data, 1); + size_t bytes_read; + EXPECT_TRUE(range_cache_file_reader.read_at(62, result, &bytes_read, nullptr).ok()); + EXPECT_EQ(io::PrefetchRange(3, 63), testing_reader->last_read_range()); + + EXPECT_TRUE(range_cache_file_reader.read_at(63, result, &bytes_read, nullptr).ok()); + EXPECT_EQ(io::PrefetchRange(63, 8L * 1024L * 1024L + 63), + testing_reader->last_read_range()); + EXPECT_TRUE(range_cache_file_reader.close().ok()); + } + + { + std::vector tiny_stripe_ranges = { + io::PrefetchRange(3, 3), + io::PrefetchRange(4, 1048576L * 5L + 4), + io::PrefetchRange(1048576L * 5L + 4, 1048576L * 3L + 1048576L * 5L + 4), + }; + std::vector prefetch_merge_ranges = + io::PrefetchRange::merge_adjacent_seq_ranges( + tiny_stripe_ranges, orc_max_merge_distance, orc_once_max_read_size); + auto range_finder = + std::make_shared(std::move(prefetch_merge_ranges)); + io::RangeCacheFileReader range_cache_file_reader(nullptr, testing_reader, range_finder); + char data[1]; + Slice result(data, 1); + size_t bytes_read; + EXPECT_TRUE(range_cache_file_reader.read_at(3, result, &bytes_read, nullptr).ok()); + EXPECT_EQ(io::PrefetchRange(3, 1 + 1048576 * 5 + 3), testing_reader->last_read_range()); + + EXPECT_TRUE(range_cache_file_reader.read_at(4 + 1048576 * 5, result, &bytes_read, nullptr) + .ok()); + EXPECT_EQ(io::PrefetchRange(4 + 1048576 * 5, 3 * 1048576 + 4 + 1048576 * 5), + testing_reader->last_read_range()); + EXPECT_TRUE(range_cache_file_reader.close().ok()); + } +} + } // end namespace doris diff --git a/docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/orc/orc_tiny_stripes/output_60_3.orc b/docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/orc/orc_tiny_stripes/output_60_3.orc new file mode 100644 index 0000000000000000000000000000000000000000..34c95840549584e239278f124840878cdc5df341 GIT binary patch literal 19688 zcmchfacouBeaGLqHshpCgbA_gYlgl+V=Ifdo@;P2F z8p5yrcA+j8esNa5u9X+2{q@^hzg=v?7f;LyfBR*8%S+)~p3isp{bFo6-v&`^AN=6- zFWAalhvO{H(c9p?E(Gsgr+(nYvpER(tNKDizJcdN9$%T5iLZ`$8DHzfE8#2SOu(OW zxFa0Q9q|QxtrO3M-+zwh!sx_fbKO^8TW!T_-~}P9cqPIsC-Al--VVgu7vXK`C%h7n z`4Gy4*8q5V2!-(5%owkcb1~nF7Z6@GfwvR!`VjA^#Orh#XK>;R0IdR{N_>rgHUk3x z75S!xf9`Yo>uch(#G;UI4prxiVRb%NXn^m|mE~Cztwwq7M!>xYcrpTPnmMb(SpOdM3ka=c8=cRAs$w%>l35^zGeusQy}31YzQILgQy_YgBX5WpA~WrZxvm3 z$g+g2Bp?qW<^V%e26(O-`y&^%*1-^L@+OmOs9{>6Ot?-}!>G}D*U2F^g$q##iS+AZeDN6~; zqeyuiDYyNInntHHe?dD@dekwnYP}Ls&IiB+5ZVpO7ch6QPCv(b)hUbI7kg7EeMuQ4 zCp7%BHglts^%l-M{tEL$%i$|3-I5d7>Z1HMoUq)=8QWi3HDf|Mtaav-7%stYA0 zwhs^M)~gX^0Dx5p3$v$O#21SbcFIzbC`(>~@+4A@A?1mPa?z`ciLz9L=dy%#Tas31 z5dgjlVR819OL&JkpF&xZl;s5FX{0=Zl; z*Q_NPuayci6beL()*N?-sc8V-a^VF5oOoLO_CCOh>vw!mfoie05?L|ls)BU z{)QN}Q?z;mZ;5j$l+yc@ zliueZQa(V+a}nj6-+fC`(uZcO+p_dNZvxXWY z39xPh>3!Y>!0$nLFB{64_?cuz_?aZ1f1md-(G6!?$qyxgG(SPo{5(UF7A}vrKAnt~+CwtDljPGaSYzn6|L6xKl+KZg~k@IZCxp&`w$%)ORg4J7*CTK4p z?t`#Dd(H!l_cL)Vg;Uy~YSIQBK+b;TycTgDIM^>av7J<~daKd~9RS3G5c;#{JjD1B zChpletI`NnlSb$eavnj>dlBcM!$*j-iVdZT)mxQD=nxF z(h3bA=P~4b7I6+7JtjG^qg1hatI`S$0OC;y$Fk=<&iD`$ef8G^o=K?p;Bj+e`9*Q`JM@EUW zh8?Ab)mxKxXc!PjAdF_ud4lm%Obn-RN96LQOIkBVEuzG9K4~+rhX$a%lbDm-R z91~Z8vxN@b!%1xG^M&a+8Yrs5t%){#f?>K#quMhFdJaLaMxbZTp6hV_>@9e64V#Kv z#0qwc(iEKm&a)8CrNGi~B{?Zl<1iTQ{vXC+y2(Ay_(dk}0_-aU+Z@8qRIpx?r1o+0 zsSl>lBke_`y&KVg|1!12?u$L}hQLxxv+#=SpTa@_CbNgV&iG9x`cq(CSHYGNu-6gx zCc^edu-9+gl&~%~8rP+2cBN6e4xBe2+{_;K7UOrA7)pcn6>N}zy@jxM5Oyeny>U`sbW68@^EQM#*~8vt{5})oX|NRqTT8&+McDfYJ08K_y?0;1V#{$|s%BSO zrn|s-55oQIVIMI5kclfPuwGHZdR_wd0m42+*een2gHIkxSdV%RO@3WZ`lbiK`3Z!F z*~30!{0S3x(qKypww!={gs@K#_D%%*=tV*a#IsC0F^_4&8P-#wNV|Cn%41itmi!r z-{*+aX}ro!`AIjm2YL4)@98PtnQRslikDhYiOSk1Lp3oIP-j69+4Jsq_(4ZpPT}>H zq4JZ4YCrNGMBd9&yv=MjnDcDc75_x6U~4+8^WCIdHWrH*b%qWc$KH}lb)&{ zc@HD+?J3?C*2?B8UTQ=>m9DTOrKNp7)5uk2>OU8n3cde$rMQLEfXt`*@0X z9&2Os6)!a+pUT=NXEiYoP}?BP&z|>~!v`I)W4gT`m6fw9C!N(XhP0}IG)C<{8c&WuSSvgB=R1g;;piUY?0!nMpUMLj z2#d1kJ>~Gzju=bhRsJeS`m0mOdm4Girg&dvi`f#zOO1#I&u$?7)vJKI7{Ze5dB+`o z))ALdcmw6Hf~3D1N8Yo@dufVyDeGX%ByT|eDxk6sq`z7Us2vcNWzT!g;TIfnD~(tA zt03vG&LQswiu6}20d*CG)!Fl2ariYy^ycmTsG|H;CF!rOAn!Hg?H!H! z(Hgdvt&_YJ@>evOb}Q0ftpU`v5Y}bSJL&Knj@X~ZtNc|Z>8~b{_XhIrpWPWnGc}YCWKCfY6ma?@fo_cEqtXUgfVUNq=<{d2b`{u_@k-Y!ln8c&QOpsH`i} zUu^``O%OK!dU?NEpqp*+J8fx~+0w7E72-nb_LwFVyEaBbwhLp1kh-U(3B@jGk&x|b zR$*BTOT`GQc zhNNtlW(vzCpm9${(=sloke2OgilG(V2GOTmCh)qJ3LkE52w;TDBEiLo3>9)wGOtR?@O9vKGE3|D7;*M|H9I z(kO}9)9wmGM(#aHG?&9*FOsA>CYnwqhgMryW|G(%0>0@Kyv>tCd1Tl_NA zv|TG*Exu$$YPR(%LrvRg($tJqCQ`F4F&S#w-jJ>qUl<}a+p3VErfuiwYVoxkQnM}O z6kdxxS=tUv_@1=o7m~BBy%aiQa@v;3|F&L2dbY(9Lr+`ei0P@1#YOs9OnSE64MR`c zm5Ax7kDYQ!g7j?b5r&?&j-czAdkCaw+dwe%w26OAPkn51>0=Y=*@pavo;FmE>8Xz; zE`2N^J=+}J(9`DDx}G_(COzAz+R)QR!!bQ|Z0XUlCF$9w!G@kTeU0g1Iq%9lOb~Ch6G*nueY>J=FEgu_5W%28M>7Hl2&bg)>iLjk8`854Yz#eZIug@U&xag~ke+QI zV(4kp2VKt`JCL4j;9%%!r~5HI^?b-3K{~Jsu`lV_4txzg?Q}J!r=AaatV(*e164y$JH3qQspmr;Taupb zz|zpuP6u^8^H`AdYzKmdo_6{c(^JofJa!{J+ku;*r=4!a^wjeqkF`k8cA%B7=Ysrsa(2{?AQA_??Xvx1dYstUP){-C1(UN~?(UMPJ(UM<& zQA;L%hj&Ha_28HIpGL{P-_?@tFKfx21zK{kT}wubTGH=nNw2RZ56W6{wW1|sH7yzV zeZEn&%m49z{|Ed#;`e0I^M_jUpNq9*a*38qEY*^c4lViNGA-#_t|gy-Eli5?zw_~H zTJrPPwdA8e){>unT}wXv6D|4R8(Q-HZ)(Y>>$K$K^;+`t4O;S1mzMl&qn3QQNlQN1 ztR>&yq9vdHxt4tV7bDde^~vDF*r7Tw(+^ehMtFYb+51b%1i$PDcX}B literal 0 HcmV?d00001 diff --git a/docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/orc/orc_tiny_stripes/output_6_1.orc b/docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/orc/orc_tiny_stripes/output_6_1.orc new file mode 100644 index 0000000000000000000000000000000000000000..2735240826b24224d75583be5297c818eb449186 GIT binary patch literal 1902 zcmchYF^d#26vtmC*=&qkgi+3S3*IeTZFS7d?lyniulwebv0Hh-$QfS6h^e!KXV0JB&r%=I(()E>LE4g<>h+JWw;U_7maqqK0448w0qXsgZ(z02 zWNkPKyv#AdZH^Kr6onHgDJCxAC@2niUgD6BN=GA2+B%juZr_QLtZb4fN$yGV9?3D2 zup(;-*P*IOa!-=*iPuV^oI({PDLhHhBROFbR%9*dLiTEs!jq`=lW3<=M@iqJub;#?gCaPh$V?og7%-yV_mrymobNqq1$Ere_2`t!CkUB5k7M(^PEW(<)W( z6DiV;Pg5^}PpeS8PozXTK25d;KCLwMK9L0N_%yi^__UJ5`$W>iNcZ`|*iyKQ>Ezx_ zj4zt+A{e|0SAMBr<%13ee}~Orx@|_O6p0+GM^7H-tOe_Q0yD^9YK%Jj8lHXCbD7>% zhFa0?$Y_4{EN;Uh0!!WZ(baX&FqR3d)nG;j zLS|$Q!^*N^gHaHe#afmfV_Bil5-rRSV@$9ZD_BCtGA86PCNNeoc*Eb9UsWnItE#K3 zHUDIHR#v`O-!Fgfz3=CeBEiui+7%J z@MmW6;lh^s5uVn9R_&B$WtxH`;#Qhk{K;3hFTM&6R~+y^8@5KU246)AsJ}e&4(czT zM4wNPcY$*r&ZDmLaXz`4I_HxH>hA=(3E;H~Uj>eQ_HkyyX!i|-^Y|2g>Ijd#@Gl_* zIt1s#08RzARg_Xl^WkbSo6;EPV~RpKtTdL0*+{5TOxVk@WGN#okIW$M6@Du-xN51(qgk}~&vlT$I5#xynfva3C%xAS#@*oF>^Arp( zQ*3#iX3Od$(mCS^2cIX{BI@0$j2i-?fGL+QXUc(0tP=Klm1x{jIOLb%;fW#Ro*Oal zo#Tyr8|61};5RVAwO#nh4*BZ$(3s3<*cveGL)Z?jG-4H*B$~3hY_b_!samE}c4=B! zRlG*b6b!y#BCOC!g8pVZB~nR?F15y6$p~%a39f6pKwpL2{!=H(d-F?Wx#{}DM-Rr3oEv`z~GFS@&h9cQ35SVv{03#a#475xha84`m+lx8; z>61M2i~j?Wc@QD92OzT>n~r8P5~V{QQAXWT0x#8?@rubFsfNTpvqEYl9UN5Npa^w_ zLggxLbTX5TuR7)6Pfy`Yc^O}#E)$D&rL>1D7=dyI;vs~GXE~tsf|+OpDEEyA3e)`F zpT>`c$&4`{yZ)9pXD#^&G)HqG>;dYjIu(UMc803lp|*X0x~m8fFTxm$&rt11qp$tO5uI7ykyMGA`F z(B`6gZTDC9b70a;ObnM5>!@t0NZuT6*Rm~#u0CG?%5QOFk%ZhBhR!H%Jh*ahoPP-a z7I@j`Pe627i;bOTg+~-`becME#A&mp)#ixVC~`((Eqio9O5BlkSEJ&%NEsJ~^Q}xl zZLH-D3+cAPMn1!h=;tWhkP-dnuX+^PdEQpL^GgSR=ky&*s`4eu7De1DHhUxbp-K_a z_Cbz8-+blCAw(Os(9odyLVYr;_qZqZ2==z&ew7*e^wW$sF0Dm)0GHzx{^~8+Mj<*%e)+)+uN-8OM61Hej z-$uB=1;iECQnAGjEWjbeh-?Cod)5sL= znB6FJb{6E>s3D%oxLLw`q7fPt5Fv=h0S+45sdGNLkNVs1wtLI=z2l+5WX)Fz{8Wv+ z`5&M$sSq4u0LOl8##ONR7jk*4CG1etwUvfPOuV4$i1*zzxkzMXO40$y~10I*Nv*=nZJ{0Z$;Esd{prQfs(chEfW+D;UJ= zF>qMaae(X5=%Eq7jp>G%z-6GXBn(_@z>I@+Qr<&LM_vRXzqOa-DA z1c%8&w`30B}oQ2$a6md%hxFx{|b)vSQ5Ep}ny1(36 zP*q(fVuz&@$hEySt0t1t7t&?BDX&+h&H9Ri zPpsY1uR!O^HN0-e5?T;s1Vn2UJjgku>P;abX_oTg((xc;lIrYM{Og~PZ~qk})k6p| zDM0K1He-p3<<(>;rS@6HjgVBRa{4@hjx?sx66;$!VOpM$+7eZ_%HMB~Xbk*uTvOZr+xY^;*rnhE*SXki_~2OrX=Ko_JGk|IRFnhhb_G!-#cW;y(u>_9$T1^ zY>|hbQjssh)#i3IVz7C%8^b0fEiJ(dkwVTODSN>eDKB$t$7j{z?5VA9fWXC@Q0ZJ2 z5Uzy6WL;BB&S!(7lqdv*p~p+@0WQMF`wC~Qy*Py|WO$r{n5@v5e7y^Q@mU`EyCcw< zu>9?;v*huljPn(tsg-UzLIs`0UvHR`noLL*CMFZYYO`;0^Dz~|zoZ`bt;H5z(G&YV zaPTK*x5B%YlIwE?%-T?1s%z-HF-;T9+D)+#g6U((h73G#x4C)_WR%Yxb8^Go4%gcR%fm=R3v5vb`>9MD+DaGbYNVTr!bFl{RKe-jLNmtgEB~(xf7%RrERA4=J#I zPtoQK9nTMaci$Nk*eKkvZf8X)(@IJiox&;crkb>%B^4s9eh=C7Av|2;P^yR3s0co$ zvt^pk=}q|4Qu0h25K4;7=K)|o_hHiwesqGJfg+pDe12ZiM8JFl`9p=G5(k)`t)b{N z+}6h25@bf9?>68!)#T^Lp(|zW2+m~eX>(Xwu4jdIQ#c(p8`9EFN9M~)Dg?ik>f87F ziUQ>e3Y6z)=B#b(x;giagU{bS(@JIaqMX6sRI3{WN493yEQo9xQO-165yIyJ#E07V z%5m`N`Q1^wgSWBob#)7V^#n+D{Q&~RT7j-H7F#B3mDuD~xw7S;$ytbq^O8(X;w2A( z?=mxk&rw$OF@?=pI^tlD?OkJe%)wu~X^lCVFC`u7K1oOg1;dp9G@a)l7HCp(E8_O- zXViid<}g<1lE1$jKlL(z;S}^Wto^RFqDa=cAh!|gvO$Mi5{+9@q?9xMPbh9&q`b=8 z^ptng_l}td_8Iu6D)gOUZ(7;O7z*u5S1MuvTre%3hKGwB2K0tLYO@QjJ;xY2(?XuV z6F>eGdF&lX09JZtr%joZ#NwhzRGpS(WMQFbA>vT_MU7S!ESCr}WYO2%rHojUNkqjO zZz+^eR|1lFjeH4;V)ElUh0g_wA0N@SbNKkiuQopK;BVVWIyBB=z!Mj0^pc>^?NK{p zx`ZjPr2b*r5t*nBox>2(6bI3sO^wRW!0RhS^z)7Q5ij}fvk->8$Q$nl-gp+0_fww__rmb{7>JP6J_P~xDW(N){)~rz0{0AG zgOv-cwD1XWH+{$rB6la7*Vb5ZmkiTGpR#Nn%oQEvz!@ zLrsa%X7Z5@4w;^%%kY^&xP`I! z6Arn04h*I{ms)7TxzQNpVmh!-2>e9{x%ew+ZUzM4BLF}ZFgHb~tyVkj3juMh=CHOj z0|?95fG7H6uV zSzeS>O~r1R54E53G9(T+Guj(2EbkdaoZgyq^LWHzn%-Nx@k@WkBR_c#njUM~JW;RA z6LTH4Mq|_F%T2#|KIL?{o$6e+stmjGhWV5(*yoteQ6RiS@#G>Mr2+TRih(jDQ^vr71%y~@5#3aLwL-^4)`RboTa74(g_5m{7 zgRQrQE7rD1ER~6jMklc;oYuK?7LUIf*1IxgOCs2}L_J1X&?O3)59#^Qp`+5b<_Zq} z_Vt?_Qj;ugO}fi$@6NRtzxJWL@p~b)sk9WuA#Ix9EL_+)|V_r)nI6g`7PRZW_}{w zxBUk2&cteiDpL?P`z28-5K%sX(0CHi8yY$y^N?1k8TJthent=4+jLq>oA=B)8J zWlw0<_!ezXgb>SY0V^)v+y>jy3fSTiTc|1FafsGe9H4q@QS}J6JOHk*Fi@SI!{1Jj zr~d}BMUIeq5FoV|TOVmHvFnWb&8E<`J8OJmm{w0gFs z9D&xsm810~hJT$TUwRRubr7K?0cbsd&C0EkoFP-vs`Zr=UPp0?azsbyR4IO8bw>QPgTH!ewk)gUjSLA(sX`cCr>qVhxLagB zjF9ghKCgzRxk5%A%i%|VOuqeJppQO`Fj4@FMA%MCJgPEEl%;05ql{*4)>f(MkxP8_ zMyI1G5bN7zUBgpW1nl;_FGW(jBTUmnQ=StPPPdAD9@-}pJbVSAU^!5>w-^Tb?J(o$!uCYw`KYvjr$aV}j|v{f}t zt!hl!y?r~di-)-^T2NevZC!s$l*OkhOAS)3E}u@jB^q%^p0tEm(vfiqASl=10UN@kQB#B4@K0bt zaaBjY{AZ9!tV9P9aY$;$gs5dVizT6)#+$7X8%o|>$!L}Ng??MJ=^<_4CZDPl`SpyOI93I~0z|!+MqcR&X88(L7W^(1` z2K+BT0_eSe4t)-bD{Iv{y)|pD5O{gP=yOPY#RYxJW>9M-WP$_72NWDHQE*(P;n3fE zc>mfK*mT8rbVYFm6>Uy~#-eHj6RDDQKAo>L6rorwo+>oW$c#P!s9f0wp}EO{rssP` zF$Qjzi~$XEM*X@D{~=7C`yY@oO5}S)!1vsb%^J-?r^;4|)a(^=DHGKim7$>6D-0M5 zswA<_7Y!QI8EG~``Zze9pd8K(h!%OBwy7T-*?nOD4-Wpi+12!|OKC7RQVAo%2S)rw zDrVi_;M1cHob@cV;@CGHK1}L-FThU%>RfmpQs*uN&@~vlL^VrMKx1zosWAI|lyZF6PbQL@285IHMXFvX@Cn3TlVv2hN~-q_}&? zpnD7TZQ~7^Ns8m^@b|prx4(v@co>aY4#q6Rw%K&0P9vJ}S=}nJEZ+*1JVt*zNNj1S z6Sj)Y?~nxYMn@5pOVnKrvc!=SFy9i5*q z74kwwXt2IP+7N8_K(2fQ4>=BOJ+3@51h(Gl6wV%3XiMK?_zgSx?Jps0tewRfjlG}@ zCS#sp)+CkKMcQbk)d(2G60ohmqh$&OjbUx29gQ{}N^wV)aI}*;QNGqJlN}C%C{y?@ zh2ZNng7@B7-MQ;HsP&oI5jJEK9jm=HT;USxkhzs0dayS93f$RZc7pXFw2XidR%(m^ zA#-he2`t%^kAI;1!*j%7`u->;nglfso zQDA*QgSAOGtK6soQ$)ekFiAaKF|*R>bt?n^ka^Bf-FGMS)KrEj2M~Df(d7;lYU!#08N~+$VW7yuG-nY1s z5BBmWHV+a71exR3m?j*O7LvJ+&a4x*?NNkC3?f2Jo?&<%M3zypYfRtFbW>+`;m02$ zZ~h!2!eYy`#uInsESYFqm(LSha!Gwo)`?nlrn2)NSRYZ02)OZ;mWP)4FdmE zMc$}_q3%F#d<$^n8?Z@#EmS5Z)6%e=+{MA+O9}^MHP6sivrahs>1MFC{LG_4ZD!sR zh=Oj*upNq_hKYMjd_o&j}rQMk^bP!EMsOYqs-K;druwHo=^HxLC@12UmbS2VG#Top|Evs#y4Z%-3* zcCV&p0J*`My)Ln}bv|3Tr7~#jVu|0Kh=}^DC{I!7pegY+Iw5#){{xjxmjF7%oHg9g z6`K`%)GLoDA_}=z-LH0OrHDy4Abd0M@YoOxQA{7cjd2@QlwUlAfBY1AlxnwSrQvb4 zE|;`cN=36T9aG5*HeyY!nsp?+q?kh^6tbT|A)B_d!yoVb;vTRb$Jb17B_TN>b+_;K zC>Z(`ICUOc)eQsiAUD*T{=wPx3f<6~+wmKZlkZRgn#G~bHc2QQ5BseNZ^%`M$NiE{ zjF?L*lL}CD81|K803;JU4DC4{Ri-dzffO^QUA`0l>`C(SYkh8cT9(kp^9rfQS5!BM^|^pgC^eL$ zHGkT#Ax#|8AbUDSk>&#Jcs5?%wp;jwf&be#n6g-sd}fzDY$-}y?lb~}%J{+gZ(o2P z<(+#0{+{I#qm7~0Z47wP#vkPk8^hnNfi199vtCMZn{T5)N>S#K{T>SJPgzP?uX7mV zJq}30pf@;RR+a+ROA&^=4FL%o^d1DP*%j9Rrhz}kD+Z9_K`$4;qAbPCj{Naj@sWx_ zP59UoETCAu?aOa!NAd@?vtx1A5SVS^RYs zp~UK2Ubmr1~5cb=W<$ zSeS(us{wUcou^3jpq^6>p_o0Kh)~OK-b6|UwQXYl?(gqwfLuoBBN98P=MhV@jDgun zxRMq^qzFYjcJm;|aWFgYkmFcx%NO|^qK|)dnH6LLy7V4}XAM|9G z+uVb63~KAaGAtde1{p-t@j7FWra?V1G))mEjP{Z-`$~|CK^-O7<1E%N8#PdPTTX`t z5|1|W*}WMYN5|@OK#p!ig98#hsI`G6A(|7idIwO<8P+s_WDaT<;LK#0MfqqZb9>eJ zNb#ULd@RU%49wc~OVpWHByXH z^6cVitj5BOS%7@omB&acD!^ga9AkEtR%Ws8@G9NzAgEamyFQl=*9bjUF)T9jWmmx> zC4=f;>8TX**UXyIaf(Tif6Z%h>q;&!7wXy&@^Sar8!8&prRbkuV+=epkF_t%ms-aRN#W8SkHo4Eg}MDGG>_w z(le+)1aq+n#j3hks@wue98_q5+6jt)JKlbB@1fCZQ3vMr5CnRS9s@}chcPOrm34R@w{>3>h*4bsp(d3FQ7 zKY8Bb}#+g9eAmGX-e>) zb^>wU?v+2>iRVaT_vB~0@t@H5T)GGR9%onF7x&`N@JLVhd%xI=|CIjThxdcu@L>1K z-#&ncNqP6=FCWC4w4zT$_*1liXQYg={z{Hl>EZo5CHSR>x&kgA1fmtx$+Y0#AA)FQ zyO&;g1TT>0?#aJWxcmduNX&9W=im$36GJ&?p=Rl0XXzT{GAPe zM}y+Pi}X`E;>Q0mkF<7g`n4BtlcDa(-}%9q(4(Ce{2>Uxm+W4;5Wy?mr#UV7kB{M? z$hwE#(LV)4>=ELZQGkT5d()e7aK+U<`E3&aF=^_a{LeI=Bqh|zwBTA62k-fgbkF`p z0sp5w@=v;7yjr56m=gTo3K&OE#LpIhCP(*c=j!<5;KR?X_ltoW&TJYbV;Q=XXcy0Hcf@xxc$Df#) zjF$@{>L{j;l3lCztP-x;Ef7r7|E;C}>wYFY&pWtUa2{mv4sHT}z?+$4)JqRu`R{q~ z(GKtts3HYT59}h+=k5m=XW;iY3()2tdezLwi(mYc{|eq^^{Cjyi>($P5FQW-F0Tde cX+LsgayNg~^qwyd2-iIQ?ZkxBqrKz*1F*3}4*&oF literal 0 HcmV?d00001 diff --git a/docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/orc/orc_tiny_stripes/random_output_6_1.orc b/docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/orc/orc_tiny_stripes/random_output_6_1.orc new file mode 100644 index 0000000000000000000000000000000000000000..7fce353b526ed55e0a0e0cbd27fe3097c3cb4856 GIT binary patch literal 2293 zcmbuB%}*0S6u@UcScajA;~?^ABCOQ_5@j!ZTI=jJ^?ox_}eQ)RO&YStYeednW;vu3j6-y=} zwM0gUoxu{T!Wn~R49FPVe2XZE+XGng8KgE44F|%};nZxTJGPb_?}{(SlS$@mhE_hk zL3jo3mEoONKfer7AwftJO(JNBBIo&=NPS=TO(h!EC;}|B1a(5o&?~e8I@c1FAore3 zWl+RehFYN&sN{HvA}-}Z=~cpaM;+-8()1t=2Wh_jZj0XF^sR&~Cub*;_g1#%`;-3e zRsWiQjyc%i@=3URpG1+vKaL0Qzm_P!fM0z3A&dJa=ME59 z7&GsTjE9HY{h^18kBxBOz+%_^z_l22eFC}EDf#V(_M^zvg>xjlA_6iDl!yeB7m57j z&n9R#0oep6xL}3hJP!jU6#WagX6T2=>=Ef2< z6{n}!P0`&H?56m3J1v^4oTiD;na-t%xe;BS3J3blM=Nuz$O8Hi#+QOoNDwl|c%wCn zil$n)#09sj1TH;2?p~1Y1z|79ciwE_>KxZg$m)g}*;rp3otR>I_i*kch@ft4Cm==iDmWvwlX*;oN4<^$EOXYRH^4p&KB5U>{PTl7J+Kl^}kpi&1 zx^^pQ%aubZlLrN^@`|dwYO)q^s3S5`us~iKm(E{Tr&ytHs=b7(G+fdXEt0nyKb7ew z=wjf(e-!#cf+qXXwnitR#vXjHr0dXN4?b1Vd4Sx&*N8n#kz~cIPOmJr3)vxLr;b{X zfqWRNhj*2Scec&YdQ_~LsOK!!Sat54A^tf(RBmd3L uGkW*y!_H^LM8S^R(o{u9o6**evl2fwY;UVR8F^a1s)qLLx1z-2q58jQP|VK& literal 0 HcmV?d00001 diff --git a/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java b/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java index 55687a30fa4f6a..aac7cf3bab799a 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java +++ b/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java @@ -470,11 +470,11 @@ public class SessionVariable implements Serializable, Writable { public static final String ENABLE_ORC_LAZY_MAT = "enable_orc_lazy_materialization"; - public static final String ORC_TINY_STRIPE_THRESHOLD = "orc_tiny_stripe_threshold"; + public static final String ORC_TINY_STRIPE_THRESHOLD_BYTES = "orc_tiny_stripe_threshold_bytes"; - public static final String ORC_ONCE_MAX_READ_SIZE = "orc_once_max_read_size"; + public static final String ORC_ONCE_MAX_READ_BYTES = "orc_once_max_read_bytes"; - public static final String ORC_MAX_MERGE_DISTANCE = "orc_max_merge_distance"; + public static final String ORC_MAX_MERGE_DISTANCE_BYTES = "orc_max_merge_distance_bytes"; public static final String ENABLE_PARQUET_FILTER_BY_MIN_MAX = "enable_parquet_filter_by_min_max"; @@ -1726,7 +1726,7 @@ public void setEnableLeftZigZag(boolean enableLeftZigZag) { @VariableMgr.VarAttr( - name = ORC_TINY_STRIPE_THRESHOLD, + name = ORC_TINY_STRIPE_THRESHOLD_BYTES, description = {"在orc文件中如果一个stripe的字节大小小于`orc_tiny_stripe_threshold`," + "我们认为该stripe为 tiny stripe。对于多个连续的tiny stripe我们会进行读取优化,即一次性读多个tiny stripe." + "如果你不想使用该优化,可以将该值设置为0。默认为 8M。", @@ -1736,12 +1736,12 @@ public void setEnableLeftZigZag(boolean enableLeftZigZag) { + "If you do not want to use this optimization, you can set this value to 0." + "The default is 8M."}, needForward = true, - setter = "setOrcTinyStripeThreshold") - public long orcTinyStripeThreshold = 8L * 1024L * 1024L; + setter = "setOrcTinyStripeThresholdBytes") + public long orcTinyStripeThresholdBytes = 8L * 1024L * 1024L; @VariableMgr.VarAttr( - name = ORC_ONCE_MAX_READ_SIZE, + name = ORC_ONCE_MAX_READ_BYTES, description = {"在使用tiny stripe读取优化的时候,会对多个tiny stripe合并成一次IO," + "该参数用来控制每次IO请求的最大字节大小。你不应该将值设置的小于`orc_tiny_stripe_threshold`。默认为 8M。", "When using tiny stripe read optimization, multiple tiny stripes will be merged into one IO." @@ -1749,20 +1749,20 @@ public void setEnableLeftZigZag(boolean enableLeftZigZag) { + "You should not set the value less than `orc_tiny_stripe_threshold`." + "The default is 8M."}, needForward = true, - setter = "setOrcOnceMaxReadSize") - public long orcOnceMaxReadSize = 8L * 1024L * 1024L; + setter = "setOrcOnceMaxReadBytes") + public long orcOnceMaxReadBytes = 8L * 1024L * 1024L; @VariableMgr.VarAttr( - name = ORC_MAX_MERGE_DISTANCE, + name = ORC_MAX_MERGE_DISTANCE_BYTES, description = {"在使用tiny stripe读取优化的时候,由于tiny stripe并不一定连续。" + "当两个tiny stripe之间距离大于该参数时,我们不会将其合并成一次IO。默认为 1M。", "When using tiny stripe read optimization, since tiny stripes are not necessarily continuous," + "when the distance between two tiny stripes is greater than this parameter," + "we will not merge them into one IO. The default value is 1M."}, needForward = true, - setter = "setOrcMaxMergeDistance") - public long orcMaxMergeDistance = 1024L * 1024L; + setter = "setOrcMaxMergeDistanceBytes") + public long orcMaxMergeDistanceBytes = 1024L * 1024L; @VariableMgr.VarAttr( @@ -2845,19 +2845,19 @@ public void setFragmentInstanceNum(String value) throws Exception { this.parallelExecInstanceNum = val; } - public void setOrcTinyStripeThreshold(String value) throws Exception { - long val = checkFieldLongValue(ORC_TINY_STRIPE_THRESHOLD, 0, value); - this.orcTinyStripeThreshold = val; + public void setOrcTinyStripeThresholdBytes(String value) throws Exception { + long val = checkFieldLongValue(ORC_TINY_STRIPE_THRESHOLD_BYTES, 0, value); + this.orcTinyStripeThresholdBytes = val; } - public void setOrcOnceMaxReadSize(String value) throws Exception { - long val = checkFieldLongValue(ORC_ONCE_MAX_READ_SIZE, 0, value); - this.orcOnceMaxReadSize = val; + public void setOrcOnceMaxReadBytes(String value) throws Exception { + long val = checkFieldLongValue(ORC_ONCE_MAX_READ_BYTES, 0, value); + this.orcOnceMaxReadBytes = val; } - public void setOrcMaxMergeDistance(String value) throws Exception { - long val = checkFieldLongValue(ORC_MAX_MERGE_DISTANCE, 0, value); - this.orcMaxMergeDistance = val; + public void setOrcMaxMergeDistanceBytes(String value) throws Exception { + long val = checkFieldLongValue(ORC_MAX_MERGE_DISTANCE_BYTES, 0, value); + this.orcMaxMergeDistanceBytes = val; } private long checkFieldLongValue(String variableName, long minValue, String value) throws Exception { @@ -3940,9 +3940,9 @@ public TQueryOptions toThrift() { tResult.setEnablePhraseQuerySequentialOpt(enablePhraseQuerySequentialOpt); tResult.setEnableAutoCreateWhenOverwrite(enableAutoCreateWhenOverwrite); - tResult.setOrcTinyStripeThreshold(orcTinyStripeThreshold); - tResult.setOrcMaxMergeDistance(orcMaxMergeDistance); - tResult.setOrcOnceMaxReadSize(orcOnceMaxReadSize); + tResult.setOrcTinyStripeThresholdBytes(orcTinyStripeThresholdBytes); + tResult.setOrcMaxMergeDistanceBytes(orcMaxMergeDistanceBytes); + tResult.setOrcOnceMaxReadBytes(orcOnceMaxReadBytes); return tResult; } diff --git a/gensrc/thrift/PaloInternalService.thrift b/gensrc/thrift/PaloInternalService.thrift index 485d4c930d4a22..c4f83ad537d534 100644 --- a/gensrc/thrift/PaloInternalService.thrift +++ b/gensrc/thrift/PaloInternalService.thrift @@ -350,9 +350,9 @@ struct TQueryOptions { 135: optional bool enable_parallel_outfile = false; 136: optional bool enable_phrase_query_sequential_opt = true; - 137: optional i64 orc_tiny_stripe_threshold = 8388608; - 138: optional i64 orc_once_max_read_size = 8388608; - 139: optional i64 orc_max_merge_distance = 1048576; + 137: optional i64 orc_tiny_stripe_threshold_bytes = 8388608; + 138: optional i64 orc_once_max_read_bytes = 8388608; + 139: optional i64 orc_max_merge_distance_bytes = 1048576; 137: optional bool enable_auto_create_when_overwrite = false; // For cloud, to control if the content would be written into file cache diff --git a/regression-test/data/external_table_p0/hive/test_orc_tiny_stripes.out b/regression-test/data/external_table_p0/hive/test_orc_tiny_stripes.out new file mode 100644 index 00000000000000..d08eb7c887263e --- /dev/null +++ b/regression-test/data/external_table_p0/hive/test_orc_tiny_stripes.out @@ -0,0 +1,2311 @@ +-- This file is automatically generated. You should know what you did if you want to edit this +-- !test_1 -- +372 + +-- !test_2 -- +1 str_1 10000000001 +1 str_1 10000000001 + +-- !test_3 -- +0 str_0 10000000000 +0 str_0 10000000000 +100 9DPJaFc00euBteqiW1f1 10000000027 +100 str_100 10000000100 +2200 tQ7BRFEuf8h56kahqsLPa1vu 10000000034 +4800 TaWGgh4iZ 10000000115 +5700 SwOaGJj9fVbk5j0Np 10000000050 + +-- !test_4 -- +4 str_4 10000000004 +4 str_4 10000000004 + +-- !test_5 -- +348 + +-- !test_6 -- +5 str_5 10000000005 +5 str_5 10000000005 +53 str_53 10000000053 +146 str_146 10000000146 +3961 hMgIY4oui0MHYgaIFg4zz5Ti3p 10000000053 +4129 qwPIwtkTZb 10000000005 +4942 vAdLpLUN3VkGNmTjvuPv 10000000053 +5349 koTeYPr2Qaqqnlk07X 10000000146 +5745 1cx1jZ6QGRWAkskiOgURj6dscYxDOl 10000000005 +7573 e3lIPwNnbG6DPmog 10000000005 +8614 TtyopDvRptLB5 10000000005 + +-- !test_7 -- +5 str_5 10000000005 +5 str_5 10000000005 +53 str_53 10000000053 +146 str_146 10000000146 +3961 hMgIY4oui0MHYgaIFg4zz5Ti3p 10000000053 +4129 qwPIwtkTZb 10000000005 +4942 vAdLpLUN3VkGNmTjvuPv 10000000053 +5349 koTeYPr2Qaqqnlk07X 10000000146 +5745 1cx1jZ6QGRWAkskiOgURj6dscYxDOl 10000000005 +7573 e3lIPwNnbG6DPmog 10000000005 +8614 TtyopDvRptLB5 10000000005 + +-- !test_8 -- +10000000005 +10000000005 +10000000005 +10000000005 +10000000005 +10000000005 +10000000053 +10000000053 +10000000053 +10000000146 +10000000146 + +-- !test_9 -- +10 + +-- !test_10 -- +1cx1jZ6QGRWAkskiOgURj6dscYxDOl +Asn3tnIg1xYm8Lbgey8baqw3EmooFm +MSBtFURjtMu3LyDTLYx9FBM23UQdZ1 +e8e7xgwaSI2JKI65FEThzSQBVmKeAZ +w3xAirHLO1tvjon2jgr7y9tBtrGfMS +zABBLCkowUIqfONQOAjir8YPkFqfDW + +-- !test_11 -- +0 str_0 10000000000 +0 str_0 10000000000 +1 str_1 10000000001 +1 str_1 10000000001 +2 str_2 10000000002 +2 str_2 10000000002 +3 str_3 10000000003 +3 str_3 10000000003 +4 str_4 10000000004 +4 str_4 10000000004 +5 str_5 10000000005 +5 str_5 10000000005 +6 str_6 10000000006 +7 str_7 10000000007 +8 str_8 10000000008 +9 str_9 10000000009 + +-- !test_12 -- +0 +0 +6 + +-- !test_13 -- +20 +60 + +-- !test_14 -- +0 +0 +40 + +-- !test_1 -- +372 + +-- !test_2 -- +1 str_1 10000000001 +1 str_1 10000000001 + +-- !test_3 -- +0 str_0 10000000000 +0 str_0 10000000000 +100 9DPJaFc00euBteqiW1f1 10000000027 +100 str_100 10000000100 +2200 tQ7BRFEuf8h56kahqsLPa1vu 10000000034 +4800 TaWGgh4iZ 10000000115 +5700 SwOaGJj9fVbk5j0Np 10000000050 + +-- !test_4 -- +4 str_4 10000000004 +4 str_4 10000000004 + +-- !test_5 -- +348 + +-- !test_6 -- +5 str_5 10000000005 +5 str_5 10000000005 +53 str_53 10000000053 +146 str_146 10000000146 +3961 hMgIY4oui0MHYgaIFg4zz5Ti3p 10000000053 +4129 qwPIwtkTZb 10000000005 +4942 vAdLpLUN3VkGNmTjvuPv 10000000053 +5349 koTeYPr2Qaqqnlk07X 10000000146 +5745 1cx1jZ6QGRWAkskiOgURj6dscYxDOl 10000000005 +7573 e3lIPwNnbG6DPmog 10000000005 +8614 TtyopDvRptLB5 10000000005 + +-- !test_7 -- +5 str_5 10000000005 +5 str_5 10000000005 +53 str_53 10000000053 +146 str_146 10000000146 +3961 hMgIY4oui0MHYgaIFg4zz5Ti3p 10000000053 +4129 qwPIwtkTZb 10000000005 +4942 vAdLpLUN3VkGNmTjvuPv 10000000053 +5349 koTeYPr2Qaqqnlk07X 10000000146 +5745 1cx1jZ6QGRWAkskiOgURj6dscYxDOl 10000000005 +7573 e3lIPwNnbG6DPmog 10000000005 +8614 TtyopDvRptLB5 10000000005 + +-- !test_8 -- +10000000005 +10000000005 +10000000005 +10000000005 +10000000005 +10000000005 +10000000053 +10000000053 +10000000053 +10000000146 +10000000146 + +-- !test_9 -- +10 + +-- !test_10 -- +1cx1jZ6QGRWAkskiOgURj6dscYxDOl +Asn3tnIg1xYm8Lbgey8baqw3EmooFm +MSBtFURjtMu3LyDTLYx9FBM23UQdZ1 +e8e7xgwaSI2JKI65FEThzSQBVmKeAZ +w3xAirHLO1tvjon2jgr7y9tBtrGfMS +zABBLCkowUIqfONQOAjir8YPkFqfDW + +-- !test_11 -- +0 str_0 10000000000 +0 str_0 10000000000 +1 str_1 10000000001 +1 str_1 10000000001 +2 str_2 10000000002 +2 str_2 10000000002 +3 str_3 10000000003 +3 str_3 10000000003 +4 str_4 10000000004 +4 str_4 10000000004 +5 str_5 10000000005 +5 str_5 10000000005 +6 str_6 10000000006 +7 str_7 10000000007 +8 str_8 10000000008 +9 str_9 10000000009 + +-- !test_12 -- +0 +0 +6 + +-- !test_13 -- +20 +60 + +-- !test_14 -- +0 +0 +40 + +-- !test_1 -- +372 + +-- !test_2 -- +1 str_1 10000000001 +1 str_1 10000000001 + +-- !test_3 -- +0 str_0 10000000000 +0 str_0 10000000000 +100 9DPJaFc00euBteqiW1f1 10000000027 +100 str_100 10000000100 +2200 tQ7BRFEuf8h56kahqsLPa1vu 10000000034 +4800 TaWGgh4iZ 10000000115 +5700 SwOaGJj9fVbk5j0Np 10000000050 + +-- !test_4 -- +4 str_4 10000000004 +4 str_4 10000000004 + +-- !test_5 -- +348 + +-- !test_6 -- +5 str_5 10000000005 +5 str_5 10000000005 +53 str_53 10000000053 +146 str_146 10000000146 +3961 hMgIY4oui0MHYgaIFg4zz5Ti3p 10000000053 +4129 qwPIwtkTZb 10000000005 +4942 vAdLpLUN3VkGNmTjvuPv 10000000053 +5349 koTeYPr2Qaqqnlk07X 10000000146 +5745 1cx1jZ6QGRWAkskiOgURj6dscYxDOl 10000000005 +7573 e3lIPwNnbG6DPmog 10000000005 +8614 TtyopDvRptLB5 10000000005 + +-- !test_7 -- +5 str_5 10000000005 +5 str_5 10000000005 +53 str_53 10000000053 +146 str_146 10000000146 +3961 hMgIY4oui0MHYgaIFg4zz5Ti3p 10000000053 +4129 qwPIwtkTZb 10000000005 +4942 vAdLpLUN3VkGNmTjvuPv 10000000053 +5349 koTeYPr2Qaqqnlk07X 10000000146 +5745 1cx1jZ6QGRWAkskiOgURj6dscYxDOl 10000000005 +7573 e3lIPwNnbG6DPmog 10000000005 +8614 TtyopDvRptLB5 10000000005 + +-- !test_8 -- +10000000005 +10000000005 +10000000005 +10000000005 +10000000005 +10000000005 +10000000053 +10000000053 +10000000053 +10000000146 +10000000146 + +-- !test_9 -- +10 + +-- !test_10 -- +1cx1jZ6QGRWAkskiOgURj6dscYxDOl +Asn3tnIg1xYm8Lbgey8baqw3EmooFm +MSBtFURjtMu3LyDTLYx9FBM23UQdZ1 +e8e7xgwaSI2JKI65FEThzSQBVmKeAZ +w3xAirHLO1tvjon2jgr7y9tBtrGfMS +zABBLCkowUIqfONQOAjir8YPkFqfDW + +-- !test_11 -- +0 str_0 10000000000 +0 str_0 10000000000 +1 str_1 10000000001 +1 str_1 10000000001 +2 str_2 10000000002 +2 str_2 10000000002 +3 str_3 10000000003 +3 str_3 10000000003 +4 str_4 10000000004 +4 str_4 10000000004 +5 str_5 10000000005 +5 str_5 10000000005 +6 str_6 10000000006 +7 str_7 10000000007 +8 str_8 10000000008 +9 str_9 10000000009 + +-- !test_12 -- +0 +0 +6 + +-- !test_13 -- +20 +60 + +-- !test_14 -- +0 +0 +40 + +-- !test_1 -- +372 + +-- !test_2 -- +1 str_1 10000000001 +1 str_1 10000000001 + +-- !test_3 -- +0 str_0 10000000000 +0 str_0 10000000000 +100 9DPJaFc00euBteqiW1f1 10000000027 +100 str_100 10000000100 +2200 tQ7BRFEuf8h56kahqsLPa1vu 10000000034 +4800 TaWGgh4iZ 10000000115 +5700 SwOaGJj9fVbk5j0Np 10000000050 + +-- !test_4 -- +4 str_4 10000000004 +4 str_4 10000000004 + +-- !test_5 -- +348 + +-- !test_6 -- +5 str_5 10000000005 +5 str_5 10000000005 +53 str_53 10000000053 +146 str_146 10000000146 +3961 hMgIY4oui0MHYgaIFg4zz5Ti3p 10000000053 +4129 qwPIwtkTZb 10000000005 +4942 vAdLpLUN3VkGNmTjvuPv 10000000053 +5349 koTeYPr2Qaqqnlk07X 10000000146 +5745 1cx1jZ6QGRWAkskiOgURj6dscYxDOl 10000000005 +7573 e3lIPwNnbG6DPmog 10000000005 +8614 TtyopDvRptLB5 10000000005 + +-- !test_7 -- +5 str_5 10000000005 +5 str_5 10000000005 +53 str_53 10000000053 +146 str_146 10000000146 +3961 hMgIY4oui0MHYgaIFg4zz5Ti3p 10000000053 +4129 qwPIwtkTZb 10000000005 +4942 vAdLpLUN3VkGNmTjvuPv 10000000053 +5349 koTeYPr2Qaqqnlk07X 10000000146 +5745 1cx1jZ6QGRWAkskiOgURj6dscYxDOl 10000000005 +7573 e3lIPwNnbG6DPmog 10000000005 +8614 TtyopDvRptLB5 10000000005 + +-- !test_8 -- +10000000005 +10000000005 +10000000005 +10000000005 +10000000005 +10000000005 +10000000053 +10000000053 +10000000053 +10000000146 +10000000146 + +-- !test_9 -- +10 + +-- !test_10 -- +1cx1jZ6QGRWAkskiOgURj6dscYxDOl +Asn3tnIg1xYm8Lbgey8baqw3EmooFm +MSBtFURjtMu3LyDTLYx9FBM23UQdZ1 +e8e7xgwaSI2JKI65FEThzSQBVmKeAZ +w3xAirHLO1tvjon2jgr7y9tBtrGfMS +zABBLCkowUIqfONQOAjir8YPkFqfDW + +-- !test_11 -- +0 str_0 10000000000 +0 str_0 10000000000 +1 str_1 10000000001 +1 str_1 10000000001 +2 str_2 10000000002 +2 str_2 10000000002 +3 str_3 10000000003 +3 str_3 10000000003 +4 str_4 10000000004 +4 str_4 10000000004 +5 str_5 10000000005 +5 str_5 10000000005 +6 str_6 10000000006 +7 str_7 10000000007 +8 str_8 10000000008 +9 str_9 10000000009 + +-- !test_12 -- +0 +0 +6 + +-- !test_13 -- +20 +60 + +-- !test_14 -- +0 +0 +40 + +-- !test_1 -- +372 + +-- !test_2 -- +1 str_1 10000000001 +1 str_1 10000000001 + +-- !test_3 -- +0 str_0 10000000000 +0 str_0 10000000000 +100 9DPJaFc00euBteqiW1f1 10000000027 +100 str_100 10000000100 +2200 tQ7BRFEuf8h56kahqsLPa1vu 10000000034 +4800 TaWGgh4iZ 10000000115 +5700 SwOaGJj9fVbk5j0Np 10000000050 + +-- !test_4 -- +4 str_4 10000000004 +4 str_4 10000000004 + +-- !test_5 -- +348 + +-- !test_6 -- +5 str_5 10000000005 +5 str_5 10000000005 +53 str_53 10000000053 +146 str_146 10000000146 +3961 hMgIY4oui0MHYgaIFg4zz5Ti3p 10000000053 +4129 qwPIwtkTZb 10000000005 +4942 vAdLpLUN3VkGNmTjvuPv 10000000053 +5349 koTeYPr2Qaqqnlk07X 10000000146 +5745 1cx1jZ6QGRWAkskiOgURj6dscYxDOl 10000000005 +7573 e3lIPwNnbG6DPmog 10000000005 +8614 TtyopDvRptLB5 10000000005 + +-- !test_7 -- +5 str_5 10000000005 +5 str_5 10000000005 +53 str_53 10000000053 +146 str_146 10000000146 +3961 hMgIY4oui0MHYgaIFg4zz5Ti3p 10000000053 +4129 qwPIwtkTZb 10000000005 +4942 vAdLpLUN3VkGNmTjvuPv 10000000053 +5349 koTeYPr2Qaqqnlk07X 10000000146 +5745 1cx1jZ6QGRWAkskiOgURj6dscYxDOl 10000000005 +7573 e3lIPwNnbG6DPmog 10000000005 +8614 TtyopDvRptLB5 10000000005 + +-- !test_8 -- +10000000005 +10000000005 +10000000005 +10000000005 +10000000005 +10000000005 +10000000053 +10000000053 +10000000053 +10000000146 +10000000146 + +-- !test_9 -- +10 + +-- !test_10 -- +1cx1jZ6QGRWAkskiOgURj6dscYxDOl +Asn3tnIg1xYm8Lbgey8baqw3EmooFm +MSBtFURjtMu3LyDTLYx9FBM23UQdZ1 +e8e7xgwaSI2JKI65FEThzSQBVmKeAZ +w3xAirHLO1tvjon2jgr7y9tBtrGfMS +zABBLCkowUIqfONQOAjir8YPkFqfDW + +-- !test_11 -- +0 str_0 10000000000 +0 str_0 10000000000 +1 str_1 10000000001 +1 str_1 10000000001 +2 str_2 10000000002 +2 str_2 10000000002 +3 str_3 10000000003 +3 str_3 10000000003 +4 str_4 10000000004 +4 str_4 10000000004 +5 str_5 10000000005 +5 str_5 10000000005 +6 str_6 10000000006 +7 str_7 10000000007 +8 str_8 10000000008 +9 str_9 10000000009 + +-- !test_12 -- +0 +0 +6 + +-- !test_13 -- +20 +60 + +-- !test_14 -- +0 +0 +40 + +-- !test_1 -- +372 + +-- !test_2 -- +1 str_1 10000000001 +1 str_1 10000000001 + +-- !test_3 -- +0 str_0 10000000000 +0 str_0 10000000000 +100 9DPJaFc00euBteqiW1f1 10000000027 +100 str_100 10000000100 +2200 tQ7BRFEuf8h56kahqsLPa1vu 10000000034 +4800 TaWGgh4iZ 10000000115 +5700 SwOaGJj9fVbk5j0Np 10000000050 + +-- !test_4 -- +4 str_4 10000000004 +4 str_4 10000000004 + +-- !test_5 -- +348 + +-- !test_6 -- +5 str_5 10000000005 +5 str_5 10000000005 +53 str_53 10000000053 +146 str_146 10000000146 +3961 hMgIY4oui0MHYgaIFg4zz5Ti3p 10000000053 +4129 qwPIwtkTZb 10000000005 +4942 vAdLpLUN3VkGNmTjvuPv 10000000053 +5349 koTeYPr2Qaqqnlk07X 10000000146 +5745 1cx1jZ6QGRWAkskiOgURj6dscYxDOl 10000000005 +7573 e3lIPwNnbG6DPmog 10000000005 +8614 TtyopDvRptLB5 10000000005 + +-- !test_7 -- +5 str_5 10000000005 +5 str_5 10000000005 +53 str_53 10000000053 +146 str_146 10000000146 +3961 hMgIY4oui0MHYgaIFg4zz5Ti3p 10000000053 +4129 qwPIwtkTZb 10000000005 +4942 vAdLpLUN3VkGNmTjvuPv 10000000053 +5349 koTeYPr2Qaqqnlk07X 10000000146 +5745 1cx1jZ6QGRWAkskiOgURj6dscYxDOl 10000000005 +7573 e3lIPwNnbG6DPmog 10000000005 +8614 TtyopDvRptLB5 10000000005 + +-- !test_8 -- +10000000005 +10000000005 +10000000005 +10000000005 +10000000005 +10000000005 +10000000053 +10000000053 +10000000053 +10000000146 +10000000146 + +-- !test_9 -- +10 + +-- !test_10 -- +1cx1jZ6QGRWAkskiOgURj6dscYxDOl +Asn3tnIg1xYm8Lbgey8baqw3EmooFm +MSBtFURjtMu3LyDTLYx9FBM23UQdZ1 +e8e7xgwaSI2JKI65FEThzSQBVmKeAZ +w3xAirHLO1tvjon2jgr7y9tBtrGfMS +zABBLCkowUIqfONQOAjir8YPkFqfDW + +-- !test_11 -- +0 str_0 10000000000 +0 str_0 10000000000 +1 str_1 10000000001 +1 str_1 10000000001 +2 str_2 10000000002 +2 str_2 10000000002 +3 str_3 10000000003 +3 str_3 10000000003 +4 str_4 10000000004 +4 str_4 10000000004 +5 str_5 10000000005 +5 str_5 10000000005 +6 str_6 10000000006 +7 str_7 10000000007 +8 str_8 10000000008 +9 str_9 10000000009 + +-- !test_12 -- +0 +0 +6 + +-- !test_13 -- +20 +60 + +-- !test_14 -- +0 +0 +40 + +-- !test_1 -- +372 + +-- !test_2 -- +1 str_1 10000000001 +1 str_1 10000000001 + +-- !test_3 -- +0 str_0 10000000000 +0 str_0 10000000000 +100 9DPJaFc00euBteqiW1f1 10000000027 +100 str_100 10000000100 +2200 tQ7BRFEuf8h56kahqsLPa1vu 10000000034 +4800 TaWGgh4iZ 10000000115 +5700 SwOaGJj9fVbk5j0Np 10000000050 + +-- !test_4 -- +4 str_4 10000000004 +4 str_4 10000000004 + +-- !test_5 -- +348 + +-- !test_6 -- +5 str_5 10000000005 +5 str_5 10000000005 +53 str_53 10000000053 +146 str_146 10000000146 +3961 hMgIY4oui0MHYgaIFg4zz5Ti3p 10000000053 +4129 qwPIwtkTZb 10000000005 +4942 vAdLpLUN3VkGNmTjvuPv 10000000053 +5349 koTeYPr2Qaqqnlk07X 10000000146 +5745 1cx1jZ6QGRWAkskiOgURj6dscYxDOl 10000000005 +7573 e3lIPwNnbG6DPmog 10000000005 +8614 TtyopDvRptLB5 10000000005 + +-- !test_7 -- +5 str_5 10000000005 +5 str_5 10000000005 +53 str_53 10000000053 +146 str_146 10000000146 +3961 hMgIY4oui0MHYgaIFg4zz5Ti3p 10000000053 +4129 qwPIwtkTZb 10000000005 +4942 vAdLpLUN3VkGNmTjvuPv 10000000053 +5349 koTeYPr2Qaqqnlk07X 10000000146 +5745 1cx1jZ6QGRWAkskiOgURj6dscYxDOl 10000000005 +7573 e3lIPwNnbG6DPmog 10000000005 +8614 TtyopDvRptLB5 10000000005 + +-- !test_8 -- +10000000005 +10000000005 +10000000005 +10000000005 +10000000005 +10000000005 +10000000053 +10000000053 +10000000053 +10000000146 +10000000146 + +-- !test_9 -- +10 + +-- !test_10 -- +1cx1jZ6QGRWAkskiOgURj6dscYxDOl +Asn3tnIg1xYm8Lbgey8baqw3EmooFm +MSBtFURjtMu3LyDTLYx9FBM23UQdZ1 +e8e7xgwaSI2JKI65FEThzSQBVmKeAZ +w3xAirHLO1tvjon2jgr7y9tBtrGfMS +zABBLCkowUIqfONQOAjir8YPkFqfDW + +-- !test_11 -- +0 str_0 10000000000 +0 str_0 10000000000 +1 str_1 10000000001 +1 str_1 10000000001 +2 str_2 10000000002 +2 str_2 10000000002 +3 str_3 10000000003 +3 str_3 10000000003 +4 str_4 10000000004 +4 str_4 10000000004 +5 str_5 10000000005 +5 str_5 10000000005 +6 str_6 10000000006 +7 str_7 10000000007 +8 str_8 10000000008 +9 str_9 10000000009 + +-- !test_12 -- +0 +0 +6 + +-- !test_13 -- +20 +60 + +-- !test_14 -- +0 +0 +40 + +-- !test_1 -- +372 + +-- !test_2 -- +1 str_1 10000000001 +1 str_1 10000000001 + +-- !test_3 -- +0 str_0 10000000000 +0 str_0 10000000000 +100 9DPJaFc00euBteqiW1f1 10000000027 +100 str_100 10000000100 +2200 tQ7BRFEuf8h56kahqsLPa1vu 10000000034 +4800 TaWGgh4iZ 10000000115 +5700 SwOaGJj9fVbk5j0Np 10000000050 + +-- !test_4 -- +4 str_4 10000000004 +4 str_4 10000000004 + +-- !test_5 -- +348 + +-- !test_6 -- +5 str_5 10000000005 +5 str_5 10000000005 +53 str_53 10000000053 +146 str_146 10000000146 +3961 hMgIY4oui0MHYgaIFg4zz5Ti3p 10000000053 +4129 qwPIwtkTZb 10000000005 +4942 vAdLpLUN3VkGNmTjvuPv 10000000053 +5349 koTeYPr2Qaqqnlk07X 10000000146 +5745 1cx1jZ6QGRWAkskiOgURj6dscYxDOl 10000000005 +7573 e3lIPwNnbG6DPmog 10000000005 +8614 TtyopDvRptLB5 10000000005 + +-- !test_7 -- +5 str_5 10000000005 +5 str_5 10000000005 +53 str_53 10000000053 +146 str_146 10000000146 +3961 hMgIY4oui0MHYgaIFg4zz5Ti3p 10000000053 +4129 qwPIwtkTZb 10000000005 +4942 vAdLpLUN3VkGNmTjvuPv 10000000053 +5349 koTeYPr2Qaqqnlk07X 10000000146 +5745 1cx1jZ6QGRWAkskiOgURj6dscYxDOl 10000000005 +7573 e3lIPwNnbG6DPmog 10000000005 +8614 TtyopDvRptLB5 10000000005 + +-- !test_8 -- +10000000005 +10000000005 +10000000005 +10000000005 +10000000005 +10000000005 +10000000053 +10000000053 +10000000053 +10000000146 +10000000146 + +-- !test_9 -- +10 + +-- !test_10 -- +1cx1jZ6QGRWAkskiOgURj6dscYxDOl +Asn3tnIg1xYm8Lbgey8baqw3EmooFm +MSBtFURjtMu3LyDTLYx9FBM23UQdZ1 +e8e7xgwaSI2JKI65FEThzSQBVmKeAZ +w3xAirHLO1tvjon2jgr7y9tBtrGfMS +zABBLCkowUIqfONQOAjir8YPkFqfDW + +-- !test_11 -- +0 str_0 10000000000 +0 str_0 10000000000 +1 str_1 10000000001 +1 str_1 10000000001 +2 str_2 10000000002 +2 str_2 10000000002 +3 str_3 10000000003 +3 str_3 10000000003 +4 str_4 10000000004 +4 str_4 10000000004 +5 str_5 10000000005 +5 str_5 10000000005 +6 str_6 10000000006 +7 str_7 10000000007 +8 str_8 10000000008 +9 str_9 10000000009 + +-- !test_12 -- +0 +0 +6 + +-- !test_13 -- +20 +60 + +-- !test_14 -- +0 +0 +40 + +-- !test_1 -- +372 + +-- !test_2 -- +1 str_1 10000000001 +1 str_1 10000000001 + +-- !test_3 -- +0 str_0 10000000000 +0 str_0 10000000000 +100 9DPJaFc00euBteqiW1f1 10000000027 +100 str_100 10000000100 +2200 tQ7BRFEuf8h56kahqsLPa1vu 10000000034 +4800 TaWGgh4iZ 10000000115 +5700 SwOaGJj9fVbk5j0Np 10000000050 + +-- !test_4 -- +4 str_4 10000000004 +4 str_4 10000000004 + +-- !test_5 -- +348 + +-- !test_6 -- +5 str_5 10000000005 +5 str_5 10000000005 +53 str_53 10000000053 +146 str_146 10000000146 +3961 hMgIY4oui0MHYgaIFg4zz5Ti3p 10000000053 +4129 qwPIwtkTZb 10000000005 +4942 vAdLpLUN3VkGNmTjvuPv 10000000053 +5349 koTeYPr2Qaqqnlk07X 10000000146 +5745 1cx1jZ6QGRWAkskiOgURj6dscYxDOl 10000000005 +7573 e3lIPwNnbG6DPmog 10000000005 +8614 TtyopDvRptLB5 10000000005 + +-- !test_7 -- +5 str_5 10000000005 +5 str_5 10000000005 +53 str_53 10000000053 +146 str_146 10000000146 +3961 hMgIY4oui0MHYgaIFg4zz5Ti3p 10000000053 +4129 qwPIwtkTZb 10000000005 +4942 vAdLpLUN3VkGNmTjvuPv 10000000053 +5349 koTeYPr2Qaqqnlk07X 10000000146 +5745 1cx1jZ6QGRWAkskiOgURj6dscYxDOl 10000000005 +7573 e3lIPwNnbG6DPmog 10000000005 +8614 TtyopDvRptLB5 10000000005 + +-- !test_8 -- +10000000005 +10000000005 +10000000005 +10000000005 +10000000005 +10000000005 +10000000053 +10000000053 +10000000053 +10000000146 +10000000146 + +-- !test_9 -- +10 + +-- !test_10 -- +1cx1jZ6QGRWAkskiOgURj6dscYxDOl +Asn3tnIg1xYm8Lbgey8baqw3EmooFm +MSBtFURjtMu3LyDTLYx9FBM23UQdZ1 +e8e7xgwaSI2JKI65FEThzSQBVmKeAZ +w3xAirHLO1tvjon2jgr7y9tBtrGfMS +zABBLCkowUIqfONQOAjir8YPkFqfDW + +-- !test_11 -- +0 str_0 10000000000 +0 str_0 10000000000 +1 str_1 10000000001 +1 str_1 10000000001 +2 str_2 10000000002 +2 str_2 10000000002 +3 str_3 10000000003 +3 str_3 10000000003 +4 str_4 10000000004 +4 str_4 10000000004 +5 str_5 10000000005 +5 str_5 10000000005 +6 str_6 10000000006 +7 str_7 10000000007 +8 str_8 10000000008 +9 str_9 10000000009 + +-- !test_12 -- +0 +0 +6 + +-- !test_13 -- +20 +60 + +-- !test_14 -- +0 +0 +40 + +-- !test_1 -- +372 + +-- !test_2 -- +1 str_1 10000000001 +1 str_1 10000000001 + +-- !test_3 -- +0 str_0 10000000000 +0 str_0 10000000000 +100 9DPJaFc00euBteqiW1f1 10000000027 +100 str_100 10000000100 +2200 tQ7BRFEuf8h56kahqsLPa1vu 10000000034 +4800 TaWGgh4iZ 10000000115 +5700 SwOaGJj9fVbk5j0Np 10000000050 + +-- !test_4 -- +4 str_4 10000000004 +4 str_4 10000000004 + +-- !test_5 -- +348 + +-- !test_6 -- +5 str_5 10000000005 +5 str_5 10000000005 +53 str_53 10000000053 +146 str_146 10000000146 +3961 hMgIY4oui0MHYgaIFg4zz5Ti3p 10000000053 +4129 qwPIwtkTZb 10000000005 +4942 vAdLpLUN3VkGNmTjvuPv 10000000053 +5349 koTeYPr2Qaqqnlk07X 10000000146 +5745 1cx1jZ6QGRWAkskiOgURj6dscYxDOl 10000000005 +7573 e3lIPwNnbG6DPmog 10000000005 +8614 TtyopDvRptLB5 10000000005 + +-- !test_7 -- +5 str_5 10000000005 +5 str_5 10000000005 +53 str_53 10000000053 +146 str_146 10000000146 +3961 hMgIY4oui0MHYgaIFg4zz5Ti3p 10000000053 +4129 qwPIwtkTZb 10000000005 +4942 vAdLpLUN3VkGNmTjvuPv 10000000053 +5349 koTeYPr2Qaqqnlk07X 10000000146 +5745 1cx1jZ6QGRWAkskiOgURj6dscYxDOl 10000000005 +7573 e3lIPwNnbG6DPmog 10000000005 +8614 TtyopDvRptLB5 10000000005 + +-- !test_8 -- +10000000005 +10000000005 +10000000005 +10000000005 +10000000005 +10000000005 +10000000053 +10000000053 +10000000053 +10000000146 +10000000146 + +-- !test_9 -- +10 + +-- !test_10 -- +1cx1jZ6QGRWAkskiOgURj6dscYxDOl +Asn3tnIg1xYm8Lbgey8baqw3EmooFm +MSBtFURjtMu3LyDTLYx9FBM23UQdZ1 +e8e7xgwaSI2JKI65FEThzSQBVmKeAZ +w3xAirHLO1tvjon2jgr7y9tBtrGfMS +zABBLCkowUIqfONQOAjir8YPkFqfDW + +-- !test_11 -- +0 str_0 10000000000 +0 str_0 10000000000 +1 str_1 10000000001 +1 str_1 10000000001 +2 str_2 10000000002 +2 str_2 10000000002 +3 str_3 10000000003 +3 str_3 10000000003 +4 str_4 10000000004 +4 str_4 10000000004 +5 str_5 10000000005 +5 str_5 10000000005 +6 str_6 10000000006 +7 str_7 10000000007 +8 str_8 10000000008 +9 str_9 10000000009 + +-- !test_12 -- +0 +0 +6 + +-- !test_13 -- +20 +60 + +-- !test_14 -- +0 +0 +40 + +-- !test_1 -- +372 + +-- !test_2 -- +1 str_1 10000000001 +1 str_1 10000000001 + +-- !test_3 -- +0 str_0 10000000000 +0 str_0 10000000000 +100 9DPJaFc00euBteqiW1f1 10000000027 +100 str_100 10000000100 +2200 tQ7BRFEuf8h56kahqsLPa1vu 10000000034 +4800 TaWGgh4iZ 10000000115 +5700 SwOaGJj9fVbk5j0Np 10000000050 + +-- !test_4 -- +4 str_4 10000000004 +4 str_4 10000000004 + +-- !test_5 -- +348 + +-- !test_6 -- +5 str_5 10000000005 +5 str_5 10000000005 +53 str_53 10000000053 +146 str_146 10000000146 +3961 hMgIY4oui0MHYgaIFg4zz5Ti3p 10000000053 +4129 qwPIwtkTZb 10000000005 +4942 vAdLpLUN3VkGNmTjvuPv 10000000053 +5349 koTeYPr2Qaqqnlk07X 10000000146 +5745 1cx1jZ6QGRWAkskiOgURj6dscYxDOl 10000000005 +7573 e3lIPwNnbG6DPmog 10000000005 +8614 TtyopDvRptLB5 10000000005 + +-- !test_7 -- +5 str_5 10000000005 +5 str_5 10000000005 +53 str_53 10000000053 +146 str_146 10000000146 +3961 hMgIY4oui0MHYgaIFg4zz5Ti3p 10000000053 +4129 qwPIwtkTZb 10000000005 +4942 vAdLpLUN3VkGNmTjvuPv 10000000053 +5349 koTeYPr2Qaqqnlk07X 10000000146 +5745 1cx1jZ6QGRWAkskiOgURj6dscYxDOl 10000000005 +7573 e3lIPwNnbG6DPmog 10000000005 +8614 TtyopDvRptLB5 10000000005 + +-- !test_8 -- +10000000005 +10000000005 +10000000005 +10000000005 +10000000005 +10000000005 +10000000053 +10000000053 +10000000053 +10000000146 +10000000146 + +-- !test_9 -- +10 + +-- !test_10 -- +1cx1jZ6QGRWAkskiOgURj6dscYxDOl +Asn3tnIg1xYm8Lbgey8baqw3EmooFm +MSBtFURjtMu3LyDTLYx9FBM23UQdZ1 +e8e7xgwaSI2JKI65FEThzSQBVmKeAZ +w3xAirHLO1tvjon2jgr7y9tBtrGfMS +zABBLCkowUIqfONQOAjir8YPkFqfDW + +-- !test_11 -- +0 str_0 10000000000 +0 str_0 10000000000 +1 str_1 10000000001 +1 str_1 10000000001 +2 str_2 10000000002 +2 str_2 10000000002 +3 str_3 10000000003 +3 str_3 10000000003 +4 str_4 10000000004 +4 str_4 10000000004 +5 str_5 10000000005 +5 str_5 10000000005 +6 str_6 10000000006 +7 str_7 10000000007 +8 str_8 10000000008 +9 str_9 10000000009 + +-- !test_12 -- +0 +0 +6 + +-- !test_13 -- +20 +60 + +-- !test_14 -- +0 +0 +40 + +-- !test_1 -- +372 + +-- !test_2 -- +1 str_1 10000000001 +1 str_1 10000000001 + +-- !test_3 -- +0 str_0 10000000000 +0 str_0 10000000000 +100 9DPJaFc00euBteqiW1f1 10000000027 +100 str_100 10000000100 +2200 tQ7BRFEuf8h56kahqsLPa1vu 10000000034 +4800 TaWGgh4iZ 10000000115 +5700 SwOaGJj9fVbk5j0Np 10000000050 + +-- !test_4 -- +4 str_4 10000000004 +4 str_4 10000000004 + +-- !test_5 -- +348 + +-- !test_6 -- +5 str_5 10000000005 +5 str_5 10000000005 +53 str_53 10000000053 +146 str_146 10000000146 +3961 hMgIY4oui0MHYgaIFg4zz5Ti3p 10000000053 +4129 qwPIwtkTZb 10000000005 +4942 vAdLpLUN3VkGNmTjvuPv 10000000053 +5349 koTeYPr2Qaqqnlk07X 10000000146 +5745 1cx1jZ6QGRWAkskiOgURj6dscYxDOl 10000000005 +7573 e3lIPwNnbG6DPmog 10000000005 +8614 TtyopDvRptLB5 10000000005 + +-- !test_7 -- +5 str_5 10000000005 +5 str_5 10000000005 +53 str_53 10000000053 +146 str_146 10000000146 +3961 hMgIY4oui0MHYgaIFg4zz5Ti3p 10000000053 +4129 qwPIwtkTZb 10000000005 +4942 vAdLpLUN3VkGNmTjvuPv 10000000053 +5349 koTeYPr2Qaqqnlk07X 10000000146 +5745 1cx1jZ6QGRWAkskiOgURj6dscYxDOl 10000000005 +7573 e3lIPwNnbG6DPmog 10000000005 +8614 TtyopDvRptLB5 10000000005 + +-- !test_8 -- +10000000005 +10000000005 +10000000005 +10000000005 +10000000005 +10000000005 +10000000053 +10000000053 +10000000053 +10000000146 +10000000146 + +-- !test_9 -- +10 + +-- !test_10 -- +1cx1jZ6QGRWAkskiOgURj6dscYxDOl +Asn3tnIg1xYm8Lbgey8baqw3EmooFm +MSBtFURjtMu3LyDTLYx9FBM23UQdZ1 +e8e7xgwaSI2JKI65FEThzSQBVmKeAZ +w3xAirHLO1tvjon2jgr7y9tBtrGfMS +zABBLCkowUIqfONQOAjir8YPkFqfDW + +-- !test_11 -- +0 str_0 10000000000 +0 str_0 10000000000 +1 str_1 10000000001 +1 str_1 10000000001 +2 str_2 10000000002 +2 str_2 10000000002 +3 str_3 10000000003 +3 str_3 10000000003 +4 str_4 10000000004 +4 str_4 10000000004 +5 str_5 10000000005 +5 str_5 10000000005 +6 str_6 10000000006 +7 str_7 10000000007 +8 str_8 10000000008 +9 str_9 10000000009 + +-- !test_12 -- +0 +0 +6 + +-- !test_13 -- +20 +60 + +-- !test_14 -- +0 +0 +40 + +-- !test_1 -- +372 + +-- !test_2 -- +1 str_1 10000000001 +1 str_1 10000000001 + +-- !test_3 -- +0 str_0 10000000000 +0 str_0 10000000000 +100 9DPJaFc00euBteqiW1f1 10000000027 +100 str_100 10000000100 +2200 tQ7BRFEuf8h56kahqsLPa1vu 10000000034 +4800 TaWGgh4iZ 10000000115 +5700 SwOaGJj9fVbk5j0Np 10000000050 + +-- !test_4 -- +4 str_4 10000000004 +4 str_4 10000000004 + +-- !test_5 -- +348 + +-- !test_6 -- +5 str_5 10000000005 +5 str_5 10000000005 +53 str_53 10000000053 +146 str_146 10000000146 +3961 hMgIY4oui0MHYgaIFg4zz5Ti3p 10000000053 +4129 qwPIwtkTZb 10000000005 +4942 vAdLpLUN3VkGNmTjvuPv 10000000053 +5349 koTeYPr2Qaqqnlk07X 10000000146 +5745 1cx1jZ6QGRWAkskiOgURj6dscYxDOl 10000000005 +7573 e3lIPwNnbG6DPmog 10000000005 +8614 TtyopDvRptLB5 10000000005 + +-- !test_7 -- +5 str_5 10000000005 +5 str_5 10000000005 +53 str_53 10000000053 +146 str_146 10000000146 +3961 hMgIY4oui0MHYgaIFg4zz5Ti3p 10000000053 +4129 qwPIwtkTZb 10000000005 +4942 vAdLpLUN3VkGNmTjvuPv 10000000053 +5349 koTeYPr2Qaqqnlk07X 10000000146 +5745 1cx1jZ6QGRWAkskiOgURj6dscYxDOl 10000000005 +7573 e3lIPwNnbG6DPmog 10000000005 +8614 TtyopDvRptLB5 10000000005 + +-- !test_8 -- +10000000005 +10000000005 +10000000005 +10000000005 +10000000005 +10000000005 +10000000053 +10000000053 +10000000053 +10000000146 +10000000146 + +-- !test_9 -- +10 + +-- !test_10 -- +1cx1jZ6QGRWAkskiOgURj6dscYxDOl +Asn3tnIg1xYm8Lbgey8baqw3EmooFm +MSBtFURjtMu3LyDTLYx9FBM23UQdZ1 +e8e7xgwaSI2JKI65FEThzSQBVmKeAZ +w3xAirHLO1tvjon2jgr7y9tBtrGfMS +zABBLCkowUIqfONQOAjir8YPkFqfDW + +-- !test_11 -- +0 str_0 10000000000 +0 str_0 10000000000 +1 str_1 10000000001 +1 str_1 10000000001 +2 str_2 10000000002 +2 str_2 10000000002 +3 str_3 10000000003 +3 str_3 10000000003 +4 str_4 10000000004 +4 str_4 10000000004 +5 str_5 10000000005 +5 str_5 10000000005 +6 str_6 10000000006 +7 str_7 10000000007 +8 str_8 10000000008 +9 str_9 10000000009 + +-- !test_12 -- +0 +0 +6 + +-- !test_13 -- +20 +60 + +-- !test_14 -- +0 +0 +40 + +-- !test_1 -- +372 + +-- !test_2 -- +1 str_1 10000000001 +1 str_1 10000000001 + +-- !test_3 -- +0 str_0 10000000000 +0 str_0 10000000000 +100 9DPJaFc00euBteqiW1f1 10000000027 +100 str_100 10000000100 +2200 tQ7BRFEuf8h56kahqsLPa1vu 10000000034 +4800 TaWGgh4iZ 10000000115 +5700 SwOaGJj9fVbk5j0Np 10000000050 + +-- !test_4 -- +4 str_4 10000000004 +4 str_4 10000000004 + +-- !test_5 -- +348 + +-- !test_6 -- +5 str_5 10000000005 +5 str_5 10000000005 +53 str_53 10000000053 +146 str_146 10000000146 +3961 hMgIY4oui0MHYgaIFg4zz5Ti3p 10000000053 +4129 qwPIwtkTZb 10000000005 +4942 vAdLpLUN3VkGNmTjvuPv 10000000053 +5349 koTeYPr2Qaqqnlk07X 10000000146 +5745 1cx1jZ6QGRWAkskiOgURj6dscYxDOl 10000000005 +7573 e3lIPwNnbG6DPmog 10000000005 +8614 TtyopDvRptLB5 10000000005 + +-- !test_7 -- +5 str_5 10000000005 +5 str_5 10000000005 +53 str_53 10000000053 +146 str_146 10000000146 +3961 hMgIY4oui0MHYgaIFg4zz5Ti3p 10000000053 +4129 qwPIwtkTZb 10000000005 +4942 vAdLpLUN3VkGNmTjvuPv 10000000053 +5349 koTeYPr2Qaqqnlk07X 10000000146 +5745 1cx1jZ6QGRWAkskiOgURj6dscYxDOl 10000000005 +7573 e3lIPwNnbG6DPmog 10000000005 +8614 TtyopDvRptLB5 10000000005 + +-- !test_8 -- +10000000005 +10000000005 +10000000005 +10000000005 +10000000005 +10000000005 +10000000053 +10000000053 +10000000053 +10000000146 +10000000146 + +-- !test_9 -- +10 + +-- !test_10 -- +1cx1jZ6QGRWAkskiOgURj6dscYxDOl +Asn3tnIg1xYm8Lbgey8baqw3EmooFm +MSBtFURjtMu3LyDTLYx9FBM23UQdZ1 +e8e7xgwaSI2JKI65FEThzSQBVmKeAZ +w3xAirHLO1tvjon2jgr7y9tBtrGfMS +zABBLCkowUIqfONQOAjir8YPkFqfDW + +-- !test_11 -- +0 str_0 10000000000 +0 str_0 10000000000 +1 str_1 10000000001 +1 str_1 10000000001 +2 str_2 10000000002 +2 str_2 10000000002 +3 str_3 10000000003 +3 str_3 10000000003 +4 str_4 10000000004 +4 str_4 10000000004 +5 str_5 10000000005 +5 str_5 10000000005 +6 str_6 10000000006 +7 str_7 10000000007 +8 str_8 10000000008 +9 str_9 10000000009 + +-- !test_12 -- +0 +0 +6 + +-- !test_13 -- +20 +60 + +-- !test_14 -- +0 +0 +40 + +-- !test_1 -- +372 + +-- !test_2 -- +1 str_1 10000000001 +1 str_1 10000000001 + +-- !test_3 -- +0 str_0 10000000000 +0 str_0 10000000000 +100 9DPJaFc00euBteqiW1f1 10000000027 +100 str_100 10000000100 +2200 tQ7BRFEuf8h56kahqsLPa1vu 10000000034 +4800 TaWGgh4iZ 10000000115 +5700 SwOaGJj9fVbk5j0Np 10000000050 + +-- !test_4 -- +4 str_4 10000000004 +4 str_4 10000000004 + +-- !test_5 -- +348 + +-- !test_6 -- +5 str_5 10000000005 +5 str_5 10000000005 +53 str_53 10000000053 +146 str_146 10000000146 +3961 hMgIY4oui0MHYgaIFg4zz5Ti3p 10000000053 +4129 qwPIwtkTZb 10000000005 +4942 vAdLpLUN3VkGNmTjvuPv 10000000053 +5349 koTeYPr2Qaqqnlk07X 10000000146 +5745 1cx1jZ6QGRWAkskiOgURj6dscYxDOl 10000000005 +7573 e3lIPwNnbG6DPmog 10000000005 +8614 TtyopDvRptLB5 10000000005 + +-- !test_7 -- +5 str_5 10000000005 +5 str_5 10000000005 +53 str_53 10000000053 +146 str_146 10000000146 +3961 hMgIY4oui0MHYgaIFg4zz5Ti3p 10000000053 +4129 qwPIwtkTZb 10000000005 +4942 vAdLpLUN3VkGNmTjvuPv 10000000053 +5349 koTeYPr2Qaqqnlk07X 10000000146 +5745 1cx1jZ6QGRWAkskiOgURj6dscYxDOl 10000000005 +7573 e3lIPwNnbG6DPmog 10000000005 +8614 TtyopDvRptLB5 10000000005 + +-- !test_8 -- +10000000005 +10000000005 +10000000005 +10000000005 +10000000005 +10000000005 +10000000053 +10000000053 +10000000053 +10000000146 +10000000146 + +-- !test_9 -- +10 + +-- !test_10 -- +1cx1jZ6QGRWAkskiOgURj6dscYxDOl +Asn3tnIg1xYm8Lbgey8baqw3EmooFm +MSBtFURjtMu3LyDTLYx9FBM23UQdZ1 +e8e7xgwaSI2JKI65FEThzSQBVmKeAZ +w3xAirHLO1tvjon2jgr7y9tBtrGfMS +zABBLCkowUIqfONQOAjir8YPkFqfDW + +-- !test_11 -- +0 str_0 10000000000 +0 str_0 10000000000 +1 str_1 10000000001 +1 str_1 10000000001 +2 str_2 10000000002 +2 str_2 10000000002 +3 str_3 10000000003 +3 str_3 10000000003 +4 str_4 10000000004 +4 str_4 10000000004 +5 str_5 10000000005 +5 str_5 10000000005 +6 str_6 10000000006 +7 str_7 10000000007 +8 str_8 10000000008 +9 str_9 10000000009 + +-- !test_12 -- +0 +0 +6 + +-- !test_13 -- +20 +60 + +-- !test_14 -- +0 +0 +40 + +-- !test_1 -- +372 + +-- !test_2 -- +1 str_1 10000000001 +1 str_1 10000000001 + +-- !test_3 -- +0 str_0 10000000000 +0 str_0 10000000000 +100 9DPJaFc00euBteqiW1f1 10000000027 +100 str_100 10000000100 +2200 tQ7BRFEuf8h56kahqsLPa1vu 10000000034 +4800 TaWGgh4iZ 10000000115 +5700 SwOaGJj9fVbk5j0Np 10000000050 + +-- !test_4 -- +4 str_4 10000000004 +4 str_4 10000000004 + +-- !test_5 -- +348 + +-- !test_6 -- +5 str_5 10000000005 +5 str_5 10000000005 +53 str_53 10000000053 +146 str_146 10000000146 +3961 hMgIY4oui0MHYgaIFg4zz5Ti3p 10000000053 +4129 qwPIwtkTZb 10000000005 +4942 vAdLpLUN3VkGNmTjvuPv 10000000053 +5349 koTeYPr2Qaqqnlk07X 10000000146 +5745 1cx1jZ6QGRWAkskiOgURj6dscYxDOl 10000000005 +7573 e3lIPwNnbG6DPmog 10000000005 +8614 TtyopDvRptLB5 10000000005 + +-- !test_7 -- +5 str_5 10000000005 +5 str_5 10000000005 +53 str_53 10000000053 +146 str_146 10000000146 +3961 hMgIY4oui0MHYgaIFg4zz5Ti3p 10000000053 +4129 qwPIwtkTZb 10000000005 +4942 vAdLpLUN3VkGNmTjvuPv 10000000053 +5349 koTeYPr2Qaqqnlk07X 10000000146 +5745 1cx1jZ6QGRWAkskiOgURj6dscYxDOl 10000000005 +7573 e3lIPwNnbG6DPmog 10000000005 +8614 TtyopDvRptLB5 10000000005 + +-- !test_8 -- +10000000005 +10000000005 +10000000005 +10000000005 +10000000005 +10000000005 +10000000053 +10000000053 +10000000053 +10000000146 +10000000146 + +-- !test_9 -- +10 + +-- !test_10 -- +1cx1jZ6QGRWAkskiOgURj6dscYxDOl +Asn3tnIg1xYm8Lbgey8baqw3EmooFm +MSBtFURjtMu3LyDTLYx9FBM23UQdZ1 +e8e7xgwaSI2JKI65FEThzSQBVmKeAZ +w3xAirHLO1tvjon2jgr7y9tBtrGfMS +zABBLCkowUIqfONQOAjir8YPkFqfDW + +-- !test_11 -- +0 str_0 10000000000 +0 str_0 10000000000 +1 str_1 10000000001 +1 str_1 10000000001 +2 str_2 10000000002 +2 str_2 10000000002 +3 str_3 10000000003 +3 str_3 10000000003 +4 str_4 10000000004 +4 str_4 10000000004 +5 str_5 10000000005 +5 str_5 10000000005 +6 str_6 10000000006 +7 str_7 10000000007 +8 str_8 10000000008 +9 str_9 10000000009 + +-- !test_12 -- +0 +0 +6 + +-- !test_13 -- +20 +60 + +-- !test_14 -- +0 +0 +40 + +-- !test_1 -- +372 + +-- !test_2 -- +1 str_1 10000000001 +1 str_1 10000000001 + +-- !test_3 -- +0 str_0 10000000000 +0 str_0 10000000000 +100 9DPJaFc00euBteqiW1f1 10000000027 +100 str_100 10000000100 +2200 tQ7BRFEuf8h56kahqsLPa1vu 10000000034 +4800 TaWGgh4iZ 10000000115 +5700 SwOaGJj9fVbk5j0Np 10000000050 + +-- !test_4 -- +4 str_4 10000000004 +4 str_4 10000000004 + +-- !test_5 -- +348 + +-- !test_6 -- +5 str_5 10000000005 +5 str_5 10000000005 +53 str_53 10000000053 +146 str_146 10000000146 +3961 hMgIY4oui0MHYgaIFg4zz5Ti3p 10000000053 +4129 qwPIwtkTZb 10000000005 +4942 vAdLpLUN3VkGNmTjvuPv 10000000053 +5349 koTeYPr2Qaqqnlk07X 10000000146 +5745 1cx1jZ6QGRWAkskiOgURj6dscYxDOl 10000000005 +7573 e3lIPwNnbG6DPmog 10000000005 +8614 TtyopDvRptLB5 10000000005 + +-- !test_7 -- +5 str_5 10000000005 +5 str_5 10000000005 +53 str_53 10000000053 +146 str_146 10000000146 +3961 hMgIY4oui0MHYgaIFg4zz5Ti3p 10000000053 +4129 qwPIwtkTZb 10000000005 +4942 vAdLpLUN3VkGNmTjvuPv 10000000053 +5349 koTeYPr2Qaqqnlk07X 10000000146 +5745 1cx1jZ6QGRWAkskiOgURj6dscYxDOl 10000000005 +7573 e3lIPwNnbG6DPmog 10000000005 +8614 TtyopDvRptLB5 10000000005 + +-- !test_8 -- +10000000005 +10000000005 +10000000005 +10000000005 +10000000005 +10000000005 +10000000053 +10000000053 +10000000053 +10000000146 +10000000146 + +-- !test_9 -- +10 + +-- !test_10 -- +1cx1jZ6QGRWAkskiOgURj6dscYxDOl +Asn3tnIg1xYm8Lbgey8baqw3EmooFm +MSBtFURjtMu3LyDTLYx9FBM23UQdZ1 +e8e7xgwaSI2JKI65FEThzSQBVmKeAZ +w3xAirHLO1tvjon2jgr7y9tBtrGfMS +zABBLCkowUIqfONQOAjir8YPkFqfDW + +-- !test_11 -- +0 str_0 10000000000 +0 str_0 10000000000 +1 str_1 10000000001 +1 str_1 10000000001 +2 str_2 10000000002 +2 str_2 10000000002 +3 str_3 10000000003 +3 str_3 10000000003 +4 str_4 10000000004 +4 str_4 10000000004 +5 str_5 10000000005 +5 str_5 10000000005 +6 str_6 10000000006 +7 str_7 10000000007 +8 str_8 10000000008 +9 str_9 10000000009 + +-- !test_12 -- +0 +0 +6 + +-- !test_13 -- +20 +60 + +-- !test_14 -- +0 +0 +40 + +-- !test_1 -- +372 + +-- !test_2 -- +1 str_1 10000000001 +1 str_1 10000000001 + +-- !test_3 -- +0 str_0 10000000000 +0 str_0 10000000000 +100 9DPJaFc00euBteqiW1f1 10000000027 +100 str_100 10000000100 +2200 tQ7BRFEuf8h56kahqsLPa1vu 10000000034 +4800 TaWGgh4iZ 10000000115 +5700 SwOaGJj9fVbk5j0Np 10000000050 + +-- !test_4 -- +4 str_4 10000000004 +4 str_4 10000000004 + +-- !test_5 -- +348 + +-- !test_6 -- +5 str_5 10000000005 +5 str_5 10000000005 +53 str_53 10000000053 +146 str_146 10000000146 +3961 hMgIY4oui0MHYgaIFg4zz5Ti3p 10000000053 +4129 qwPIwtkTZb 10000000005 +4942 vAdLpLUN3VkGNmTjvuPv 10000000053 +5349 koTeYPr2Qaqqnlk07X 10000000146 +5745 1cx1jZ6QGRWAkskiOgURj6dscYxDOl 10000000005 +7573 e3lIPwNnbG6DPmog 10000000005 +8614 TtyopDvRptLB5 10000000005 + +-- !test_7 -- +5 str_5 10000000005 +5 str_5 10000000005 +53 str_53 10000000053 +146 str_146 10000000146 +3961 hMgIY4oui0MHYgaIFg4zz5Ti3p 10000000053 +4129 qwPIwtkTZb 10000000005 +4942 vAdLpLUN3VkGNmTjvuPv 10000000053 +5349 koTeYPr2Qaqqnlk07X 10000000146 +5745 1cx1jZ6QGRWAkskiOgURj6dscYxDOl 10000000005 +7573 e3lIPwNnbG6DPmog 10000000005 +8614 TtyopDvRptLB5 10000000005 + +-- !test_8 -- +10000000005 +10000000005 +10000000005 +10000000005 +10000000005 +10000000005 +10000000053 +10000000053 +10000000053 +10000000146 +10000000146 + +-- !test_9 -- +10 + +-- !test_10 -- +1cx1jZ6QGRWAkskiOgURj6dscYxDOl +Asn3tnIg1xYm8Lbgey8baqw3EmooFm +MSBtFURjtMu3LyDTLYx9FBM23UQdZ1 +e8e7xgwaSI2JKI65FEThzSQBVmKeAZ +w3xAirHLO1tvjon2jgr7y9tBtrGfMS +zABBLCkowUIqfONQOAjir8YPkFqfDW + +-- !test_11 -- +0 str_0 10000000000 +0 str_0 10000000000 +1 str_1 10000000001 +1 str_1 10000000001 +2 str_2 10000000002 +2 str_2 10000000002 +3 str_3 10000000003 +3 str_3 10000000003 +4 str_4 10000000004 +4 str_4 10000000004 +5 str_5 10000000005 +5 str_5 10000000005 +6 str_6 10000000006 +7 str_7 10000000007 +8 str_8 10000000008 +9 str_9 10000000009 + +-- !test_12 -- +0 +0 +6 + +-- !test_13 -- +20 +60 + +-- !test_14 -- +0 +0 +40 + +-- !test_1 -- +372 + +-- !test_2 -- +1 str_1 10000000001 +1 str_1 10000000001 + +-- !test_3 -- +0 str_0 10000000000 +0 str_0 10000000000 +100 9DPJaFc00euBteqiW1f1 10000000027 +100 str_100 10000000100 +2200 tQ7BRFEuf8h56kahqsLPa1vu 10000000034 +4800 TaWGgh4iZ 10000000115 +5700 SwOaGJj9fVbk5j0Np 10000000050 + +-- !test_4 -- +4 str_4 10000000004 +4 str_4 10000000004 + +-- !test_5 -- +348 + +-- !test_6 -- +5 str_5 10000000005 +5 str_5 10000000005 +53 str_53 10000000053 +146 str_146 10000000146 +3961 hMgIY4oui0MHYgaIFg4zz5Ti3p 10000000053 +4129 qwPIwtkTZb 10000000005 +4942 vAdLpLUN3VkGNmTjvuPv 10000000053 +5349 koTeYPr2Qaqqnlk07X 10000000146 +5745 1cx1jZ6QGRWAkskiOgURj6dscYxDOl 10000000005 +7573 e3lIPwNnbG6DPmog 10000000005 +8614 TtyopDvRptLB5 10000000005 + +-- !test_7 -- +5 str_5 10000000005 +5 str_5 10000000005 +53 str_53 10000000053 +146 str_146 10000000146 +3961 hMgIY4oui0MHYgaIFg4zz5Ti3p 10000000053 +4129 qwPIwtkTZb 10000000005 +4942 vAdLpLUN3VkGNmTjvuPv 10000000053 +5349 koTeYPr2Qaqqnlk07X 10000000146 +5745 1cx1jZ6QGRWAkskiOgURj6dscYxDOl 10000000005 +7573 e3lIPwNnbG6DPmog 10000000005 +8614 TtyopDvRptLB5 10000000005 + +-- !test_8 -- +10000000005 +10000000005 +10000000005 +10000000005 +10000000005 +10000000005 +10000000053 +10000000053 +10000000053 +10000000146 +10000000146 + +-- !test_9 -- +10 + +-- !test_10 -- +1cx1jZ6QGRWAkskiOgURj6dscYxDOl +Asn3tnIg1xYm8Lbgey8baqw3EmooFm +MSBtFURjtMu3LyDTLYx9FBM23UQdZ1 +e8e7xgwaSI2JKI65FEThzSQBVmKeAZ +w3xAirHLO1tvjon2jgr7y9tBtrGfMS +zABBLCkowUIqfONQOAjir8YPkFqfDW + +-- !test_11 -- +0 str_0 10000000000 +0 str_0 10000000000 +1 str_1 10000000001 +1 str_1 10000000001 +2 str_2 10000000002 +2 str_2 10000000002 +3 str_3 10000000003 +3 str_3 10000000003 +4 str_4 10000000004 +4 str_4 10000000004 +5 str_5 10000000005 +5 str_5 10000000005 +6 str_6 10000000006 +7 str_7 10000000007 +8 str_8 10000000008 +9 str_9 10000000009 + +-- !test_12 -- +0 +0 +6 + +-- !test_13 -- +20 +60 + +-- !test_14 -- +0 +0 +40 + +-- !test_1 -- +372 + +-- !test_2 -- +1 str_1 10000000001 +1 str_1 10000000001 + +-- !test_3 -- +0 str_0 10000000000 +0 str_0 10000000000 +100 9DPJaFc00euBteqiW1f1 10000000027 +100 str_100 10000000100 +2200 tQ7BRFEuf8h56kahqsLPa1vu 10000000034 +4800 TaWGgh4iZ 10000000115 +5700 SwOaGJj9fVbk5j0Np 10000000050 + +-- !test_4 -- +4 str_4 10000000004 +4 str_4 10000000004 + +-- !test_5 -- +348 + +-- !test_6 -- +5 str_5 10000000005 +5 str_5 10000000005 +53 str_53 10000000053 +146 str_146 10000000146 +3961 hMgIY4oui0MHYgaIFg4zz5Ti3p 10000000053 +4129 qwPIwtkTZb 10000000005 +4942 vAdLpLUN3VkGNmTjvuPv 10000000053 +5349 koTeYPr2Qaqqnlk07X 10000000146 +5745 1cx1jZ6QGRWAkskiOgURj6dscYxDOl 10000000005 +7573 e3lIPwNnbG6DPmog 10000000005 +8614 TtyopDvRptLB5 10000000005 + +-- !test_7 -- +5 str_5 10000000005 +5 str_5 10000000005 +53 str_53 10000000053 +146 str_146 10000000146 +3961 hMgIY4oui0MHYgaIFg4zz5Ti3p 10000000053 +4129 qwPIwtkTZb 10000000005 +4942 vAdLpLUN3VkGNmTjvuPv 10000000053 +5349 koTeYPr2Qaqqnlk07X 10000000146 +5745 1cx1jZ6QGRWAkskiOgURj6dscYxDOl 10000000005 +7573 e3lIPwNnbG6DPmog 10000000005 +8614 TtyopDvRptLB5 10000000005 + +-- !test_8 -- +10000000005 +10000000005 +10000000005 +10000000005 +10000000005 +10000000005 +10000000053 +10000000053 +10000000053 +10000000146 +10000000146 + +-- !test_9 -- +10 + +-- !test_10 -- +1cx1jZ6QGRWAkskiOgURj6dscYxDOl +Asn3tnIg1xYm8Lbgey8baqw3EmooFm +MSBtFURjtMu3LyDTLYx9FBM23UQdZ1 +e8e7xgwaSI2JKI65FEThzSQBVmKeAZ +w3xAirHLO1tvjon2jgr7y9tBtrGfMS +zABBLCkowUIqfONQOAjir8YPkFqfDW + +-- !test_11 -- +0 str_0 10000000000 +0 str_0 10000000000 +1 str_1 10000000001 +1 str_1 10000000001 +2 str_2 10000000002 +2 str_2 10000000002 +3 str_3 10000000003 +3 str_3 10000000003 +4 str_4 10000000004 +4 str_4 10000000004 +5 str_5 10000000005 +5 str_5 10000000005 +6 str_6 10000000006 +7 str_7 10000000007 +8 str_8 10000000008 +9 str_9 10000000009 + +-- !test_12 -- +0 +0 +6 + +-- !test_13 -- +20 +60 + +-- !test_14 -- +0 +0 +40 + +-- !test_1 -- +372 + +-- !test_2 -- +1 str_1 10000000001 +1 str_1 10000000001 + +-- !test_3 -- +0 str_0 10000000000 +0 str_0 10000000000 +100 9DPJaFc00euBteqiW1f1 10000000027 +100 str_100 10000000100 +2200 tQ7BRFEuf8h56kahqsLPa1vu 10000000034 +4800 TaWGgh4iZ 10000000115 +5700 SwOaGJj9fVbk5j0Np 10000000050 + +-- !test_4 -- +4 str_4 10000000004 +4 str_4 10000000004 + +-- !test_5 -- +348 + +-- !test_6 -- +5 str_5 10000000005 +5 str_5 10000000005 +53 str_53 10000000053 +146 str_146 10000000146 +3961 hMgIY4oui0MHYgaIFg4zz5Ti3p 10000000053 +4129 qwPIwtkTZb 10000000005 +4942 vAdLpLUN3VkGNmTjvuPv 10000000053 +5349 koTeYPr2Qaqqnlk07X 10000000146 +5745 1cx1jZ6QGRWAkskiOgURj6dscYxDOl 10000000005 +7573 e3lIPwNnbG6DPmog 10000000005 +8614 TtyopDvRptLB5 10000000005 + +-- !test_7 -- +5 str_5 10000000005 +5 str_5 10000000005 +53 str_53 10000000053 +146 str_146 10000000146 +3961 hMgIY4oui0MHYgaIFg4zz5Ti3p 10000000053 +4129 qwPIwtkTZb 10000000005 +4942 vAdLpLUN3VkGNmTjvuPv 10000000053 +5349 koTeYPr2Qaqqnlk07X 10000000146 +5745 1cx1jZ6QGRWAkskiOgURj6dscYxDOl 10000000005 +7573 e3lIPwNnbG6DPmog 10000000005 +8614 TtyopDvRptLB5 10000000005 + +-- !test_8 -- +10000000005 +10000000005 +10000000005 +10000000005 +10000000005 +10000000005 +10000000053 +10000000053 +10000000053 +10000000146 +10000000146 + +-- !test_9 -- +10 + +-- !test_10 -- +1cx1jZ6QGRWAkskiOgURj6dscYxDOl +Asn3tnIg1xYm8Lbgey8baqw3EmooFm +MSBtFURjtMu3LyDTLYx9FBM23UQdZ1 +e8e7xgwaSI2JKI65FEThzSQBVmKeAZ +w3xAirHLO1tvjon2jgr7y9tBtrGfMS +zABBLCkowUIqfONQOAjir8YPkFqfDW + +-- !test_11 -- +0 str_0 10000000000 +0 str_0 10000000000 +1 str_1 10000000001 +1 str_1 10000000001 +2 str_2 10000000002 +2 str_2 10000000002 +3 str_3 10000000003 +3 str_3 10000000003 +4 str_4 10000000004 +4 str_4 10000000004 +5 str_5 10000000005 +5 str_5 10000000005 +6 str_6 10000000006 +7 str_7 10000000007 +8 str_8 10000000008 +9 str_9 10000000009 + +-- !test_12 -- +0 +0 +6 + +-- !test_13 -- +20 +60 + +-- !test_14 -- +0 +0 +40 + +-- !test_1 -- +372 + +-- !test_2 -- +1 str_1 10000000001 +1 str_1 10000000001 + +-- !test_3 -- +0 str_0 10000000000 +0 str_0 10000000000 +100 9DPJaFc00euBteqiW1f1 10000000027 +100 str_100 10000000100 +2200 tQ7BRFEuf8h56kahqsLPa1vu 10000000034 +4800 TaWGgh4iZ 10000000115 +5700 SwOaGJj9fVbk5j0Np 10000000050 + +-- !test_4 -- +4 str_4 10000000004 +4 str_4 10000000004 + +-- !test_5 -- +348 + +-- !test_6 -- +5 str_5 10000000005 +5 str_5 10000000005 +53 str_53 10000000053 +146 str_146 10000000146 +3961 hMgIY4oui0MHYgaIFg4zz5Ti3p 10000000053 +4129 qwPIwtkTZb 10000000005 +4942 vAdLpLUN3VkGNmTjvuPv 10000000053 +5349 koTeYPr2Qaqqnlk07X 10000000146 +5745 1cx1jZ6QGRWAkskiOgURj6dscYxDOl 10000000005 +7573 e3lIPwNnbG6DPmog 10000000005 +8614 TtyopDvRptLB5 10000000005 + +-- !test_7 -- +5 str_5 10000000005 +5 str_5 10000000005 +53 str_53 10000000053 +146 str_146 10000000146 +3961 hMgIY4oui0MHYgaIFg4zz5Ti3p 10000000053 +4129 qwPIwtkTZb 10000000005 +4942 vAdLpLUN3VkGNmTjvuPv 10000000053 +5349 koTeYPr2Qaqqnlk07X 10000000146 +5745 1cx1jZ6QGRWAkskiOgURj6dscYxDOl 10000000005 +7573 e3lIPwNnbG6DPmog 10000000005 +8614 TtyopDvRptLB5 10000000005 + +-- !test_8 -- +10000000005 +10000000005 +10000000005 +10000000005 +10000000005 +10000000005 +10000000053 +10000000053 +10000000053 +10000000146 +10000000146 + +-- !test_9 -- +10 + +-- !test_10 -- +1cx1jZ6QGRWAkskiOgURj6dscYxDOl +Asn3tnIg1xYm8Lbgey8baqw3EmooFm +MSBtFURjtMu3LyDTLYx9FBM23UQdZ1 +e8e7xgwaSI2JKI65FEThzSQBVmKeAZ +w3xAirHLO1tvjon2jgr7y9tBtrGfMS +zABBLCkowUIqfONQOAjir8YPkFqfDW + +-- !test_11 -- +0 str_0 10000000000 +0 str_0 10000000000 +1 str_1 10000000001 +1 str_1 10000000001 +2 str_2 10000000002 +2 str_2 10000000002 +3 str_3 10000000003 +3 str_3 10000000003 +4 str_4 10000000004 +4 str_4 10000000004 +5 str_5 10000000005 +5 str_5 10000000005 +6 str_6 10000000006 +7 str_7 10000000007 +8 str_8 10000000008 +9 str_9 10000000009 + +-- !test_12 -- +0 +0 +6 + +-- !test_13 -- +20 +60 + +-- !test_14 -- +0 +0 +40 + diff --git a/regression-test/suites/external_table_p0/hive/test_orc_tiny_stripes.groovy b/regression-test/suites/external_table_p0/hive/test_orc_tiny_stripes.groovy new file mode 100644 index 00000000000000..bc585340fccb54 --- /dev/null +++ b/regression-test/suites/external_table_p0/hive/test_orc_tiny_stripes.groovy @@ -0,0 +1,203 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +suite("test_orc_tiny_stripes", "p0,external,hive,external_docker,external_docker_hive") { + + String enabled = context.config.otherConfigs.get("enableHiveTest") + if (enabled == null || !enabled.equalsIgnoreCase("true")) { + logger.info("diable Hive test.") + return; + + } + + for (String hivePrefix : ["hive2"]) { + try { + String hms_port = context.config.otherConfigs.get(hivePrefix + "HmsPort") + String catalog_name = "${hivePrefix}_test_orc_tiny_stripes" + String externalEnvIp = context.config.otherConfigs.get("externalEnvIp") + + sql """drop catalog if exists ${catalog_name}""" + sql """create catalog if not exists ${catalog_name} properties ( + "type"="hms", + 'hive.metastore.uris' = 'thrift://${externalEnvIp}:${hms_port}' + );""" + sql """use `${catalog_name}`.`default`""" + + + + + def orc_configs = [ + [0,0,0], + [0,10230,1024], + [1,1,1], + [201,130,0], + [1024,1024,0], + [1024,1024,1024], + [4096,1024,0], + [1024,4096,0], + [1,10240,10000000], + [1000000,888888888,0], + [1000000000000,1000000000000,100000000000] + ] + def li = [ "set enable_orc_lazy_materialization=true;","set enable_orc_lazy_materialization=false;"] + + + li.each { it1 -> + sql it1 + + orc_configs.each { it2 -> + def value1 = it2[0].toString() + def value2 = it2[1].toString() + def value3 = it2[2].toString() + + sql "set orc_tiny_stripe_threshold_bytes = " + value1 + ";" + sql "set orc_once_max_read_bytes = " + value2 + ";" + sql "set orc_max_merge_distance_bytes = " + value3 + ";" + + + qt_test_1 """ select count(*) from orc_tiny_stripes; """ //372 + +/* +*/ + + qt_test_2 """ select * from orc_tiny_stripes where col1 = 1 order by col1,col2,col3; """ +/* +1 str_1 10000000001 +1 str_1 10000000001 +*/ + qt_test_3 """ select * from orc_tiny_stripes where col1%100 = 0 order by col1,col2,col3 ; """ +/* +0 str_0 10000000000 +0 str_0 10000000000 +100 9DPJaFc00euBteqiW1f1 10000000027 +100 str_100 10000000100 +2200 tQ7BRFEuf8h56kahqsLPa1vu 10000000034 +4800 TaWGgh4iZ 10000000115 +5700 SwOaGJj9fVbk5j0Np 10000000050 +*/ + + qt_test_4 """ select * from orc_tiny_stripes where col2 = "str_4" order by col1,col2,col3; """ +/* +4 str_4 10000000004 +4 str_4 10000000004 +*/ + qt_test_5 """ select count(*) from orc_tiny_stripes where col3 > 10000000005; """ //348 + qt_test_6 """ select * from orc_tiny_stripes where col3 in ( 10000000005,10000000053,10000000146) order by col1,col2,col3 ; """ +/* +5 str_5 10000000005 +5 str_5 10000000005 +53 str_53 10000000053 +146 str_146 10000000146 +3961 hMgIY4oui0MHYgaIFg4zz5Ti3p 10000000053 +4129 qwPIwtkTZb 10000000005 +4942 vAdLpLUN3VkGNmTjvuPv 10000000053 +5349 koTeYPr2Qaqqnlk07X 10000000146 +5745 1cx1jZ6QGRWAkskiOgURj6dscYxDOl 10000000005 +7573 e3lIPwNnbG6DPmog 10000000005 +8614 TtyopDvRptLB5 10000000005 +*/ + + qt_test_7 """ select * from orc_tiny_stripes where col3 in ( 10000000005,10000000053,10000000146) order by col1,col2,col3 ; """ +/* +5 str_5 10000000005 +5 str_5 10000000005 +53 str_53 10000000053 +146 str_146 10000000146 +3961 hMgIY4oui0MHYgaIFg4zz5Ti3p 10000000053 +4129 qwPIwtkTZb 10000000005 +4942 vAdLpLUN3VkGNmTjvuPv 10000000053 +5349 koTeYPr2Qaqqnlk07X 10000000146 +5745 1cx1jZ6QGRWAkskiOgURj6dscYxDOl 10000000005 +7573 e3lIPwNnbG6DPmog 10000000005 +8614 TtyopDvRptLB5 10000000005 +*/ + + qt_test_8 """ select col3 from orc_tiny_stripes where col3 in ( 10000000005,10000000053,10000000146) order by col3 ; """ +/* +10000000005 +10000000005 +10000000005 +10000000005 +10000000005 +10000000005 +10000000053 +10000000053 +10000000053 +10000000146 +10000000146 +*/ + + qt_test_9 """ select col1 from orc_tiny_stripes where col1 in (10,1000) order by col1 ; """ // 10 + qt_test_10 """ select col2 from orc_tiny_stripes where length(col2) > 29 order by col2 ; """ +/* +1cx1jZ6QGRWAkskiOgURj6dscYxDOl +Asn3tnIg1xYm8Lbgey8baqw3EmooFm +MSBtFURjtMu3LyDTLYx9FBM23UQdZ1 +e8e7xgwaSI2JKI65FEThzSQBVmKeAZ +w3xAirHLO1tvjon2jgr7y9tBtrGfMS +zABBLCkowUIqfONQOAjir8YPkFqfDW +*/ + qt_test_11 """ select * from orc_tiny_stripes where col1 < 10 order by col1,col2,col3; """ +/* +0 str_0 10000000000 +0 str_0 10000000000 +1 str_1 10000000001 +1 str_1 10000000001 +2 str_2 10000000002 +2 str_2 10000000002 +3 str_3 10000000003 +3 str_3 10000000003 +4 str_4 10000000004 +4 str_4 10000000004 +5 str_5 10000000005 +5 str_5 10000000005 +6 str_6 10000000006 +7 str_7 10000000007 +8 str_8 10000000008 +9 str_9 10000000009 +*/ + + qt_test_12 """ select col1 from orc_tiny_stripes where col1 in(0,6 ) order by col1; """ +/* +0 +0 +6 +*/ + + qt_test_13 """ select col1 from orc_tiny_stripes where col1 in(20,60 ) order by col1; """ + /* +20 +60 +*/ + + qt_test_14 """ select col1 from orc_tiny_stripes where col1 in(40,0 ) order by col1; """ +/* +0 +0 +40 +*/ + + + } + } + + sql """drop catalog if exists ${catalog_name}""" + } finally { + } + } +} + From c6c6f2662528b95449e0369a60cebeac009c66e1 Mon Sep 17 00:00:00 2001 From: daidai <2017501503@qq.com> Date: Tue, 22 Oct 2024 09:57:55 +0800 Subject: [PATCH 10/17] delete useless code --- be/src/vec/exec/format/orc/vorc_reader.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/be/src/vec/exec/format/orc/vorc_reader.cpp b/be/src/vec/exec/format/orc/vorc_reader.cpp index 1c309470bed4da..909373fbbdbae8 100644 --- a/be/src/vec/exec/format/orc/vorc_reader.cpp +++ b/be/src/vec/exec/format/orc/vorc_reader.cpp @@ -910,7 +910,6 @@ Status OrcReader::set_fill_columns( auto* orc_input_stream_ptr = static_cast(_reader->getStream()); orc_input_stream_ptr->set_all_tiny_stripes(); auto& orc_file_reader = orc_input_stream_ptr->get_file_reader(); - orc_file_reader->collect_profile_before_close(); auto orc_inner_reader = orc_input_stream_ptr->get_inner_reader(); orc_file_reader = std::make_shared(_profile, orc_inner_reader, range_finder); From cdff624e8befa0f61fb64eb773dec32102aea30c Mon Sep 17 00:00:00 2001 From: daidai <2017501503@qq.com> Date: Tue, 22 Oct 2024 11:00:04 +0800 Subject: [PATCH 11/17] cache use OwnedSlice --- be/src/io/fs/buffered_reader.cpp | 6 +++--- be/src/io/fs/buffered_reader.h | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/be/src/io/fs/buffered_reader.cpp b/be/src/io/fs/buffered_reader.cpp index 2d0ef7edcaba93..7fd85caa43b6c0 100644 --- a/be/src/io/fs/buffered_reader.cpp +++ b/be/src/io/fs/buffered_reader.cpp @@ -894,7 +894,7 @@ RangeCacheFileReader::RangeCacheFileReader(RuntimeProfile* profile, io::FileRead _size = _inner_reader->size(); uint64_t max_cache_size = std::max((uint64_t)4096, (uint64_t)_range_finder->get_max_range_size()); - _cache = std::make_unique(max_cache_size); + _cache = OwnedSlice(max_cache_size); if (_profile != nullptr) { const char* random_profile = "RangeCacheFileReader"; @@ -930,7 +930,7 @@ Status RangeCacheFileReader::read_at_impl(size_t offset, Slice result, size_t* b _cache_statistics.read_to_cache_bytes += range_size; SCOPED_RAW_TIMER(&_cache_statistics.read_to_cache_time); - Slice cache_slice = {_cache.get(), range_size}; + Slice cache_slice = {_cache.data(), range_size}; RETURN_IF_ERROR( _inner_reader->read_at(range.start_offset, cache_slice, bytes_read, io_ctx)); @@ -944,7 +944,7 @@ Status RangeCacheFileReader::read_at_impl(size_t offset, Slice result, size_t* b } int64_t buffer_offset = offset - _current_start_offset; - memcpy(result.data, _cache.get() + buffer_offset, request_size); + memcpy(result.data, _cache.data() + buffer_offset, request_size); *bytes_read = request_size; return Status::OK(); diff --git a/be/src/io/fs/buffered_reader.h b/be/src/io/fs/buffered_reader.h index 06fbd4bc83f397..67e07665fbfd9f 100644 --- a/be/src/io/fs/buffered_reader.h +++ b/be/src/io/fs/buffered_reader.h @@ -168,7 +168,7 @@ class RangeCacheFileReader : public io::FileReader { io::FileReaderSPtr _inner_reader; std::shared_ptr _range_finder; - std::unique_ptr _cache; + OwnedSlice _cache; int64_t _current_start_offset = -1; size_t _size; From c8720abafafc14d49b76c3f25167290fa7e7fd7b Mon Sep 17 00:00:00 2001 From: daidai <2017501503@qq.com> Date: Tue, 22 Oct 2024 14:24:37 +0800 Subject: [PATCH 12/17] run.sql --- .../scripts/create_preinstalled_scripts/run67.hql | 11 +++++++++++ 1 file changed, 11 insertions(+) create mode 100644 docker/thirdparties/docker-compose/hive/scripts/create_preinstalled_scripts/run67.hql diff --git a/docker/thirdparties/docker-compose/hive/scripts/create_preinstalled_scripts/run67.hql b/docker/thirdparties/docker-compose/hive/scripts/create_preinstalled_scripts/run67.hql new file mode 100644 index 00000000000000..32fdc59e588636 --- /dev/null +++ b/docker/thirdparties/docker-compose/hive/scripts/create_preinstalled_scripts/run67.hql @@ -0,0 +1,11 @@ +use `default`; + +CREATE TABLE `orc_tiny_stripes`( + col1 bigint, + col2 string, + col3 bigint +) +STORED AS orc +LOCATION '/user/doris/preinstalled_data/preinstalled_data/orc/orc_tiny_stripes'; + +msck repair table orc_tiny_stripes; From e7d2408ba575b351a5e1799ee745dc58c638a4d0 Mon Sep 17 00:00:00 2001 From: daidai <2017501503@qq.com> Date: Tue, 22 Oct 2024 18:42:43 +0800 Subject: [PATCH 13/17] fix regression --- .../hive/scripts/create_preinstalled_scripts/run67.hql | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docker/thirdparties/docker-compose/hive/scripts/create_preinstalled_scripts/run67.hql b/docker/thirdparties/docker-compose/hive/scripts/create_preinstalled_scripts/run67.hql index 32fdc59e588636..f84cc11f040cda 100644 --- a/docker/thirdparties/docker-compose/hive/scripts/create_preinstalled_scripts/run67.hql +++ b/docker/thirdparties/docker-compose/hive/scripts/create_preinstalled_scripts/run67.hql @@ -6,6 +6,6 @@ CREATE TABLE `orc_tiny_stripes`( col3 bigint ) STORED AS orc -LOCATION '/user/doris/preinstalled_data/preinstalled_data/orc/orc_tiny_stripes'; +LOCATION '/user/doris/preinstalled_data/orc/orc_tiny_stripes'; msck repair table orc_tiny_stripes; From d9aed763577c625c195c9b81095740a33317e92e Mon Sep 17 00:00:00 2001 From: daidai <2017501503@qq.com> Date: Mon, 28 Oct 2024 11:24:49 +0800 Subject: [PATCH 14/17] open orc lazy mat. --- be/src/vec/exec/format/orc/vorc_reader.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/be/src/vec/exec/format/orc/vorc_reader.cpp b/be/src/vec/exec/format/orc/vorc_reader.cpp index 909373fbbdbae8..2d47f236774265 100644 --- a/be/src/vec/exec/format/orc/vorc_reader.cpp +++ b/be/src/vec/exec/format/orc/vorc_reader.cpp @@ -913,7 +913,7 @@ Status OrcReader::set_fill_columns( auto orc_inner_reader = orc_input_stream_ptr->get_inner_reader(); orc_file_reader = std::make_shared(_profile, orc_inner_reader, range_finder); - _lazy_read_ctx.can_lazy_read = false; + //_lazy_read_ctx.can_lazy_read = false; } if (!_lazy_read_ctx.can_lazy_read) { From ecfebcedcd22c1b65b3f9f3de57e5f4d1a4d2157 Mon Sep 17 00:00:00 2001 From: daidai Date: Wed, 6 Nov 2024 15:57:26 +0800 Subject: [PATCH 15/17] rm unsed code. --- be/src/vec/exec/format/orc/vorc_reader.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/be/src/vec/exec/format/orc/vorc_reader.cpp b/be/src/vec/exec/format/orc/vorc_reader.cpp index 2d47f236774265..e2161d8a6dc48a 100644 --- a/be/src/vec/exec/format/orc/vorc_reader.cpp +++ b/be/src/vec/exec/format/orc/vorc_reader.cpp @@ -913,7 +913,6 @@ Status OrcReader::set_fill_columns( auto orc_inner_reader = orc_input_stream_ptr->get_inner_reader(); orc_file_reader = std::make_shared(_profile, orc_inner_reader, range_finder); - //_lazy_read_ctx.can_lazy_read = false; } if (!_lazy_read_ctx.can_lazy_read) { From 0248f4e32023cddff9da41b7a801dc7891996950 Mon Sep 17 00:00:00 2001 From: daidai Date: Wed, 6 Nov 2024 16:16:43 +0800 Subject: [PATCH 16/17] fix thrift --- gensrc/thrift/PaloInternalService.thrift | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gensrc/thrift/PaloInternalService.thrift b/gensrc/thrift/PaloInternalService.thrift index c4f83ad537d534..e7871194965301 100644 --- a/gensrc/thrift/PaloInternalService.thrift +++ b/gensrc/thrift/PaloInternalService.thrift @@ -354,7 +354,7 @@ struct TQueryOptions { 138: optional i64 orc_once_max_read_bytes = 8388608; 139: optional i64 orc_max_merge_distance_bytes = 1048576; - 137: optional bool enable_auto_create_when_overwrite = false; + 140: optional bool enable_auto_create_when_overwrite = false; // For cloud, to control if the content would be written into file cache // In write path, to control if the content would be written into file cache. // In read path, read from file cache or remote storage when execute query. From c4dc35f1b22c4498a4138306c94ad0b6b9058a85 Mon Sep 17 00:00:00 2001 From: daidai Date: Wed, 6 Nov 2024 16:19:43 +0800 Subject: [PATCH 17/17] fix thrift --- gensrc/thrift/PaloInternalService.thrift | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/gensrc/thrift/PaloInternalService.thrift b/gensrc/thrift/PaloInternalService.thrift index e7871194965301..29fecc27539ff4 100644 --- a/gensrc/thrift/PaloInternalService.thrift +++ b/gensrc/thrift/PaloInternalService.thrift @@ -350,11 +350,12 @@ struct TQueryOptions { 135: optional bool enable_parallel_outfile = false; 136: optional bool enable_phrase_query_sequential_opt = true; - 137: optional i64 orc_tiny_stripe_threshold_bytes = 8388608; - 138: optional i64 orc_once_max_read_bytes = 8388608; - 139: optional i64 orc_max_merge_distance_bytes = 1048576; - 140: optional bool enable_auto_create_when_overwrite = false; + 137: optional bool enable_auto_create_when_overwrite = false; + + 138: optional i64 orc_tiny_stripe_threshold_bytes = 8388608; + 139: optional i64 orc_once_max_read_bytes = 8388608; + 140: optional i64 orc_max_merge_distance_bytes = 1048576; // For cloud, to control if the content would be written into file cache // In write path, to control if the content would be written into file cache. // In read path, read from file cache or remote storage when execute query.