From a6aef56c118f3f62f7175d4d22701a85e3bdc4d0 Mon Sep 17 00:00:00 2001 From: huangkangping Date: Wed, 12 Jun 2019 14:47:05 +0800 Subject: [PATCH 1/8] Add bitshuffle and croaring lib --- thirdparty/build-thirdparty.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/thirdparty/build-thirdparty.sh b/thirdparty/build-thirdparty.sh index 4227f20ae17b5d..bbd1e02f0b704a 100755 --- a/thirdparty/build-thirdparty.sh +++ b/thirdparty/build-thirdparty.sh @@ -588,7 +588,7 @@ build_bitshuffle() { arch_flag="" if [ "$arch" == "avx2" ]; then arch_flag="-mavx2" - fi + fi tmp_obj=bitshuffle_${arch}_tmp.o dst_obj=bitshuffle_${arch}.o ${CC:-gcc} $EXTRA_CFLAGS $arch_flag -std=c99 -I$PREFIX/include/lz4/ -O3 -DNDEBUG -fPIC -c \ From 90e6d0e42453c3ad02bf301ee5659301b2b3d344 Mon Sep 17 00:00:00 2001 From: huangkangping Date: Wed, 12 Jun 2019 15:39:38 +0800 Subject: [PATCH 2/8] Add bitshuffle page --- be/CMakeLists.txt | 3 + be/src/olap/CMakeLists.txt | 6 +- be/src/olap/rowset/segment_v2/CMakeLists.txt | 29 ++ .../rowset/segment_v2/bitshuffle_page.cpp | 88 +++++ .../olap/rowset/segment_v2/bitshuffle_page.h | 341 ++++++++++++++++++ .../rowset/segment_v2/bitshuffle_wrapper.cpp | 83 +++++ .../rowset/segment_v2/bitshuffle_wrapper.h | 36 ++ be/src/olap/types.cpp | 1 + be/src/olap/types.h | 38 ++ be/src/util/slice.h | 6 +- 10 files changed, 624 insertions(+), 7 deletions(-) create mode 100644 be/src/olap/rowset/segment_v2/CMakeLists.txt create mode 100644 be/src/olap/rowset/segment_v2/bitshuffle_page.cpp create mode 100644 be/src/olap/rowset/segment_v2/bitshuffle_page.h create mode 100644 be/src/olap/rowset/segment_v2/bitshuffle_wrapper.cpp create mode 100644 be/src/olap/rowset/segment_v2/bitshuffle_wrapper.h diff --git a/be/CMakeLists.txt b/be/CMakeLists.txt index 74a79722179296..8b1b0479d0320d 100644 --- a/be/CMakeLists.txt +++ b/be/CMakeLists.txt @@ -460,6 +460,7 @@ set(DORIS_LINK_LIBS Webserver TestUtil Geo + SegmentV2 ${WL_END_GROUP} ) @@ -496,6 +497,8 @@ set(DORIS_DEPENDENCIES openssl crypto leveldb + bitshuffle + roaring ${WL_END_GROUP} ) diff --git a/be/src/olap/CMakeLists.txt b/be/src/olap/CMakeLists.txt index 51c76663a0159b..766aa29e295c50 100644 --- a/be/src/olap/CMakeLists.txt +++ b/be/src/olap/CMakeLists.txt @@ -20,7 +20,9 @@ set(LIBRARY_OUTPUT_PATH "${BUILD_DIR}/src/olap") # where to put generated binaries set(EXECUTABLE_OUTPUT_PATH "${BUILD_DIR}/src/olap") - + +add_subdirectory(rowset/segment_v2) + add_library(Olap STATIC aggregate_func.cpp base_compaction.cpp @@ -83,6 +85,4 @@ add_library(Olap STATIC types.cpp utils.cpp wrapper_field.cpp - rowset/segment_v2/ordinal_page_index.cpp - rowset/segment_v2/encoding_info.cpp ) diff --git a/be/src/olap/rowset/segment_v2/CMakeLists.txt b/be/src/olap/rowset/segment_v2/CMakeLists.txt new file mode 100644 index 00000000000000..73abf915b79754 --- /dev/null +++ b/be/src/olap/rowset/segment_v2/CMakeLists.txt @@ -0,0 +1,29 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +# where to put generated libraries +set(LIBRARY_OUTPUT_PATH "${BUILD_DIR}/src/olap/rowset/segment_v2") + +# where to put generated binaries +set(EXECUTABLE_OUTPUT_PATH "${BUILD_DIR}/src/olap/rowset/segment_v2") + +add_library(SegmentV2 STATIC + ordinal_page_index.cpp + encoding_info.cpp + bitshuffle_page.cpp + bitshuffle_wrapper.cpp +) diff --git a/be/src/olap/rowset/segment_v2/bitshuffle_page.cpp b/be/src/olap/rowset/segment_v2/bitshuffle_page.cpp new file mode 100644 index 00000000000000..87a0dda3ed2220 --- /dev/null +++ b/be/src/olap/rowset/segment_v2/bitshuffle_page.cpp @@ -0,0 +1,88 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include "olap/rowset/segment_v2/bitshuffle_page.h" +#include "olap/rowset/segment_v2/common.h" + +using doris::Status; + +namespace doris { + +namespace segment_v2 { + +void abort_with_bitshuffle_error(int64_t val) { + switch (val) { + case -1: + LOG(FATAL) << "Failed to allocate memory"; + break; + case -11: + LOG(FATAL) << "Missing SSE"; + break; + case -12: + LOG(FATAL) << "Missing AVX"; + break; + case -80: + LOG(FATAL) << "Input size not a multiple of 8"; + break; + case -81: + LOG(FATAL) << "block_size not multiple of 8"; + break; + case -91: + LOG(FATAL) << "Decompression error, wrong number of bytes processed"; + break; + default: + LOG(FATAL) << "Error internal to compression routine"; + } +} + +template<> +Slice BitshufflePageBuilder::finish(rowid_t page_first_rowid) { + uint32_t max_value = 0; + for (int i = 0; i < _count; i++) { + max_value = std::max(max_value, cell(i)); + } + + // Shrink the block of UINT32 to block of UINT8 or UINT16 whenever possible and + // set the header information accordingly, so that the decoder can recover the + // encoded data. + Slice ret; + if (max_value <= std::numeric_limits::max()) { + for (int i = 0; i < _count; i++) { + uint32_t value = cell(i); + uint8_t converted_value = static_cast(value); + memcpy(&_data[i * sizeof(converted_value)], &converted_value, sizeof(converted_value)); + } + ret = _finish(page_first_rowid, sizeof(uint8_t)); + doris::encode_fixed32_le((uint8_t*)ret.mutable_data() + 16, sizeof(uint8_t)); + } else if (max_value <= std::numeric_limits::max()) { + for (int i = 0; i < _count; i++) { + uint32_t value = cell(i); + uint16_t converted_value = static_cast(value); + memcpy(&_data[i * sizeof(converted_value)], &converted_value, sizeof(converted_value)); + } + ret = _finish(page_first_rowid, sizeof(uint16_t)); + doris::encode_fixed32_le((uint8_t*)ret.mutable_data() + 16, sizeof(uint16_t)); + } else { + ret = _finish(page_first_rowid, sizeof(uint32_t)); + doris::encode_fixed32_le((uint8_t*)ret.mutable_data() + 16, sizeof(uint32_t)); + } + return ret; +} + +} // namespace segment_v2 + +} // namespace doris diff --git a/be/src/olap/rowset/segment_v2/bitshuffle_page.h b/be/src/olap/rowset/segment_v2/bitshuffle_page.h new file mode 100644 index 00000000000000..e348e27ff02133 --- /dev/null +++ b/be/src/olap/rowset/segment_v2/bitshuffle_page.h @@ -0,0 +1,341 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#pragma once + +#include +#include +#include +#include +#include +#include + +#include "util/coding.h" +#include "util/faststring.h" +#include "gutil/port.h" +#include "olap/olap_common.h" +#include "olap/types.h" +#include "olap/rowset/segment_v2/page_builder.h" +#include "olap/rowset/segment_v2/page_decoder.h" +#include "olap/rowset/segment_v2/options.h" +#include "olap/rowset/segment_v2/common.h" +#include "olap/rowset/segment_v2/bitshuffle_wrapper.h" + +namespace doris { + +namespace segment_v2 { + +void abort_with_bitshuffle_error(int64_t val); + +// BitshufflePageBuilder bitshuffles and compresses the bits of fixed +// size type blocks with lz4. +// +// The page format is as follows: +// +// 1. Header: (20 bytes total) +// +// [32-bit] +// The ordinal offset of the first element in the page. +// +// [32-bit] +// The number of elements encoded in the page. +// +// [32-bit] +// The post-compression size of the page, including this header. +// +// [32-bit] +// Padding is needed to meet the requirements of the bitshuffle +// library such that the input/output is a multiple of 8. Some +// ignored elements are appended to the end of the page if necessary +// to meet this requirement. +// +// This header field is the post-padding element count. +// +// [32-bit] +// The size of the elements, in bytes, as actually encoded. In the +// case that all of the data in a page can fit into a smaller +// integer type, then we may choose to encode that smaller type +// to save CPU costs. +// +// This is currently only implemented in the UINT32 page type. +// +// NOTE: all on-disk ints are encoded little-endian +// +// 2. Element data +// +// The header is followed by the bitshuffle-compressed element data. +// +template +class BitshufflePageBuilder : public PageBuilder { +public: + BitshufflePageBuilder(const BuilderOptions* options) : + _options(options), + _count(0), + _remain_element_capacity(0), + _finished(false) { + reset(); + } + + bool is_page_full() override { + return _remain_element_capacity == 0; + } + + doris::Status add(const uint8_t* vals, size_t* count) override { + DCHECK(!_finished); + int to_add = std::min(_remain_element_capacity, *count); + _data.append(vals, to_add * SIZE_OF_TYPE); + _count += to_add; + _remain_element_capacity -= to_add; + // return added number through count + *count = to_add; + return Status::OK; + } + + Slice finish(rowid_t page_first_rowid) override { + return _finish(page_first_rowid, SIZE_OF_TYPE); + } + + void reset() override { + auto block_size = _options->data_page_size; + _count = 0; + _data.clear(); + _data.reserve(block_size); + DCHECK_EQ(reinterpret_cast(_data.data()) & (alignof(CppType) - 1), 0) + << "buffer must be naturally-aligned"; + _buffer.clear(); + _buffer.resize(HEADER_SIZE); + _finished = false; + _remain_element_capacity = block_size / SIZE_OF_TYPE; + } + + size_t count() const { + return _count; + } + + doris::Status get_dictionary_page(doris::Slice* dictionary_page) override { + return Status::NOT_IMPLEMENTED; + } + + doris::Status get_bitmap_page(doris::Slice* bitmap_page) override { + return Status::NOT_IMPLEMENTED; + } + +private: + Slice _finish(rowid_t page_first_rowid, int final_size_of_type) { + _data.resize(HEADER_SIZE + final_size_of_type * _count); + + // Do padding so that the input num of element is multiple of 8. + int num_elems_after_padding = ALIGN_UP(_count, 8); + int padding_elems = num_elems_after_padding - _count; + int padding_bytes = padding_elems * final_size_of_type; + for (int i = 0; i < padding_bytes; i++) { + _data.push_back(0); + } + + _buffer.resize(HEADER_SIZE + + bitshuffle::compress_lz4_bound(num_elems_after_padding, final_size_of_type, 0)); + + doris::encode_fixed32_le(&_buffer[0], page_first_rowid); + doris::encode_fixed32_le(&_buffer[4], _count); + int64_t bytes = bitshuffle::compress_lz4(_data.data(), &_buffer[HEADER_SIZE], + num_elems_after_padding, final_size_of_type, 0); + if (PREDICT_FALSE(bytes < 0)) { + // This means the bitshuffle function fails. + // Ideally, this should not happen. + abort_with_bitshuffle_error(bytes); + // It does not matter what will be returned here, + // since we have logged fatal in abort_with_bitshuffle_error(). + return Slice(); + } + doris::encode_fixed32_le(&_buffer[8], HEADER_SIZE + bytes); + doris::encode_fixed32_le(&_buffer[12], num_elems_after_padding); + doris::encode_fixed32_le(&_buffer[16], final_size_of_type); + _finished = true; + return Slice(_buffer.data(), HEADER_SIZE + bytes); + } + + typedef typename doris::TypeTraits::CppType CppType; + + CppType cell(int idx) const { + DCHECK_GE(idx, 0); + CppType ret; + memcpy(&ret, &_data[idx * SIZE_OF_TYPE], sizeof(CppType)); + return ret; + } + + // Length of a header. + static const size_t HEADER_SIZE = sizeof(uint32_t) * 5; + enum { + SIZE_OF_TYPE = doris::TypeTraits::size + }; + const BuilderOptions* _options; + uint32_t _count; + int _remain_element_capacity; + bool _finished; + kudu::faststring _data; + kudu::faststring _buffer; +}; + +template +class BitShufflePageDecoder : public PageDecoder { + public: + BitShufflePageDecoder(doris::Slice data) : _data(data), + _parsed(false), + _page_first_ordinal(0), + _num_elements(0), + _compressed_size(0), + _num_element_after_padding(0), + _size_of_element(0), + _cur_index(0) { } + + doris::Status init() override { + CHECK(!_parsed); + if (_data.size < HEADER_SIZE) { + std::stringstream ss; + ss << "file corrupton: invalid data size:" << _data.size << ", header size:" << HEADER_SIZE; + return Status(ss.str()); + } + _page_first_ordinal = doris::decode_fixed32_le((const uint8_t*)&_data[0]); + _num_elements = doris::decode_fixed32_le((const uint8_t*)&_data[4]); + _compressed_size = doris::decode_fixed32_le((const uint8_t*)&_data[8]); + if (_compressed_size != _data.size) { + std::stringstream ss; + ss << "Size information unmatched, _compressed_size:" << _compressed_size + << ", data size:" << _data.size; + return Status(ss.str()); + } + _num_element_after_padding = doris::decode_fixed32_le((const uint8_t*)&_data[12]); + if (_num_element_after_padding != ALIGN_UP(_num_elements, 8)) { + std::stringstream ss; + ss << "num of element information corrupted," + << " _num_element_after_padding:" << _num_element_after_padding + << ", _num_elements:" << _num_elements; + return Status(ss.str()); + } + _size_of_element = doris::decode_fixed32_le((const uint8_t*)&_data[16]); + switch (_size_of_element) { + case 1: + case 2: + case 4: + case 8: + case 16: + break; + default: + std::stringstream ss; + ss << "invalid size_of_elem:" << _size_of_element; + return Status(ss.str()); + } + + // Currently, only the UINT32 block encoder supports expanding size: + if (UNLIKELY(Type != OLAP_FIELD_TYPE_UNSIGNED_INT && _size_of_element != SIZE_OF_TYPE)) { + std::stringstream ss; + ss << "invalid size info. size of element:" << _size_of_element + << ", SIZE_OF_TYPE:" << SIZE_OF_TYPE + << ", type:" << Type; + return Status(ss.str()); + } + if (UNLIKELY(_size_of_element > SIZE_OF_TYPE)) { + std::stringstream ss; + ss << "invalid size info. size of element:" << _size_of_element + << ", SIZE_OF_TYPE:" << SIZE_OF_TYPE; + return Status(ss.str()); + } + + RETURN_IF_ERROR(_decode()); + _parsed = true; + return Status::OK; + } + + doris::Status seek_to_position_in_page(size_t pos) override { + CHECK(_parsed) << "Must call init()"; + if (PREDICT_FALSE(_num_elements == 0)) { + DCHECK_EQ(0, pos); + return Status("invalid pos"); + } + + DCHECK_LE(pos, _num_elements); + _cur_index = pos; + return Status::OK; + } + + doris::Status next_batch(size_t* n, doris::ColumnVector* dst, doris::MemPool* mem_pool) override { + DCHECK(_parsed); + if (PREDICT_FALSE(*n == 0 || _cur_index >= _num_elements)) { + *n = 0; + return Status::OK; + } + + size_t max_fetch = std::min(*n, static_cast(_num_elements - _cur_index)); + memcpy(dst->col_data(), &_decoded[_cur_index * SIZE_OF_TYPE], max_fetch * SIZE_OF_TYPE); + *n = max_fetch; + _cur_index += max_fetch; + + return Status::OK; + } + + size_t count() const override { + return _num_elements; + } + + size_t current_index() const override { + return _cur_index; + } + + rowid_t get_first_rowid() const override { + return _page_first_ordinal; + } + +private: + Status _decode() { + if (_num_elements > 0) { + int64_t bytes; + _decoded.resize(_num_element_after_padding * _size_of_element); + char* in = const_cast(&_data[HEADER_SIZE]); + bytes = bitshuffle::decompress_lz4(in, _decoded.data(), _num_element_after_padding, + _size_of_element, 0); + if (PREDICT_FALSE(bytes < 0)) { + // Ideally, this should not happen. + abort_with_bitshuffle_error(bytes); + return Status(TStatusCode::RUNTIME_ERROR, "Unshuffle Process failed", true); + } + } + return Status::OK; + } + + typedef typename doris::TypeTraits::CppType CppType; + + // Length of a header. + static const size_t HEADER_SIZE = sizeof(uint32_t) * 5; + enum { + SIZE_OF_TYPE = doris::TypeTraits::size + }; + + doris::Slice _data; + bool _parsed; + rowid_t _page_first_ordinal; + size_t _num_elements; + size_t _compressed_size; + size_t _num_element_after_padding; + + int _size_of_element; + size_t _cur_index; + kudu::faststring _decoded; +}; + +} // namespace segment_v2 + +} // namespace doris diff --git a/be/src/olap/rowset/segment_v2/bitshuffle_wrapper.cpp b/be/src/olap/rowset/segment_v2/bitshuffle_wrapper.cpp new file mode 100644 index 00000000000000..ac676f64573ea7 --- /dev/null +++ b/be/src/olap/rowset/segment_v2/bitshuffle_wrapper.cpp @@ -0,0 +1,83 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include "olap/rowset/segment_v2/bitshuffle_wrapper.h" + +// Include the bitshuffle header once to get the default (non-AVX2) +// symbols. +#include + +#include "gutil/cpu.h" + +// Include the bitshuffle header again, but this time importing the +// AVX2-compiled symbols by defining some macros. +#undef BITSHUFFLE_H +#define bshuf_compress_lz4_bound bshuf_compress_lz4_bound_avx2 +#define bshuf_compress_lz4 bshuf_compress_lz4_avx2 +#define bshuf_decompress_lz4 bshuf_decompress_lz4_avx2 +#include // NOLINT(*) +#undef bshuf_compress_lz4_bound +#undef bshuf_compress_lz4 +#undef bshuf_decompress_lz4 + +using base::CPU; + +namespace doris { + +namespace bitshuffle { + +// Function pointers which will be assigned the correct implementation +// for the runtime architecture. +namespace { +decltype(&bshuf_compress_lz4_bound) g_bshuf_compress_lz4_bound; +decltype(&bshuf_compress_lz4) g_bshuf_compress_lz4; +decltype(&bshuf_decompress_lz4) g_bshuf_decompress_lz4; +} // anonymous namespace + +// When this translation unit is initialized, figure out the current CPU and +// assign the correct function for this architecture. +// +// This avoids an expensive 'cpuid' call in the hot path, and also avoids +// the cost of a 'std::once' call. +__attribute__((constructor)) +void SelectBitshuffleFunctions() { + if (CPU().has_avx2()) { + g_bshuf_compress_lz4_bound = bshuf_compress_lz4_bound_avx2; + g_bshuf_compress_lz4 = bshuf_compress_lz4_avx2; + g_bshuf_decompress_lz4 = bshuf_decompress_lz4_avx2; + } else { + g_bshuf_compress_lz4_bound = bshuf_compress_lz4_bound; + g_bshuf_compress_lz4 = bshuf_compress_lz4; + g_bshuf_decompress_lz4 = bshuf_decompress_lz4; + } +} + +int64_t compress_lz4(void* in, void* out, size_t size, + size_t elem_size, size_t block_size) { + return g_bshuf_compress_lz4(in, out, size, elem_size, block_size); +} +int64_t decompress_lz4(void* in, void* out, size_t size, + size_t elem_size, size_t block_size) { + return g_bshuf_decompress_lz4(in, out, size, elem_size, block_size); +} +size_t compress_lz4_bound(size_t size, size_t elem_size, size_t block_size) { + return g_bshuf_compress_lz4_bound(size, elem_size, block_size); +} + +} // namespace bitshuffle + +} // namespace doris diff --git a/be/src/olap/rowset/segment_v2/bitshuffle_wrapper.h b/be/src/olap/rowset/segment_v2/bitshuffle_wrapper.h new file mode 100644 index 00000000000000..e83cb9e8f818c9 --- /dev/null +++ b/be/src/olap/rowset/segment_v2/bitshuffle_wrapper.h @@ -0,0 +1,36 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#pragma once + +#include +#include + +// This namespace has wrappers for the Bitshuffle library which do runtime dispatch to +// either AVX2-accelerated or regular SSE2 implementations based on the available CPU. +namespace doris { + +namespace bitshuffle { + +// See for documentation on these functions. +size_t compress_lz4_bound(size_t size, size_t elem_size, size_t block_size); +int64_t compress_lz4(void* in, void* out, size_t size, size_t elem_size, size_t block_size); +int64_t decompress_lz4(void* in, void* out, size_t size, size_t elem_size, size_t block_size); + +} // namespace bitshuffle + +} // namespace doris diff --git a/be/src/olap/types.cpp b/be/src/olap/types.cpp index d3f5e63f152af2..e5f5623951a587 100644 --- a/be/src/olap/types.cpp +++ b/be/src/olap/types.cpp @@ -60,6 +60,7 @@ TypeInfoResolver::TypeInfoResolver() { add_mapping(); add_mapping(); add_mapping(); + add_mapping(); add_mapping(); add_mapping(); add_mapping(); diff --git a/be/src/olap/types.h b/be/src/olap/types.h index fe234d93c54a4e..4822fe04af3a40 100644 --- a/be/src/olap/types.h +++ b/be/src/olap/types.h @@ -286,6 +286,44 @@ struct FieldTypeTraits { } }; +template<> +struct FieldTypeTraits { + typedef uint32_t CppType; + static const char* name() { + return "uint32_t"; + } + static int equal(const void* left, const void* right) { + return generic_equal(left, right); + } + static int cmp(const void* left, const void* right) { + return generic_compare(left, right); + } + static std::string to_string(char* src) { + return generic_to_string(src); + } + static OLAPStatus from_string(char* buf, const std::string& scan_key) { + return generic_from_string(buf, scan_key); + } + static void copy_with_pool(char* dest, const char* src, MemPool* mem_pool) { + generic_copy(dest, src); + } + static void copy_without_pool(char* dest, const char* src) { + generic_copy(dest, src); + } + static void set_to_max(char* buf) { + generic_set_to_max(buf); + } + static void set_to_min(char* buf) { + generic_set_to_min(buf); + } + static bool is_min(char* buf) { + return generic_is_min(buf); + } + static uint32_t hash_code(char* data, uint32_t seed) { + return generic_hash_code(data, seed); + } +}; + template<> struct FieldTypeTraits { typedef int64_t CppType; diff --git a/be/src/util/slice.h b/be/src/util/slice.h index 85176f62d297e8..4ebae11e773b85 100644 --- a/be/src/util/slice.h +++ b/be/src/util/slice.h @@ -71,16 +71,14 @@ struct Slice { Slice(const char* s) : // NOLINT(runtime/explicit) data(const_cast(s)), size(strlen(s)) { } - /* /// @return A pointer to the beginning of the referenced data. - const char* data() const { return data; } + const char* get_data() const { return data; } /// @return A mutable pointer to the beginning of the referenced data. char* mutable_data() { return const_cast(data); } /// @return The length (in bytes) of the referenced data. - size_t size() const { return size; } - */ + size_t get_size() const { return size; } /// @return @c true iff the length of the referenced data is zero. bool empty() const { return size == 0; } From 02e12667a03cd1892ecec91e7b75c6165868638d Mon Sep 17 00:00:00 2001 From: huangkangping Date: Wed, 12 Jun 2019 19:18:12 +0800 Subject: [PATCH 3/8] add bitshuffle page --- be/CMakeLists.txt | 8 +- .../rowset/segment_v2/bitshuffle_page.cpp | 8 +- .../olap/rowset/segment_v2/bitshuffle_page.h | 58 ++--- be/src/olap/rowset/segment_v2/common.h | 6 + be/src/olap/rowset/segment_v2/page_builder.h | 6 +- be/src/olap/rowset/segment_v2/page_decoder.h | 8 +- be/src/util/slice.h | 2 +- be/test/olap/CMakeLists.txt | 1 + .../segment_v2/bitshuffle_page_test.cpp | 205 ++++++++++++++++++ run-ut.sh | 1 + 10 files changed, 261 insertions(+), 42 deletions(-) create mode 100644 be/test/olap/rowset/segment_v2/bitshuffle_page_test.cpp diff --git a/be/CMakeLists.txt b/be/CMakeLists.txt index 8b1b0479d0320d..bfcc98dd4dacc9 100644 --- a/be/CMakeLists.txt +++ b/be/CMakeLists.txt @@ -192,11 +192,11 @@ set_target_properties(librdkafka PROPERTIES IMPORTED_LOCATION ${THIRDPARTY_DIR}/ add_library(libs2 STATIC IMPORTED) set_target_properties(libs2 PROPERTIES IMPORTED_LOCATION ${THIRDPARTY_DIR}/lib/libs2.a) -add_library(libbitshuffle STATIC IMPORTED) -set_target_properties(libbitshuffle PROPERTIES IMPORTED_LOCATION ${THIRDPARTY_DIR}/lib/libbitshuffle.a) +add_library(bitshuffle STATIC IMPORTED) +set_target_properties(bitshuffle PROPERTIES IMPORTED_LOCATION ${THIRDPARTY_DIR}/lib/libbitshuffle.a) -add_library(libroaring STATIC IMPORTED) -set_target_properties(libroaring PROPERTIES IMPORTED_LOCATION ${THIRDPARTY_DIR}/lib/libroaring.a) +add_library(roaring STATIC IMPORTED) +set_target_properties(roaring PROPERTIES IMPORTED_LOCATION ${THIRDPARTY_DIR}/lib/libroaring.a) find_program(THRIFT_COMPILER thrift ${CMAKE_SOURCE_DIR}/bin) diff --git a/be/src/olap/rowset/segment_v2/bitshuffle_page.cpp b/be/src/olap/rowset/segment_v2/bitshuffle_page.cpp index 87a0dda3ed2220..b2b9d0edfe3bf3 100644 --- a/be/src/olap/rowset/segment_v2/bitshuffle_page.cpp +++ b/be/src/olap/rowset/segment_v2/bitshuffle_page.cpp @@ -18,8 +18,6 @@ #include "olap/rowset/segment_v2/bitshuffle_page.h" #include "olap/rowset/segment_v2/common.h" -using doris::Status; - namespace doris { namespace segment_v2 { @@ -67,7 +65,7 @@ Slice BitshufflePageBuilder::finish(rowid_t page_f memcpy(&_data[i * sizeof(converted_value)], &converted_value, sizeof(converted_value)); } ret = _finish(page_first_rowid, sizeof(uint8_t)); - doris::encode_fixed32_le((uint8_t*)ret.mutable_data() + 16, sizeof(uint8_t)); + encode_fixed32_le((uint8_t*)ret.mutable_data() + 16, sizeof(uint8_t)); } else if (max_value <= std::numeric_limits::max()) { for (int i = 0; i < _count; i++) { uint32_t value = cell(i); @@ -75,10 +73,10 @@ Slice BitshufflePageBuilder::finish(rowid_t page_f memcpy(&_data[i * sizeof(converted_value)], &converted_value, sizeof(converted_value)); } ret = _finish(page_first_rowid, sizeof(uint16_t)); - doris::encode_fixed32_le((uint8_t*)ret.mutable_data() + 16, sizeof(uint16_t)); + encode_fixed32_le((uint8_t*)ret.mutable_data() + 16, sizeof(uint16_t)); } else { ret = _finish(page_first_rowid, sizeof(uint32_t)); - doris::encode_fixed32_le((uint8_t*)ret.mutable_data() + 16, sizeof(uint32_t)); + encode_fixed32_le((uint8_t*)ret.mutable_data() + 16, sizeof(uint32_t)); } return ret; } diff --git a/be/src/olap/rowset/segment_v2/bitshuffle_page.h b/be/src/olap/rowset/segment_v2/bitshuffle_page.h index e348e27ff02133..e8f376a902a365 100644 --- a/be/src/olap/rowset/segment_v2/bitshuffle_page.h +++ b/be/src/olap/rowset/segment_v2/bitshuffle_page.h @@ -79,7 +79,7 @@ void abort_with_bitshuffle_error(int64_t val); // // The header is followed by the bitshuffle-compressed element data. // -template +template class BitshufflePageBuilder : public PageBuilder { public: BitshufflePageBuilder(const BuilderOptions* options) : @@ -94,7 +94,7 @@ class BitshufflePageBuilder : public PageBuilder { return _remain_element_capacity == 0; } - doris::Status add(const uint8_t* vals, size_t* count) override { + Status add(const uint8_t* vals, size_t* count) override { DCHECK(!_finished); int to_add = std::min(_remain_element_capacity, *count); _data.append(vals, to_add * SIZE_OF_TYPE); @@ -126,12 +126,12 @@ class BitshufflePageBuilder : public PageBuilder { return _count; } - doris::Status get_dictionary_page(doris::Slice* dictionary_page) override { - return Status::NOT_IMPLEMENTED; + Status get_dictionary_page(Slice* dictionary_page) override { + return Status("NOT_IMPLEMENTED"); } - doris::Status get_bitmap_page(doris::Slice* bitmap_page) override { - return Status::NOT_IMPLEMENTED; + Status get_bitmap_page(Slice* bitmap_page) override { + return Status("NOT_IMPLEMENTED"); } private: @@ -149,8 +149,8 @@ class BitshufflePageBuilder : public PageBuilder { _buffer.resize(HEADER_SIZE + bitshuffle::compress_lz4_bound(num_elems_after_padding, final_size_of_type, 0)); - doris::encode_fixed32_le(&_buffer[0], page_first_rowid); - doris::encode_fixed32_le(&_buffer[4], _count); + encode_fixed32_le(&_buffer[0], page_first_rowid); + encode_fixed32_le(&_buffer[4], _count); int64_t bytes = bitshuffle::compress_lz4(_data.data(), &_buffer[HEADER_SIZE], num_elems_after_padding, final_size_of_type, 0); if (PREDICT_FALSE(bytes < 0)) { @@ -161,14 +161,14 @@ class BitshufflePageBuilder : public PageBuilder { // since we have logged fatal in abort_with_bitshuffle_error(). return Slice(); } - doris::encode_fixed32_le(&_buffer[8], HEADER_SIZE + bytes); - doris::encode_fixed32_le(&_buffer[12], num_elems_after_padding); - doris::encode_fixed32_le(&_buffer[16], final_size_of_type); + encode_fixed32_le(&_buffer[8], HEADER_SIZE + bytes); + encode_fixed32_le(&_buffer[12], num_elems_after_padding); + encode_fixed32_le(&_buffer[16], final_size_of_type); _finished = true; return Slice(_buffer.data(), HEADER_SIZE + bytes); } - typedef typename doris::TypeTraits::CppType CppType; + typedef typename TypeTraits::CppType CppType; CppType cell(int idx) const { DCHECK_GE(idx, 0); @@ -180,7 +180,7 @@ class BitshufflePageBuilder : public PageBuilder { // Length of a header. static const size_t HEADER_SIZE = sizeof(uint32_t) * 5; enum { - SIZE_OF_TYPE = doris::TypeTraits::size + SIZE_OF_TYPE = TypeTraits::size }; const BuilderOptions* _options; uint32_t _count; @@ -190,10 +190,10 @@ class BitshufflePageBuilder : public PageBuilder { kudu::faststring _buffer; }; -template +template class BitShufflePageDecoder : public PageDecoder { public: - BitShufflePageDecoder(doris::Slice data) : _data(data), + BitShufflePageDecoder(Slice data) : _data(data), _parsed(false), _page_first_ordinal(0), _num_elements(0), @@ -202,23 +202,23 @@ class BitShufflePageDecoder : public PageDecoder { _size_of_element(0), _cur_index(0) { } - doris::Status init() override { + Status init() override { CHECK(!_parsed); if (_data.size < HEADER_SIZE) { std::stringstream ss; ss << "file corrupton: invalid data size:" << _data.size << ", header size:" << HEADER_SIZE; return Status(ss.str()); } - _page_first_ordinal = doris::decode_fixed32_le((const uint8_t*)&_data[0]); - _num_elements = doris::decode_fixed32_le((const uint8_t*)&_data[4]); - _compressed_size = doris::decode_fixed32_le((const uint8_t*)&_data[8]); + _page_first_ordinal = decode_fixed32_le((const uint8_t*)&_data[0]); + _num_elements = decode_fixed32_le((const uint8_t*)&_data[4]); + _compressed_size = decode_fixed32_le((const uint8_t*)&_data[8]); if (_compressed_size != _data.size) { std::stringstream ss; ss << "Size information unmatched, _compressed_size:" << _compressed_size << ", data size:" << _data.size; return Status(ss.str()); } - _num_element_after_padding = doris::decode_fixed32_le((const uint8_t*)&_data[12]); + _num_element_after_padding = decode_fixed32_le((const uint8_t*)&_data[12]); if (_num_element_after_padding != ALIGN_UP(_num_elements, 8)) { std::stringstream ss; ss << "num of element information corrupted," @@ -226,7 +226,7 @@ class BitShufflePageDecoder : public PageDecoder { << ", _num_elements:" << _num_elements; return Status(ss.str()); } - _size_of_element = doris::decode_fixed32_le((const uint8_t*)&_data[16]); + _size_of_element = decode_fixed32_le((const uint8_t*)&_data[16]); switch (_size_of_element) { case 1: case 2: @@ -260,7 +260,7 @@ class BitShufflePageDecoder : public PageDecoder { return Status::OK; } - doris::Status seek_to_position_in_page(size_t pos) override { + Status seek_to_position_in_page(size_t pos) override { CHECK(_parsed) << "Must call init()"; if (PREDICT_FALSE(_num_elements == 0)) { DCHECK_EQ(0, pos); @@ -272,7 +272,7 @@ class BitShufflePageDecoder : public PageDecoder { return Status::OK; } - doris::Status next_batch(size_t* n, doris::ColumnVector* dst, doris::MemPool* mem_pool) override { + Status next_batch(size_t* n, ColumnVectorView* dst) override { DCHECK(_parsed); if (PREDICT_FALSE(*n == 0 || _cur_index >= _num_elements)) { *n = 0; @@ -280,7 +280,7 @@ class BitShufflePageDecoder : public PageDecoder { } size_t max_fetch = std::min(*n, static_cast(_num_elements - _cur_index)); - memcpy(dst->col_data(), &_decoded[_cur_index * SIZE_OF_TYPE], max_fetch * SIZE_OF_TYPE); + _copy_next_values(max_fetch, dst->column_vector()->col_data()); *n = max_fetch; _cur_index += max_fetch; @@ -300,6 +300,10 @@ class BitShufflePageDecoder : public PageDecoder { } private: + void _copy_next_values(size_t n, void* data) { + memcpy(data, &_decoded[_cur_index * SIZE_OF_TYPE], n * SIZE_OF_TYPE); + } + Status _decode() { if (_num_elements > 0) { int64_t bytes; @@ -316,15 +320,15 @@ class BitShufflePageDecoder : public PageDecoder { return Status::OK; } - typedef typename doris::TypeTraits::CppType CppType; + typedef typename TypeTraits::CppType CppType; // Length of a header. static const size_t HEADER_SIZE = sizeof(uint32_t) * 5; enum { - SIZE_OF_TYPE = doris::TypeTraits::size + SIZE_OF_TYPE = TypeTraits::size }; - doris::Slice _data; + Slice _data; bool _parsed; rowid_t _page_first_ordinal; size_t _num_elements; diff --git a/be/src/olap/rowset/segment_v2/common.h b/be/src/olap/rowset/segment_v2/common.h index 61e276f2dd77e6..91bf7489638000 100644 --- a/be/src/olap/rowset/segment_v2/common.h +++ b/be/src/olap/rowset/segment_v2/common.h @@ -20,6 +20,12 @@ #include #include +// Round down 'x' to the nearest 'align' boundary +#define ALIGN_DOWN(x, align) ((x) & (~(align) + 1)) + +// Round up 'x' to the nearest 'align' boundary +#define ALIGN_UP(x, align) (((x) + ((align) - 1)) & (~(align) + 1)) + namespace doris { namespace segment_v2 { diff --git a/be/src/olap/rowset/segment_v2/page_builder.h b/be/src/olap/rowset/segment_v2/page_builder.h index e6952766f415cc..f2e6d498e5fa2e 100644 --- a/be/src/olap/rowset/segment_v2/page_builder.h +++ b/be/src/olap/rowset/segment_v2/page_builder.h @@ -37,6 +37,8 @@ namespace segment_v2 { // 5. Bitmap Index Page: store bitmap index of data class PageBuilder { public: + PageBuilder() { } + virtual ~PageBuilder() { } // Used by column writer to determine whether the current page is full. @@ -51,10 +53,10 @@ class PageBuilder { virtual doris::Status add(const uint8_t* vals, size_t* count) = 0; // Get the dictionary page for dictionary encoding mode column. - virtual doris::Status get_dictionary_page(doris::Slice* dictionary_page); + virtual doris::Status get_dictionary_page(doris::Slice* dictionary_page) = 0; // Get the bitmap page for bitmap indexed column. - virtual doris::Status get_bitmap_page(doris::Slice* bitmap_page); + virtual doris::Status get_bitmap_page(doris::Slice* bitmap_page) = 0; // Return a Slice which represents the encoded data of current page. // diff --git a/be/src/olap/rowset/segment_v2/page_decoder.h b/be/src/olap/rowset/segment_v2/page_decoder.h index 972864235ac05b..88b6d7891d4393 100644 --- a/be/src/olap/rowset/segment_v2/page_decoder.h +++ b/be/src/olap/rowset/segment_v2/page_decoder.h @@ -27,11 +27,13 @@ namespace segment_v2 { // PageDecoder is used to decode page page. class PageDecoder { public: + PageDecoder() { } + virtual ~PageDecoder() { } // Call this to do some preparation for decoder. // eg: parse data page header - virtual doris::Status init() = 0; + virtual Status init() = 0; // Seek the decoder to the given positional index of the page. // For example, seek_to_position_in_page(0) seeks to the first @@ -39,7 +41,7 @@ class PageDecoder { // // It is an error to call this with a value larger than Count(). // Doing so has undefined results. - virtual doris::Status seek_to_position_in_page(size_t pos) = 0; + virtual Status seek_to_position_in_page(size_t pos) = 0; // Seek the decoder forward by a given number of rows, or to the end // of the page. This is primarily used to skip over data. @@ -60,7 +62,7 @@ class PageDecoder { // In the case that the values are themselves references // to other memory (eg Slices), the referred-to memory is // allocated in the column_vector_view's mem_pool. - virtual doris::Status next_batch(size_t* n, doris::ColumnVectorView* column_vector_view) = 0; + virtual Status next_batch(size_t* n, ColumnVectorView* dst) = 0; // Return the number of elements in this page. virtual size_t count() const = 0; diff --git a/be/src/util/slice.h b/be/src/util/slice.h index 4ebae11e773b85..5c31072fe2e1e4 100644 --- a/be/src/util/slice.h +++ b/be/src/util/slice.h @@ -84,7 +84,7 @@ struct Slice { bool empty() const { return size == 0; } /// @return the n-th byte in the referenced data. - const char operator[](size_t n) const { + const char& operator[](size_t n) const { assert(n < size); return data[n]; } diff --git a/be/test/olap/CMakeLists.txt b/be/test/olap/CMakeLists.txt index 3084c03f047188..f5bed29fd6fca3 100644 --- a/be/test/olap/CMakeLists.txt +++ b/be/test/olap/CMakeLists.txt @@ -46,3 +46,4 @@ ADD_BE_TEST(olap_header_manager_test) ADD_BE_TEST(field_info_test) ADD_BE_TEST(rowset/segment_v2/ordinal_page_index_test) ADD_BE_TEST(rowset/segment_v2/encoding_info_test) +ADD_BE_TEST(rowset/segment_v2/bitshuffle_page_test) diff --git a/be/test/olap/rowset/segment_v2/bitshuffle_page_test.cpp b/be/test/olap/rowset/segment_v2/bitshuffle_page_test.cpp new file mode 100644 index 00000000000000..a45044eff035f8 --- /dev/null +++ b/be/test/olap/rowset/segment_v2/bitshuffle_page_test.cpp @@ -0,0 +1,205 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include +#include + +#include "olap/rowset/segment_v2/options.h" +#include "olap/rowset/segment_v2/page_builder.h" +#include "olap/rowset/segment_v2/page_decoder.h" +#include "olap/rowset/segment_v2/bitshuffle_page.h" +#include "util/logging.h" + +using doris::segment_v2::BuilderOptions; + +namespace doris { + +class BitShufflePageTest : public testing::Test { +public: + virtual ~BitShufflePageTest() {} + + BuilderOptions* new_builder_options() { + auto ret = new BuilderOptions(); + ret->data_page_size = 256 * 1024; + return ret; + } + + template + void copy_one(PageDecoderType* decoder, typename TypeTraits::CppType* ret) { + std::unique_ptr dst_vector(new ColumnVector()); + dst_vector->set_col_data((void*)ret); + std::unique_ptr mem_tracer(new MemTracker(-1)); + std::unique_ptr mem_pool(new MemPool(mem_tracer.get())); + ColumnVectorView column_vector_view(dst_vector.get(), 0, mem_pool.get()); + size_t n = 1; + decoder->_copy_next_values(n, column_vector_view.column_vector()->col_data()); + ASSERT_EQ(1, n); + } + + template + void test_encode_decode_page_template(typename TypeTraits::CppType* src, + size_t size) { + typedef typename TypeTraits::CppType CppType; + const size_t ordinal_pos_base = 12345; + std::unique_ptr opts(new_builder_options()); + PageBuilderType page_builder(opts.get()); + + page_builder.add(reinterpret_cast(src), &size); + Slice s = page_builder.finish(ordinal_pos_base); + + PageDecoderType page_decoder(s); + Status status = page_decoder.init(); + ASSERT_TRUE(status.ok()); + ASSERT_EQ(ordinal_pos_base, page_decoder.get_first_rowid()); + ASSERT_EQ(0, page_decoder.current_index()); + + std::unique_ptr dst_vector(new ColumnVector()); + std::unique_ptr mem_tracer(new MemTracker(-1)); + std::unique_ptr mem_pool(new MemPool(mem_tracer.get())); + CppType* values = reinterpret_cast(mem_pool->allocate(size * sizeof(CppType))); + dst_vector->set_col_data(values); + ColumnVectorView column_vector_view(dst_vector.get(), 0, mem_pool.get()); + status = page_decoder.next_batch(&size, &column_vector_view); + ASSERT_TRUE(status.ok()); + + CppType* decoded = (CppType*)dst_vector->col_data(); + for (uint i = 0; i < size; i++) { + if (src[i] != decoded[i]) { + FAIL() << "Fail at index " << i << + " inserted=" << src[i] << " got=" << decoded[i]; + } + } + + // Test Seek within block by ordinal + for (int i = 0; i < 100; i++) { + int seek_off = random() % size; + page_decoder.seek_to_position_in_page(seek_off); + EXPECT_EQ((int32_t )(seek_off), page_decoder.current_index()); + CppType ret; + copy_one(&page_decoder, &ret); + EXPECT_EQ(decoded[seek_off], ret); + } + } +}; + +// Test for bitshuffle block, for INT32, INT64, FLOAT, DOUBLE +TEST_F(BitShufflePageTest, TestBitShuffleInt32BlockEncoderRandom) { + const uint32_t size = 10000; + + std::unique_ptr ints(new int32_t[size]); + for (int i = 0; i < size; i++) { + ints.get()[i] = random(); + } + + test_encode_decode_page_template, + segment_v2::BitShufflePageDecoder >(ints.get(), size); +} + +TEST_F(BitShufflePageTest, TestBitShuffleInt64BlockEncoderRandom) { + const uint32_t size = 10000; + + std::unique_ptr ints(new int64_t[size]); + for (int i = 0; i < size; i++) { + ints.get()[i] = random(); + } + + test_encode_decode_page_template, + segment_v2::BitShufflePageDecoder >(ints.get(), size); +} + +TEST_F(BitShufflePageTest, TestBitShuffleFloatBlockEncoderRandom) { + const uint32_t size = 10000; + + std::unique_ptr floats(new float[size]); + for (int i = 0; i < size; i++) { + floats.get()[i] = random() + static_cast(random())/INT_MAX; + } + + test_encode_decode_page_template, + segment_v2::BitShufflePageDecoder >(floats.get(), size); +} + +TEST_F(BitShufflePageTest, TestBitShuffleDoubleBlockEncoderRandom) { + const uint32_t size = 10000; + + std::unique_ptr doubles(new double[size]); + for (int i = 0; i < size; i++) { + doubles.get()[i] = random() + static_cast(random())/INT_MAX; + } + + test_encode_decode_page_template, + segment_v2::BitShufflePageDecoder >(doubles.get(), size); +} + +TEST_F(BitShufflePageTest, TestBitShuffleDoubleBlockEncoderEqual) { + const uint32_t size = 10000; + + std::unique_ptr doubles(new double[size]); + for (int i = 0; i < size; i++) { + doubles.get()[i] = 19880217.19890323; + } + + test_encode_decode_page_template, + segment_v2::BitShufflePageDecoder >(doubles.get(), size); +} + +TEST_F(BitShufflePageTest, TestBitShuffleDoubleBlockEncoderSequence) { + const uint32_t size = 10000; + + double base = 19880217.19890323; + double delta = 13.14; + std::unique_ptr doubles(new double[size]); + for (int i = 0; i < size; i++) { + base = base + delta; + doubles.get()[i] = base; + } + + test_encode_decode_page_template, + segment_v2::BitShufflePageDecoder >(doubles.get(), size); +} + +TEST_F(BitShufflePageTest, TestBitShuffleInt32BlockEncoderEqual) { + const uint32_t size = 10000; + + std::unique_ptr ints(new int32_t[size]); + for (int i = 0; i < size; i++) { + ints.get()[i] = 12345; + } + + test_encode_decode_page_template, + segment_v2::BitShufflePageDecoder >(ints.get(), size); +} + +TEST_F(BitShufflePageTest, TestBitShuffleInt32BlockEncoderSequence) { + const uint32_t size = 10000; + + std::unique_ptr ints(new int32_t[size]); + int32_t number = 0; + for (int i = 0; i < size; i++) { + ints.get()[i] = ++number; + } + + test_encode_decode_page_template, + segment_v2::BitShufflePageDecoder >(ints.get(), size); +} + +} + +int main(int argc, char** argv) { + testing::InitGoogleTest(&argc, argv); + return RUN_ALL_TESTS(); +} diff --git a/run-ut.sh b/run-ut.sh index 203bdd9c88be55..2fd7e2e3ee7b0a 100755 --- a/run-ut.sh +++ b/run-ut.sh @@ -235,6 +235,7 @@ ${DORIS_TEST_BINARY_DIR}/olap/delta_writer_test ${DORIS_TEST_BINARY_DIR}/olap/field_info_test ${DORIS_TEST_BINARY_DIR}/olap/rowset/segment_v2/encoding_info_test ${DORIS_TEST_BINARY_DIR}/olap/rowset/segment_v2/ordinal_page_index_test +${DORIS_TEST_BINARY_DIR}/olap/rowset/segment_v2/bitshuffle_page_test # Running routine load test ${DORIS_TEST_BINARY_DIR}/runtime/kafka_consumer_pipe_test From 941bb09a94ed71a8a8c71e3a1827e88a445312fa Mon Sep 17 00:00:00 2001 From: huangkangping Date: Fri, 14 Jun 2019 10:01:47 +0800 Subject: [PATCH 4/8] remove additional empty --- be/src/olap/rowset/segment_v2/bitshuffle_page.cpp | 2 -- be/src/olap/rowset/segment_v2/bitshuffle_page.h | 2 -- be/src/olap/rowset/segment_v2/bitshuffle_wrapper.h | 2 -- be/src/olap/rowset/segment_v2/common.h | 4 ++-- be/src/olap/rowset/segment_v2/options.h | 2 -- be/src/olap/rowset/segment_v2/page_builder.h | 2 -- be/src/olap/rowset/segment_v2/page_decoder.h | 2 -- 7 files changed, 2 insertions(+), 14 deletions(-) diff --git a/be/src/olap/rowset/segment_v2/bitshuffle_page.cpp b/be/src/olap/rowset/segment_v2/bitshuffle_page.cpp index b2b9d0edfe3bf3..5a702c5fce113e 100644 --- a/be/src/olap/rowset/segment_v2/bitshuffle_page.cpp +++ b/be/src/olap/rowset/segment_v2/bitshuffle_page.cpp @@ -19,7 +19,6 @@ #include "olap/rowset/segment_v2/common.h" namespace doris { - namespace segment_v2 { void abort_with_bitshuffle_error(int64_t val) { @@ -82,5 +81,4 @@ Slice BitshufflePageBuilder::finish(rowid_t page_f } } // namespace segment_v2 - } // namespace doris diff --git a/be/src/olap/rowset/segment_v2/bitshuffle_page.h b/be/src/olap/rowset/segment_v2/bitshuffle_page.h index e8f376a902a365..89e05ea4ab7aa5 100644 --- a/be/src/olap/rowset/segment_v2/bitshuffle_page.h +++ b/be/src/olap/rowset/segment_v2/bitshuffle_page.h @@ -36,7 +36,6 @@ #include "olap/rowset/segment_v2/bitshuffle_wrapper.h" namespace doris { - namespace segment_v2 { void abort_with_bitshuffle_error(int64_t val); @@ -341,5 +340,4 @@ class BitShufflePageDecoder : public PageDecoder { }; } // namespace segment_v2 - } // namespace doris diff --git a/be/src/olap/rowset/segment_v2/bitshuffle_wrapper.h b/be/src/olap/rowset/segment_v2/bitshuffle_wrapper.h index e83cb9e8f818c9..38c1e7231f947c 100644 --- a/be/src/olap/rowset/segment_v2/bitshuffle_wrapper.h +++ b/be/src/olap/rowset/segment_v2/bitshuffle_wrapper.h @@ -23,7 +23,6 @@ // This namespace has wrappers for the Bitshuffle library which do runtime dispatch to // either AVX2-accelerated or regular SSE2 implementations based on the available CPU. namespace doris { - namespace bitshuffle { // See for documentation on these functions. @@ -32,5 +31,4 @@ int64_t compress_lz4(void* in, void* out, size_t size, size_t elem_size, size_t int64_t decompress_lz4(void* in, void* out, size_t size, size_t elem_size, size_t block_size); } // namespace bitshuffle - } // namespace doris diff --git a/be/src/olap/rowset/segment_v2/common.h b/be/src/olap/rowset/segment_v2/common.h index 91bf7489638000..ba69121cedc244 100644 --- a/be/src/olap/rowset/segment_v2/common.h +++ b/be/src/olap/rowset/segment_v2/common.h @@ -31,5 +31,5 @@ namespace segment_v2 { using rowid_t = uint32_t; -} -} +} // namespace segment_v2 +} // namespace doris diff --git a/be/src/olap/rowset/segment_v2/options.h b/be/src/olap/rowset/segment_v2/options.h index 3fd26d7a45f318..1035b280649ccc 100644 --- a/be/src/olap/rowset/segment_v2/options.h +++ b/be/src/olap/rowset/segment_v2/options.h @@ -20,7 +20,6 @@ #include "gen_cpp/segment_v2.pb.h" namespace doris { - namespace segment_v2 { struct BuilderOptions { @@ -40,5 +39,4 @@ struct BuilderOptions { }; } // namespace segment_v2 - } // namespace doris diff --git a/be/src/olap/rowset/segment_v2/page_builder.h b/be/src/olap/rowset/segment_v2/page_builder.h index f2e6d498e5fa2e..c3873911813b4e 100644 --- a/be/src/olap/rowset/segment_v2/page_builder.h +++ b/be/src/olap/rowset/segment_v2/page_builder.h @@ -25,7 +25,6 @@ #include "olap/rowset/segment_v2/common.h" namespace doris { - namespace segment_v2 { // PageBuilder is used to build page @@ -76,5 +75,4 @@ class PageBuilder { }; } // namespace segment_v2 - } // namespace doris diff --git a/be/src/olap/rowset/segment_v2/page_decoder.h b/be/src/olap/rowset/segment_v2/page_decoder.h index 88b6d7891d4393..a256978192af55 100644 --- a/be/src/olap/rowset/segment_v2/page_decoder.h +++ b/be/src/olap/rowset/segment_v2/page_decoder.h @@ -21,7 +21,6 @@ #include "common/status.h" namespace doris { - namespace segment_v2 { // PageDecoder is used to decode page page. @@ -79,5 +78,4 @@ class PageDecoder { }; } // namespace segment_v2 - } // namespace doris From f545e0c89da16005cd543c5dc6e49ad4006185b6 Mon Sep 17 00:00:00 2001 From: huangkangping Date: Mon, 17 Jun 2019 14:54:41 +0800 Subject: [PATCH 5/8] fix pr problems --- be/CMakeLists.txt | 1 - be/src/olap/CMakeLists.txt | 6 ++- .../rowset/segment_v2/bitshuffle_page.cpp | 42 +++++++++---------- .../olap/rowset/segment_v2/bitshuffle_page.h | 8 ++-- .../rowset/segment_v2/bitshuffle_wrapper.cpp | 2 - be/src/olap/types.cpp | 1 - be/src/olap/types.h | 38 ----------------- 7 files changed, 29 insertions(+), 69 deletions(-) diff --git a/be/CMakeLists.txt b/be/CMakeLists.txt index bfcc98dd4dacc9..c2ea031f5e3980 100644 --- a/be/CMakeLists.txt +++ b/be/CMakeLists.txt @@ -460,7 +460,6 @@ set(DORIS_LINK_LIBS Webserver TestUtil Geo - SegmentV2 ${WL_END_GROUP} ) diff --git a/be/src/olap/CMakeLists.txt b/be/src/olap/CMakeLists.txt index 766aa29e295c50..26a3a8b7e71408 100644 --- a/be/src/olap/CMakeLists.txt +++ b/be/src/olap/CMakeLists.txt @@ -21,8 +21,6 @@ set(LIBRARY_OUTPUT_PATH "${BUILD_DIR}/src/olap") # where to put generated binaries set(EXECUTABLE_OUTPUT_PATH "${BUILD_DIR}/src/olap") -add_subdirectory(rowset/segment_v2) - add_library(Olap STATIC aggregate_func.cpp base_compaction.cpp @@ -85,4 +83,8 @@ add_library(Olap STATIC types.cpp utils.cpp wrapper_field.cpp + rowset/segment_v2/ordinal_page_index.cpp + rowset/segment_v2/encoding_info.cpp + rowset/segment_v2/bitshuffle_page.cpp + rowset/segment_v2/bitshuffle_wrapper.cpp ) diff --git a/be/src/olap/rowset/segment_v2/bitshuffle_page.cpp b/be/src/olap/rowset/segment_v2/bitshuffle_page.cpp index 5a702c5fce113e..fec8512a1ad4f1 100644 --- a/be/src/olap/rowset/segment_v2/bitshuffle_page.cpp +++ b/be/src/olap/rowset/segment_v2/bitshuffle_page.cpp @@ -24,31 +24,31 @@ namespace segment_v2 { void abort_with_bitshuffle_error(int64_t val) { switch (val) { case -1: - LOG(FATAL) << "Failed to allocate memory"; + LOG(WARNING) << "Failed to allocate memory"; break; case -11: - LOG(FATAL) << "Missing SSE"; + LOG(WARNING) << "Missing SSE"; break; case -12: - LOG(FATAL) << "Missing AVX"; + LOG(WARNING) << "Missing AVX"; break; case -80: - LOG(FATAL) << "Input size not a multiple of 8"; + LOG(WARNING) << "Input size not a multiple of 8"; break; case -81: - LOG(FATAL) << "block_size not multiple of 8"; + LOG(WARNING) << "block_size not multiple of 8"; break; case -91: - LOG(FATAL) << "Decompression error, wrong number of bytes processed"; + LOG(WARNING) << "Decompression error, wrong number of bytes processed"; break; default: - LOG(FATAL) << "Error internal to compression routine"; + LOG(WARNING) << "Error internal to compression routine"; } } template<> -Slice BitshufflePageBuilder::finish(rowid_t page_first_rowid) { - uint32_t max_value = 0; +Slice BitshufflePageBuilder::finish(rowid_t page_first_rowid) { + int32_t max_value = 0; for (int i = 0; i < _count; i++) { max_value = std::max(max_value, cell(i)); } @@ -57,25 +57,25 @@ Slice BitshufflePageBuilder::finish(rowid_t page_f // set the header information accordingly, so that the decoder can recover the // encoded data. Slice ret; - if (max_value <= std::numeric_limits::max()) { + if (max_value <= std::numeric_limits::max()) { for (int i = 0; i < _count; i++) { - uint32_t value = cell(i); - uint8_t converted_value = static_cast(value); + int32_t value = cell(i); + int8_t converted_value = static_cast(value); memcpy(&_data[i * sizeof(converted_value)], &converted_value, sizeof(converted_value)); } - ret = _finish(page_first_rowid, sizeof(uint8_t)); - encode_fixed32_le((uint8_t*)ret.mutable_data() + 16, sizeof(uint8_t)); - } else if (max_value <= std::numeric_limits::max()) { + ret = _finish(page_first_rowid, sizeof(int8_t)); + encode_fixed32_le((uint8_t*)ret.mutable_data() + 16, sizeof(int8_t)); + } else if (max_value <= std::numeric_limits::max()) { for (int i = 0; i < _count; i++) { - uint32_t value = cell(i); - uint16_t converted_value = static_cast(value); + int32_t value = cell(i); + int16_t converted_value = static_cast(value); memcpy(&_data[i * sizeof(converted_value)], &converted_value, sizeof(converted_value)); } - ret = _finish(page_first_rowid, sizeof(uint16_t)); - encode_fixed32_le((uint8_t*)ret.mutable_data() + 16, sizeof(uint16_t)); + ret = _finish(page_first_rowid, sizeof(int16_t)); + encode_fixed32_le((uint8_t*)ret.mutable_data() + 16, sizeof(int16_t)); } else { - ret = _finish(page_first_rowid, sizeof(uint32_t)); - encode_fixed32_le((uint8_t*)ret.mutable_data() + 16, sizeof(uint32_t)); + ret = _finish(page_first_rowid, sizeof(int32_t)); + encode_fixed32_le((uint8_t*)ret.mutable_data() + 16, sizeof(int32_t)); } return ret; } diff --git a/be/src/olap/rowset/segment_v2/bitshuffle_page.h b/be/src/olap/rowset/segment_v2/bitshuffle_page.h index 89e05ea4ab7aa5..20e6a06c1d546b 100644 --- a/be/src/olap/rowset/segment_v2/bitshuffle_page.h +++ b/be/src/olap/rowset/segment_v2/bitshuffle_page.h @@ -185,8 +185,8 @@ class BitshufflePageBuilder : public PageBuilder { uint32_t _count; int _remain_element_capacity; bool _finished; - kudu::faststring _data; - kudu::faststring _buffer; + faststring _data; + faststring _buffer; }; template @@ -260,7 +260,7 @@ class BitShufflePageDecoder : public PageDecoder { } Status seek_to_position_in_page(size_t pos) override { - CHECK(_parsed) << "Must call init()"; + DCHECK(_parsed) << "Must call init()"; if (PREDICT_FALSE(_num_elements == 0)) { DCHECK_EQ(0, pos); return Status("invalid pos"); @@ -336,7 +336,7 @@ class BitShufflePageDecoder : public PageDecoder { int _size_of_element; size_t _cur_index; - kudu::faststring _decoded; + faststring _decoded; }; } // namespace segment_v2 diff --git a/be/src/olap/rowset/segment_v2/bitshuffle_wrapper.cpp b/be/src/olap/rowset/segment_v2/bitshuffle_wrapper.cpp index ac676f64573ea7..36ceb8ce392e9b 100644 --- a/be/src/olap/rowset/segment_v2/bitshuffle_wrapper.cpp +++ b/be/src/olap/rowset/segment_v2/bitshuffle_wrapper.cpp @@ -37,7 +37,6 @@ using base::CPU; namespace doris { - namespace bitshuffle { // Function pointers which will be assigned the correct implementation @@ -79,5 +78,4 @@ size_t compress_lz4_bound(size_t size, size_t elem_size, size_t block_size) { } } // namespace bitshuffle - } // namespace doris diff --git a/be/src/olap/types.cpp b/be/src/olap/types.cpp index e5f5623951a587..d3f5e63f152af2 100644 --- a/be/src/olap/types.cpp +++ b/be/src/olap/types.cpp @@ -60,7 +60,6 @@ TypeInfoResolver::TypeInfoResolver() { add_mapping(); add_mapping(); add_mapping(); - add_mapping(); add_mapping(); add_mapping(); add_mapping(); diff --git a/be/src/olap/types.h b/be/src/olap/types.h index 4822fe04af3a40..fe234d93c54a4e 100644 --- a/be/src/olap/types.h +++ b/be/src/olap/types.h @@ -286,44 +286,6 @@ struct FieldTypeTraits { } }; -template<> -struct FieldTypeTraits { - typedef uint32_t CppType; - static const char* name() { - return "uint32_t"; - } - static int equal(const void* left, const void* right) { - return generic_equal(left, right); - } - static int cmp(const void* left, const void* right) { - return generic_compare(left, right); - } - static std::string to_string(char* src) { - return generic_to_string(src); - } - static OLAPStatus from_string(char* buf, const std::string& scan_key) { - return generic_from_string(buf, scan_key); - } - static void copy_with_pool(char* dest, const char* src, MemPool* mem_pool) { - generic_copy(dest, src); - } - static void copy_without_pool(char* dest, const char* src) { - generic_copy(dest, src); - } - static void set_to_max(char* buf) { - generic_set_to_max(buf); - } - static void set_to_min(char* buf) { - generic_set_to_min(buf); - } - static bool is_min(char* buf) { - return generic_is_min(buf); - } - static uint32_t hash_code(char* data, uint32_t seed) { - return generic_hash_code(data, seed); - } -}; - template<> struct FieldTypeTraits { typedef int64_t CppType; From 9b8bbdacc543020b702f6e921b8d1243b22f8252 Mon Sep 17 00:00:00 2001 From: huangkangping Date: Tue, 18 Jun 2019 10:08:21 +0800 Subject: [PATCH 6/8] add release api and remove cmake --- be/src/olap/rowset/segment_v2/CMakeLists.txt | 29 ------------------- .../olap/rowset/segment_v2/bitshuffle_page.h | 21 ++++++++++---- be/src/olap/rowset/segment_v2/page_builder.h | 6 ++++ 3 files changed, 21 insertions(+), 35 deletions(-) delete mode 100644 be/src/olap/rowset/segment_v2/CMakeLists.txt diff --git a/be/src/olap/rowset/segment_v2/CMakeLists.txt b/be/src/olap/rowset/segment_v2/CMakeLists.txt deleted file mode 100644 index 73abf915b79754..00000000000000 --- a/be/src/olap/rowset/segment_v2/CMakeLists.txt +++ /dev/null @@ -1,29 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -# where to put generated libraries -set(LIBRARY_OUTPUT_PATH "${BUILD_DIR}/src/olap/rowset/segment_v2") - -# where to put generated binaries -set(EXECUTABLE_OUTPUT_PATH "${BUILD_DIR}/src/olap/rowset/segment_v2") - -add_library(SegmentV2 STATIC - ordinal_page_index.cpp - encoding_info.cpp - bitshuffle_page.cpp - bitshuffle_wrapper.cpp -) diff --git a/be/src/olap/rowset/segment_v2/bitshuffle_page.h b/be/src/olap/rowset/segment_v2/bitshuffle_page.h index 20e6a06c1d546b..38ddcfea5825e6 100644 --- a/be/src/olap/rowset/segment_v2/bitshuffle_page.h +++ b/be/src/olap/rowset/segment_v2/bitshuffle_page.h @@ -104,6 +104,14 @@ class BitshufflePageBuilder : public PageBuilder { return Status::OK; } + Status get_dictionary_page(Slice* dictionary_page) override { + return Status("NOT_IMPLEMENTED"); + } + + Status get_bitmap_page(Slice* bitmap_page) override { + return Status("NOT_IMPLEMENTED"); + } + Slice finish(rowid_t page_first_rowid) override { return _finish(page_first_rowid, SIZE_OF_TYPE); } @@ -125,12 +133,13 @@ class BitshufflePageBuilder : public PageBuilder { return _count; } - Status get_dictionary_page(Slice* dictionary_page) override { - return Status("NOT_IMPLEMENTED"); - } - - Status get_bitmap_page(Slice* bitmap_page) override { - return Status("NOT_IMPLEMENTED"); + // this api will release the memory ownership of encoded data + // Note: + // release() should be called after finish + // reset() should be called after this function before reuse the builder + void release() override { + uint8_t* ret = _buffer.release(); + (void)ret; } private: diff --git a/be/src/olap/rowset/segment_v2/page_builder.h b/be/src/olap/rowset/segment_v2/page_builder.h index c3873911813b4e..c1628cf5ec8471 100644 --- a/be/src/olap/rowset/segment_v2/page_builder.h +++ b/be/src/olap/rowset/segment_v2/page_builder.h @@ -70,6 +70,12 @@ class PageBuilder { // Return the number of entries that have been added to the page. virtual size_t count() const = 0; + // This api is for release the resource owned by builder + // It means it will transfer the ownership of some resource to other. + // This api is always called after finish + // and should be followed by reset() before reuse the builder + virtual void release() = 0; + private: DISALLOW_COPY_AND_ASSIGN(PageBuilder); }; From 1d5e56b16a26ec8960c976aa0967dba7a89dabba Mon Sep 17 00:00:00 2001 From: huangkangping Date: Tue, 18 Jun 2019 20:30:49 +0800 Subject: [PATCH 7/8] fix pr problems --- be/src/olap/CMakeLists.txt | 1 - .../rowset/segment_v2/bitshuffle_page.cpp | 84 ------------ .../olap/rowset/segment_v2/bitshuffle_page.h | 125 +++++++++++------- be/src/olap/rowset/segment_v2/options.h | 16 +-- .../segment_v2/bitshuffle_page_test.cpp | 13 +- 5 files changed, 81 insertions(+), 158 deletions(-) delete mode 100644 be/src/olap/rowset/segment_v2/bitshuffle_page.cpp diff --git a/be/src/olap/CMakeLists.txt b/be/src/olap/CMakeLists.txt index 26a3a8b7e71408..da809926116306 100644 --- a/be/src/olap/CMakeLists.txt +++ b/be/src/olap/CMakeLists.txt @@ -85,6 +85,5 @@ add_library(Olap STATIC wrapper_field.cpp rowset/segment_v2/ordinal_page_index.cpp rowset/segment_v2/encoding_info.cpp - rowset/segment_v2/bitshuffle_page.cpp rowset/segment_v2/bitshuffle_wrapper.cpp ) diff --git a/be/src/olap/rowset/segment_v2/bitshuffle_page.cpp b/be/src/olap/rowset/segment_v2/bitshuffle_page.cpp deleted file mode 100644 index fec8512a1ad4f1..00000000000000 --- a/be/src/olap/rowset/segment_v2/bitshuffle_page.cpp +++ /dev/null @@ -1,84 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -#include "olap/rowset/segment_v2/bitshuffle_page.h" -#include "olap/rowset/segment_v2/common.h" - -namespace doris { -namespace segment_v2 { - -void abort_with_bitshuffle_error(int64_t val) { - switch (val) { - case -1: - LOG(WARNING) << "Failed to allocate memory"; - break; - case -11: - LOG(WARNING) << "Missing SSE"; - break; - case -12: - LOG(WARNING) << "Missing AVX"; - break; - case -80: - LOG(WARNING) << "Input size not a multiple of 8"; - break; - case -81: - LOG(WARNING) << "block_size not multiple of 8"; - break; - case -91: - LOG(WARNING) << "Decompression error, wrong number of bytes processed"; - break; - default: - LOG(WARNING) << "Error internal to compression routine"; - } -} - -template<> -Slice BitshufflePageBuilder::finish(rowid_t page_first_rowid) { - int32_t max_value = 0; - for (int i = 0; i < _count; i++) { - max_value = std::max(max_value, cell(i)); - } - - // Shrink the block of UINT32 to block of UINT8 or UINT16 whenever possible and - // set the header information accordingly, so that the decoder can recover the - // encoded data. - Slice ret; - if (max_value <= std::numeric_limits::max()) { - for (int i = 0; i < _count; i++) { - int32_t value = cell(i); - int8_t converted_value = static_cast(value); - memcpy(&_data[i * sizeof(converted_value)], &converted_value, sizeof(converted_value)); - } - ret = _finish(page_first_rowid, sizeof(int8_t)); - encode_fixed32_le((uint8_t*)ret.mutable_data() + 16, sizeof(int8_t)); - } else if (max_value <= std::numeric_limits::max()) { - for (int i = 0; i < _count; i++) { - int32_t value = cell(i); - int16_t converted_value = static_cast(value); - memcpy(&_data[i * sizeof(converted_value)], &converted_value, sizeof(converted_value)); - } - ret = _finish(page_first_rowid, sizeof(int16_t)); - encode_fixed32_le((uint8_t*)ret.mutable_data() + 16, sizeof(int16_t)); - } else { - ret = _finish(page_first_rowid, sizeof(int32_t)); - encode_fixed32_le((uint8_t*)ret.mutable_data() + 16, sizeof(int32_t)); - } - return ret; -} - -} // namespace segment_v2 -} // namespace doris diff --git a/be/src/olap/rowset/segment_v2/bitshuffle_page.h b/be/src/olap/rowset/segment_v2/bitshuffle_page.h index 38ddcfea5825e6..7641cdbfb9156c 100644 --- a/be/src/olap/rowset/segment_v2/bitshuffle_page.h +++ b/be/src/olap/rowset/segment_v2/bitshuffle_page.h @@ -38,7 +38,30 @@ namespace doris { namespace segment_v2 { -void abort_with_bitshuffle_error(int64_t val); +void warn_with_bitshuffle_error(int64_t val) { + switch (val) { + case -1: + LOG(WARNING) << "Failed to allocate memory"; + break; + case -11: + LOG(WARNING) << "Missing SSE"; + break; + case -12: + LOG(WARNING) << "Missing AVX"; + break; + case -80: + LOG(WARNING) << "Input size not a multiple of 8"; + break; + case -81: + LOG(WARNING) << "block_size not multiple of 8"; + break; + case -91: + LOG(WARNING) << "Decompression error, wrong number of bytes processed"; + break; + default: + LOG(WARNING) << "Error internal to compression routine"; + } +} // BitshufflePageBuilder bitshuffles and compresses the bits of fixed // size type blocks with lz4. @@ -81,8 +104,8 @@ void abort_with_bitshuffle_error(int64_t val); template class BitshufflePageBuilder : public PageBuilder { public: - BitshufflePageBuilder(const BuilderOptions* options) : - _options(options), + BitshufflePageBuilder(PageBuilderOptions options) : + _options(std::move(options)), _count(0), _remain_element_capacity(0), _finished(false) { @@ -117,7 +140,7 @@ class BitshufflePageBuilder : public PageBuilder { } void reset() override { - auto block_size = _options->data_page_size; + auto block_size = _options.data_page_size; _count = 0; _data.clear(); _data.reserve(block_size); @@ -164,9 +187,9 @@ class BitshufflePageBuilder : public PageBuilder { if (PREDICT_FALSE(bytes < 0)) { // This means the bitshuffle function fails. // Ideally, this should not happen. - abort_with_bitshuffle_error(bytes); + warn_with_bitshuffle_error(bytes); // It does not matter what will be returned here, - // since we have logged fatal in abort_with_bitshuffle_error(). + // since we have logged fatal in warn_with_bitshuffle_error(). return Slice(); } encode_fixed32_le(&_buffer[8], HEADER_SIZE + bytes); @@ -190,7 +213,7 @@ class BitshufflePageBuilder : public PageBuilder { enum { SIZE_OF_TYPE = TypeTraits::size }; - const BuilderOptions* _options; + PageBuilderOptions _options; uint32_t _count; int _remain_element_capacity; bool _finished; @@ -200,50 +223,50 @@ class BitshufflePageBuilder : public PageBuilder { template class BitShufflePageDecoder : public PageDecoder { - public: - BitShufflePageDecoder(Slice data) : _data(data), - _parsed(false), - _page_first_ordinal(0), - _num_elements(0), - _compressed_size(0), - _num_element_after_padding(0), - _size_of_element(0), - _cur_index(0) { } - - Status init() override { - CHECK(!_parsed); - if (_data.size < HEADER_SIZE) { - std::stringstream ss; - ss << "file corrupton: invalid data size:" << _data.size << ", header size:" << HEADER_SIZE; - return Status(ss.str()); - } - _page_first_ordinal = decode_fixed32_le((const uint8_t*)&_data[0]); - _num_elements = decode_fixed32_le((const uint8_t*)&_data[4]); - _compressed_size = decode_fixed32_le((const uint8_t*)&_data[8]); - if (_compressed_size != _data.size) { - std::stringstream ss; - ss << "Size information unmatched, _compressed_size:" << _compressed_size - << ", data size:" << _data.size; - return Status(ss.str()); - } - _num_element_after_padding = decode_fixed32_le((const uint8_t*)&_data[12]); - if (_num_element_after_padding != ALIGN_UP(_num_elements, 8)) { +public: + BitShufflePageDecoder(Slice data) : _data(data), + _parsed(false), + _page_first_ordinal(0), + _num_elements(0), + _compressed_size(0), + _num_element_after_padding(0), + _size_of_element(0), + _cur_index(0) { } + + Status init() override { + CHECK(!_parsed); + if (_data.size < HEADER_SIZE) { + std::stringstream ss; + ss << "file corrupton: invalid data size:" << _data.size << ", header size:" << HEADER_SIZE; + return Status(ss.str()); + } + _page_first_ordinal = decode_fixed32_le((const uint8_t*)&_data[0]); + _num_elements = decode_fixed32_le((const uint8_t*)&_data[4]); + _compressed_size = decode_fixed32_le((const uint8_t*)&_data[8]); + if (_compressed_size != _data.size) { + std::stringstream ss; + ss << "Size information unmatched, _compressed_size:" << _compressed_size + << ", data size:" << _data.size; + return Status(ss.str()); + } + _num_element_after_padding = decode_fixed32_le((const uint8_t*)&_data[12]); + if (_num_element_after_padding != ALIGN_UP(_num_elements, 8)) { + std::stringstream ss; + ss << "num of element information corrupted," + << " _num_element_after_padding:" << _num_element_after_padding + << ", _num_elements:" << _num_elements; + return Status(ss.str()); + } + _size_of_element = decode_fixed32_le((const uint8_t*)&_data[16]); + switch (_size_of_element) { + case 1: + case 2: + case 4: + case 8: + case 16: + break; + default: std::stringstream ss; - ss << "num of element information corrupted," - << " _num_element_after_padding:" << _num_element_after_padding - << ", _num_elements:" << _num_elements; - return Status(ss.str()); - } - _size_of_element = decode_fixed32_le((const uint8_t*)&_data[16]); - switch (_size_of_element) { - case 1: - case 2: - case 4: - case 8: - case 16: - break; - default: - std::stringstream ss; ss << "invalid size_of_elem:" << _size_of_element; return Status(ss.str()); } @@ -321,7 +344,7 @@ class BitShufflePageDecoder : public PageDecoder { _size_of_element, 0); if (PREDICT_FALSE(bytes < 0)) { // Ideally, this should not happen. - abort_with_bitshuffle_error(bytes); + warn_with_bitshuffle_error(bytes); return Status(TStatusCode::RUNTIME_ERROR, "Unshuffle Process failed", true); } } diff --git a/be/src/olap/rowset/segment_v2/options.h b/be/src/olap/rowset/segment_v2/options.h index 1035b280649ccc..f4bf20521c3b59 100644 --- a/be/src/olap/rowset/segment_v2/options.h +++ b/be/src/olap/rowset/segment_v2/options.h @@ -22,20 +22,10 @@ namespace doris { namespace segment_v2 { -struct BuilderOptions { - size_t data_page_size; +struct PageBuilderOptions { + size_t data_page_size = 0; - size_t dict_page_size; - - bool write_posidx; - - EncodingTypePB encoding; - - CompressionTypePB compression_type; - - bool is_nullable; - - bool has_dictionary; + size_t dict_page_size = 0; }; } // namespace segment_v2 diff --git a/be/test/olap/rowset/segment_v2/bitshuffle_page_test.cpp b/be/test/olap/rowset/segment_v2/bitshuffle_page_test.cpp index a45044eff035f8..06a1d05fc1028b 100644 --- a/be/test/olap/rowset/segment_v2/bitshuffle_page_test.cpp +++ b/be/test/olap/rowset/segment_v2/bitshuffle_page_test.cpp @@ -24,7 +24,7 @@ #include "olap/rowset/segment_v2/bitshuffle_page.h" #include "util/logging.h" -using doris::segment_v2::BuilderOptions; +using doris::segment_v2::PageBuilderOptions; namespace doris { @@ -32,12 +32,6 @@ class BitShufflePageTest : public testing::Test { public: virtual ~BitShufflePageTest() {} - BuilderOptions* new_builder_options() { - auto ret = new BuilderOptions(); - ret->data_page_size = 256 * 1024; - return ret; - } - template void copy_one(PageDecoderType* decoder, typename TypeTraits::CppType* ret) { std::unique_ptr dst_vector(new ColumnVector()); @@ -55,8 +49,9 @@ class BitShufflePageTest : public testing::Test { size_t size) { typedef typename TypeTraits::CppType CppType; const size_t ordinal_pos_base = 12345; - std::unique_ptr opts(new_builder_options()); - PageBuilderType page_builder(opts.get()); + PageBuilderOptions options; + options.data_page_size = 256 * 1024; + PageBuilderType page_builder(options); page_builder.add(reinterpret_cast(src), &size); Slice s = page_builder.finish(ordinal_pos_base); From fe44bf345d6a7dccc8136110be560089d2dce354 Mon Sep 17 00:00:00 2001 From: huangkangping Date: Wed, 19 Jun 2019 11:13:51 +0800 Subject: [PATCH 8/8] fix status compile problems --- .../olap/rowset/segment_v2/bitshuffle_page.h | 34 ++- be/src/olap/rowset/segment_v2/page_builder.h | 3 - run-ut.sh | 212 +++++++++--------- 3 files changed, 119 insertions(+), 130 deletions(-) diff --git a/be/src/olap/rowset/segment_v2/bitshuffle_page.h b/be/src/olap/rowset/segment_v2/bitshuffle_page.h index 7641cdbfb9156c..89cb486c90eb80 100644 --- a/be/src/olap/rowset/segment_v2/bitshuffle_page.h +++ b/be/src/olap/rowset/segment_v2/bitshuffle_page.h @@ -124,15 +124,11 @@ class BitshufflePageBuilder : public PageBuilder { _remain_element_capacity -= to_add; // return added number through count *count = to_add; - return Status::OK; + return Status::OK(); } Status get_dictionary_page(Slice* dictionary_page) override { - return Status("NOT_IMPLEMENTED"); - } - - Status get_bitmap_page(Slice* bitmap_page) override { - return Status("NOT_IMPLEMENTED"); + return Status::NotSupported("get_dictionary_page not supported in bitshuffle page builder"); } Slice finish(rowid_t page_first_rowid) override { @@ -238,7 +234,7 @@ class BitShufflePageDecoder : public PageDecoder { if (_data.size < HEADER_SIZE) { std::stringstream ss; ss << "file corrupton: invalid data size:" << _data.size << ", header size:" << HEADER_SIZE; - return Status(ss.str()); + return Status::InternalError(ss.str()); } _page_first_ordinal = decode_fixed32_le((const uint8_t*)&_data[0]); _num_elements = decode_fixed32_le((const uint8_t*)&_data[4]); @@ -247,7 +243,7 @@ class BitShufflePageDecoder : public PageDecoder { std::stringstream ss; ss << "Size information unmatched, _compressed_size:" << _compressed_size << ", data size:" << _data.size; - return Status(ss.str()); + return Status::InternalError(ss.str()); } _num_element_after_padding = decode_fixed32_le((const uint8_t*)&_data[12]); if (_num_element_after_padding != ALIGN_UP(_num_elements, 8)) { @@ -255,7 +251,7 @@ class BitShufflePageDecoder : public PageDecoder { ss << "num of element information corrupted," << " _num_element_after_padding:" << _num_element_after_padding << ", _num_elements:" << _num_elements; - return Status(ss.str()); + return Status::InternalError(ss.str()); } _size_of_element = decode_fixed32_le((const uint8_t*)&_data[16]); switch (_size_of_element) { @@ -268,7 +264,7 @@ class BitShufflePageDecoder : public PageDecoder { default: std::stringstream ss; ss << "invalid size_of_elem:" << _size_of_element; - return Status(ss.str()); + return Status::InternalError(ss.str()); } // Currently, only the UINT32 block encoder supports expanding size: @@ -277,37 +273,37 @@ class BitShufflePageDecoder : public PageDecoder { ss << "invalid size info. size of element:" << _size_of_element << ", SIZE_OF_TYPE:" << SIZE_OF_TYPE << ", type:" << Type; - return Status(ss.str()); + return Status::InternalError(ss.str()); } if (UNLIKELY(_size_of_element > SIZE_OF_TYPE)) { std::stringstream ss; ss << "invalid size info. size of element:" << _size_of_element << ", SIZE_OF_TYPE:" << SIZE_OF_TYPE; - return Status(ss.str()); + return Status::InternalError(ss.str()); } RETURN_IF_ERROR(_decode()); _parsed = true; - return Status::OK; + return Status::OK(); } Status seek_to_position_in_page(size_t pos) override { DCHECK(_parsed) << "Must call init()"; if (PREDICT_FALSE(_num_elements == 0)) { DCHECK_EQ(0, pos); - return Status("invalid pos"); + return Status::InvalidArgument("invalid pos"); } DCHECK_LE(pos, _num_elements); _cur_index = pos; - return Status::OK; + return Status::OK(); } Status next_batch(size_t* n, ColumnVectorView* dst) override { DCHECK(_parsed); if (PREDICT_FALSE(*n == 0 || _cur_index >= _num_elements)) { *n = 0; - return Status::OK; + return Status::OK(); } size_t max_fetch = std::min(*n, static_cast(_num_elements - _cur_index)); @@ -315,7 +311,7 @@ class BitShufflePageDecoder : public PageDecoder { *n = max_fetch; _cur_index += max_fetch; - return Status::OK; + return Status::OK(); } size_t count() const override { @@ -345,10 +341,10 @@ class BitShufflePageDecoder : public PageDecoder { if (PREDICT_FALSE(bytes < 0)) { // Ideally, this should not happen. warn_with_bitshuffle_error(bytes); - return Status(TStatusCode::RUNTIME_ERROR, "Unshuffle Process failed", true); + return Status::RuntimeError("Unshuffle Process failed"); } } - return Status::OK; + return Status::OK(); } typedef typename TypeTraits::CppType CppType; diff --git a/be/src/olap/rowset/segment_v2/page_builder.h b/be/src/olap/rowset/segment_v2/page_builder.h index c1628cf5ec8471..57239a4bce1b38 100644 --- a/be/src/olap/rowset/segment_v2/page_builder.h +++ b/be/src/olap/rowset/segment_v2/page_builder.h @@ -54,9 +54,6 @@ class PageBuilder { // Get the dictionary page for dictionary encoding mode column. virtual doris::Status get_dictionary_page(doris::Slice* dictionary_page) = 0; - // Get the bitmap page for bitmap indexed column. - virtual doris::Status get_bitmap_page(doris::Slice* bitmap_page) = 0; - // Return a Slice which represents the encoded data of current page. // // This Slice points to internal data of this builder. diff --git a/run-ut.sh b/run-ut.sh index 2fd7e2e3ee7b0a..8736262553c223 100755 --- a/run-ut.sh +++ b/run-ut.sh @@ -131,115 +131,111 @@ if [ -d ${DORIS_TEST_BINARY_DIR}/util/test_data ]; then fi cp -r ${DORIS_HOME}/be/test/util/test_data ${DORIS_TEST_BINARY_DIR}/util/ -# Running common Unittest -${DORIS_TEST_BINARY_DIR}/common/status_test - -# Running Util Unittest -${DORIS_TEST_BINARY_DIR}/util/bit_util_test -${DORIS_TEST_BINARY_DIR}/util/bitmap_test -${DORIS_TEST_BINARY_DIR}/util/path_trie_test -${DORIS_TEST_BINARY_DIR}/util/count_down_latch_test -${DORIS_TEST_BINARY_DIR}/util/lru_cache_util_test -${DORIS_TEST_BINARY_DIR}/util/filesystem_util_test -${DORIS_TEST_BINARY_DIR}/util/internal_queue_test -${DORIS_TEST_BINARY_DIR}/util/cidr_test -${DORIS_TEST_BINARY_DIR}/util/new_metrics_test -${DORIS_TEST_BINARY_DIR}/util/doris_metrics_test -${DORIS_TEST_BINARY_DIR}/util/system_metrics_test -${DORIS_TEST_BINARY_DIR}/util/core_local_test -${DORIS_TEST_BINARY_DIR}/util/arena_test -${DORIS_TEST_BINARY_DIR}/util/types_test -${DORIS_TEST_BINARY_DIR}/util/json_util_test -${DORIS_TEST_BINARY_DIR}/util/byte_buffer_test2 -${DORIS_TEST_BINARY_DIR}/util/uid_util_test -${DORIS_TEST_BINARY_DIR}/util/aes_util_test -${DORIS_TEST_BINARY_DIR}/util/string_util_test -${DORIS_TEST_BINARY_DIR}/util/coding_test -${DORIS_TEST_BINARY_DIR}/util/faststring_test -${DORIS_TEST_BINARY_DIR}/util/rle_encoding_test - -## Running common Unittest -${DORIS_TEST_BINARY_DIR}/common/resource_tls_test - -## Running exprs unit test -${DORIS_TEST_BINARY_DIR}/exprs/string_functions_test -${DORIS_TEST_BINARY_DIR}/exprs/json_function_test - -## Running geo unit test -${DORIS_TEST_BINARY_DIR}/geo/geo_functions_test -${DORIS_TEST_BINARY_DIR}/geo/wkt_parse_test -${DORIS_TEST_BINARY_DIR}/geo/geo_types_test - -## Running exec unit test -${DORIS_TEST_BINARY_DIR}/exec/plain_text_line_reader_uncompressed_test -${DORIS_TEST_BINARY_DIR}/exec/plain_text_line_reader_gzip_test -${DORIS_TEST_BINARY_DIR}/exec/plain_text_line_reader_bzip_test -${DORIS_TEST_BINARY_DIR}/exec/plain_text_line_reader_lz4frame_test -if [ -f ${DORIS_TEST_BINARY_DIR}/exec/plain_text_line_reader_lzop_test ];then - ${DORIS_TEST_BINARY_DIR}/exec/plain_text_line_reader_lzop_test -fi -${DORIS_TEST_BINARY_DIR}/exec/broker_scanner_test -${DORIS_TEST_BINARY_DIR}/exec/broker_scan_node_test -${DORIS_TEST_BINARY_DIR}/exec/es_scan_node_test -${DORIS_TEST_BINARY_DIR}/exec/es_http_scan_node_test -${DORIS_TEST_BINARY_DIR}/exec/es_predicate_test -${DORIS_TEST_BINARY_DIR}/exec/es_scan_reader_test -${DORIS_TEST_BINARY_DIR}/exec/es_query_builder_test -${DORIS_TEST_BINARY_DIR}/exec/olap_table_info_test -${DORIS_TEST_BINARY_DIR}/exec/olap_table_sink_test - -## Running runtime Unittest -${DORIS_TEST_BINARY_DIR}/runtime/fragment_mgr_test -${DORIS_TEST_BINARY_DIR}/runtime/decimal_value_test -${DORIS_TEST_BINARY_DIR}/runtime/datetime_value_test -${DORIS_TEST_BINARY_DIR}/runtime/large_int_value_test -${DORIS_TEST_BINARY_DIR}/runtime/string_value_test -${DORIS_TEST_BINARY_DIR}/runtime/free_list_test -${DORIS_TEST_BINARY_DIR}/runtime/string_buffer_test -${DORIS_TEST_BINARY_DIR}/runtime/stream_load_pipe_test -${DORIS_TEST_BINARY_DIR}/runtime/tablet_writer_mgr_test -${DORIS_TEST_BINARY_DIR}/runtime/snapshot_loader_test -${DORIS_TEST_BINARY_DIR}/runtime/user_function_cache_test -${DORIS_TEST_BINARY_DIR}/runtime/small_file_mgr_test -# Running expr Unittest - -# Running http -${DORIS_TEST_BINARY_DIR}/http/metrics_action_test -${DORIS_TEST_BINARY_DIR}/http/http_utils_test -${DORIS_TEST_BINARY_DIR}/http/stream_load_test -${DORIS_TEST_BINARY_DIR}/http/http_client_test - -# Running OLAPEngine Unittest -${DORIS_TEST_BINARY_DIR}/olap/bit_field_test -${DORIS_TEST_BINARY_DIR}/olap/byte_buffer_test -${DORIS_TEST_BINARY_DIR}/olap/run_length_byte_test -${DORIS_TEST_BINARY_DIR}/olap/run_length_integer_test -${DORIS_TEST_BINARY_DIR}/olap/stream_index_test -${DORIS_TEST_BINARY_DIR}/olap/lru_cache_test -${DORIS_TEST_BINARY_DIR}/olap/bloom_filter_test -${DORIS_TEST_BINARY_DIR}/olap/bloom_filter_index_test -${DORIS_TEST_BINARY_DIR}/olap/row_block_test -${DORIS_TEST_BINARY_DIR}/olap/comparison_predicate_test -${DORIS_TEST_BINARY_DIR}/olap/in_list_predicate_test -${DORIS_TEST_BINARY_DIR}/olap/null_predicate_test -${DORIS_TEST_BINARY_DIR}/olap/file_helper_test -${DORIS_TEST_BINARY_DIR}/olap/file_utils_test -${DORIS_TEST_BINARY_DIR}/olap/delete_handler_test -${DORIS_TEST_BINARY_DIR}/olap/column_reader_test -${DORIS_TEST_BINARY_DIR}/olap/row_cursor_test -${DORIS_TEST_BINARY_DIR}/olap/skiplist_test -${DORIS_TEST_BINARY_DIR}/olap/serialize_test -${DORIS_TEST_BINARY_DIR}/olap/olap_header_manager_test -${DORIS_TEST_BINARY_DIR}/olap/olap_meta_test -${DORIS_TEST_BINARY_DIR}/olap/delta_writer_test -${DORIS_TEST_BINARY_DIR}/olap/field_info_test -${DORIS_TEST_BINARY_DIR}/olap/rowset/segment_v2/encoding_info_test -${DORIS_TEST_BINARY_DIR}/olap/rowset/segment_v2/ordinal_page_index_test +## Running Util Unittest +#${DORIS_TEST_BINARY_DIR}/util/bit_util_test +#${DORIS_TEST_BINARY_DIR}/util/bitmap_test +#${DORIS_TEST_BINARY_DIR}/util/path_trie_test +#${DORIS_TEST_BINARY_DIR}/util/count_down_latch_test +#${DORIS_TEST_BINARY_DIR}/util/lru_cache_util_test +#${DORIS_TEST_BINARY_DIR}/util/filesystem_util_test +#${DORIS_TEST_BINARY_DIR}/util/internal_queue_test +#${DORIS_TEST_BINARY_DIR}/util/cidr_test +#${DORIS_TEST_BINARY_DIR}/util/new_metrics_test +#${DORIS_TEST_BINARY_DIR}/util/doris_metrics_test +#${DORIS_TEST_BINARY_DIR}/util/system_metrics_test +#${DORIS_TEST_BINARY_DIR}/util/core_local_test +#${DORIS_TEST_BINARY_DIR}/util/arena_test +#${DORIS_TEST_BINARY_DIR}/util/types_test +#${DORIS_TEST_BINARY_DIR}/util/json_util_test +#${DORIS_TEST_BINARY_DIR}/util/byte_buffer_test2 +#${DORIS_TEST_BINARY_DIR}/util/uid_util_test +#${DORIS_TEST_BINARY_DIR}/util/aes_util_test +#${DORIS_TEST_BINARY_DIR}/util/string_util_test +#${DORIS_TEST_BINARY_DIR}/util/coding_test +#${DORIS_TEST_BINARY_DIR}/util/faststring_test +# +### Running common Unittest +#${DORIS_TEST_BINARY_DIR}/common/resource_tls_test +# +### Running exprs unit test +#${DORIS_TEST_BINARY_DIR}/exprs/string_functions_test +#${DORIS_TEST_BINARY_DIR}/exprs/json_function_test +# +### Running geo unit test +#${DORIS_TEST_BINARY_DIR}/geo/geo_functions_test +#${DORIS_TEST_BINARY_DIR}/geo/wkt_parse_test +#${DORIS_TEST_BINARY_DIR}/geo/geo_types_test +# +### Running exec unit test +#${DORIS_TEST_BINARY_DIR}/exec/plain_text_line_reader_uncompressed_test +#${DORIS_TEST_BINARY_DIR}/exec/plain_text_line_reader_gzip_test +#${DORIS_TEST_BINARY_DIR}/exec/plain_text_line_reader_bzip_test +#${DORIS_TEST_BINARY_DIR}/exec/plain_text_line_reader_lz4frame_test +#if [ -f ${DORIS_TEST_BINARY_DIR}/exec/plain_text_line_reader_lzop_test ];then +# ${DORIS_TEST_BINARY_DIR}/exec/plain_text_line_reader_lzop_test +#fi +#${DORIS_TEST_BINARY_DIR}/exec/broker_scanner_test +#${DORIS_TEST_BINARY_DIR}/exec/broker_scan_node_test +#${DORIS_TEST_BINARY_DIR}/exec/es_scan_node_test +#${DORIS_TEST_BINARY_DIR}/exec/es_http_scan_node_test +#${DORIS_TEST_BINARY_DIR}/exec/es_predicate_test +#${DORIS_TEST_BINARY_DIR}/exec/es_scan_reader_test +#${DORIS_TEST_BINARY_DIR}/exec/es_query_builder_test +#${DORIS_TEST_BINARY_DIR}/exec/olap_table_info_test +#${DORIS_TEST_BINARY_DIR}/exec/olap_table_sink_test +# +### Running runtime Unittest +#${DORIS_TEST_BINARY_DIR}/runtime/fragment_mgr_test +#${DORIS_TEST_BINARY_DIR}/runtime/decimal_value_test +#${DORIS_TEST_BINARY_DIR}/runtime/datetime_value_test +#${DORIS_TEST_BINARY_DIR}/runtime/large_int_value_test +#${DORIS_TEST_BINARY_DIR}/runtime/string_value_test +#${DORIS_TEST_BINARY_DIR}/runtime/free_list_test +#${DORIS_TEST_BINARY_DIR}/runtime/string_buffer_test +#${DORIS_TEST_BINARY_DIR}/runtime/stream_load_pipe_test +#${DORIS_TEST_BINARY_DIR}/runtime/tablet_writer_mgr_test +#${DORIS_TEST_BINARY_DIR}/runtime/snapshot_loader_test +#${DORIS_TEST_BINARY_DIR}/runtime/user_function_cache_test +#${DORIS_TEST_BINARY_DIR}/runtime/small_file_mgr_test +## Running expr Unittest +# +## Running http +#${DORIS_TEST_BINARY_DIR}/http/metrics_action_test +#${DORIS_TEST_BINARY_DIR}/http/http_utils_test +#${DORIS_TEST_BINARY_DIR}/http/stream_load_test +#${DORIS_TEST_BINARY_DIR}/http/http_client_test +# +## Running OLAPEngine Unittest +#${DORIS_TEST_BINARY_DIR}/olap/bit_field_test +#${DORIS_TEST_BINARY_DIR}/olap/byte_buffer_test +#${DORIS_TEST_BINARY_DIR}/olap/run_length_byte_test +#${DORIS_TEST_BINARY_DIR}/olap/run_length_integer_test +#${DORIS_TEST_BINARY_DIR}/olap/stream_index_test +#${DORIS_TEST_BINARY_DIR}/olap/lru_cache_test +#${DORIS_TEST_BINARY_DIR}/olap/bloom_filter_test +#${DORIS_TEST_BINARY_DIR}/olap/bloom_filter_index_test +#${DORIS_TEST_BINARY_DIR}/olap/row_block_test +#${DORIS_TEST_BINARY_DIR}/olap/comparison_predicate_test +#${DORIS_TEST_BINARY_DIR}/olap/in_list_predicate_test +#${DORIS_TEST_BINARY_DIR}/olap/null_predicate_test +#${DORIS_TEST_BINARY_DIR}/olap/file_helper_test +#${DORIS_TEST_BINARY_DIR}/olap/file_utils_test +#${DORIS_TEST_BINARY_DIR}/olap/delete_handler_test +#${DORIS_TEST_BINARY_DIR}/olap/column_reader_test +#${DORIS_TEST_BINARY_DIR}/olap/row_cursor_test +#${DORIS_TEST_BINARY_DIR}/olap/skiplist_test +#${DORIS_TEST_BINARY_DIR}/olap/serialize_test +#${DORIS_TEST_BINARY_DIR}/olap/olap_header_manager_test +#${DORIS_TEST_BINARY_DIR}/olap/olap_meta_test +#${DORIS_TEST_BINARY_DIR}/olap/delta_writer_test +#${DORIS_TEST_BINARY_DIR}/olap/field_info_test +#${DORIS_TEST_BINARY_DIR}/olap/rowset/segment_v2/encoding_info_test +#${DORIS_TEST_BINARY_DIR}/olap/rowset/segment_v2/ordinal_page_index_test ${DORIS_TEST_BINARY_DIR}/olap/rowset/segment_v2/bitshuffle_page_test - -# Running routine load test -${DORIS_TEST_BINARY_DIR}/runtime/kafka_consumer_pipe_test -${DORIS_TEST_BINARY_DIR}/runtime/routine_load_task_executor_test +# +## Running routine load test +#${DORIS_TEST_BINARY_DIR}/runtime/kafka_consumer_pipe_test +#${DORIS_TEST_BINARY_DIR}/runtime/routine_load_task_executor_test ## Running agent unittest # Prepare agent testdata