From 5086f71c12521bb78c384138a4a05bbbd6d09c27 Mon Sep 17 00:00:00 2001 From: zhaochun Date: Thu, 1 Aug 2019 16:36:11 +0800 Subject: [PATCH] Add new format short key index In this patch, we create a new format for short key index. In orgin code index is stored in format like RowCusor which is not effecient to compare. Now we encode multiple column into binary, and we assure that this binary is sorted same with the key columns. --- be/src/gutil/endian.h | 153 ++++++++++++++ be/src/olap/CMakeLists.txt | 2 + be/src/olap/key_coder.cpp | 80 +++++++ be/src/olap/key_coder.h | 221 ++++++++++++++++++++ be/src/olap/short_key_index.cpp | 116 +++++++++++ be/src/olap/short_key_index.h | 197 ++++++++++++++++++ be/src/olap/types.h | 10 + be/src/olap/uint24.h | 13 +- be/src/util/CMakeLists.txt | 1 + be/src/util/slice.cpp | 28 +++ be/src/util/slice.h | 4 + be/test/olap/CMakeLists.txt | 2 + be/test/olap/key_coder_test.cpp | 287 ++++++++++++++++++++++++++ be/test/olap/short_key_index_test.cpp | 97 +++++++++ gensrc/proto/segment_v2.proto | 18 ++ run-ut.sh | 2 + 16 files changed, 1228 insertions(+), 3 deletions(-) create mode 100644 be/src/olap/key_coder.cpp create mode 100644 be/src/olap/key_coder.h create mode 100644 be/src/olap/short_key_index.cpp create mode 100644 be/src/olap/short_key_index.h create mode 100644 be/src/util/slice.cpp create mode 100644 be/test/olap/key_coder_test.cpp create mode 100644 be/test/olap/short_key_index_test.cpp diff --git a/be/src/gutil/endian.h b/be/src/gutil/endian.h index f6b2485b6d8bab..6b8a0bc772d414 100644 --- a/be/src/gutil/endian.h +++ b/be/src/gutil/endian.h @@ -32,6 +32,18 @@ inline uint64 gbswap_64(uint64 host_int) { #endif // bswap_64 } +inline unsigned __int128 gbswap_128(unsigned __int128 host_int) { + return static_cast(bswap_64(static_cast(host_int >> 64))) | + (static_cast(bswap_64(static_cast(host_int))) << 64); +} + +// Swap bytes of a 24-bit value. +inline uint32_t bswap_24(uint32_t x) { + return ((x & 0x0000ffULL) << 16) | + ((x & 0x00ff00ULL)) | + ((x & 0xff0000ULL) >> 16); +} + #ifdef IS_LITTLE_ENDIAN // Definitions for ntohl etc. that don't require us to include @@ -188,4 +200,145 @@ class LittleEndian { #define gntohll(x) ghtonll(x) #define ntohll(x) htonll(x) +// Utilities to convert numbers between the current hosts's native byte +// order and big-endian byte order (same as network byte order) +// +// Load/Store methods are alignment safe +class BigEndian { +public: +#ifdef IS_LITTLE_ENDIAN + + static uint16 FromHost16(uint16 x) { return bswap_16(x); } + static uint16 ToHost16(uint16 x) { return bswap_16(x); } + + static uint32 FromHost24(uint32 x) { return bswap_24(x); } + static uint32 ToHost24(uint32 x) { return bswap_24(x); } + + static uint32 FromHost32(uint32 x) { return bswap_32(x); } + static uint32 ToHost32(uint32 x) { return bswap_32(x); } + + static uint64 FromHost64(uint64 x) { return gbswap_64(x); } + static uint64 ToHost64(uint64 x) { return gbswap_64(x); } + + static unsigned __int128 FromHost128(unsigned __int128 x) { return gbswap_128(x); } + static unsigned __int128 ToHost128(unsigned __int128 x) { return gbswap_128(x); } + + static bool IsLittleEndian() { return true; } + +#elif defined IS_BIG_ENDIAN + + static uint16 FromHost16(uint16 x) { return x; } + static uint16 ToHost16(uint16 x) { return x; } + + static uint32 FromHost24(uint32 x) { return x; } + static uint32 ToHost24(uint32 x) { return x; } + + static uint32 FromHost32(uint32 x) { return x; } + static uint32 ToHost32(uint32 x) { return x; } + + static uint64 FromHost64(uint64 x) { return x; } + static uint64 ToHost64(uint64 x) { return x; } + + static uint128 FromHost128(uint128 x) { return x; } + static uint128 ToHost128(uint128 x) { return x; } + + static bool IsLittleEndian() { return false; } + +#endif /* ENDIAN */ + // Functions to do unaligned loads and stores in little-endian order. + static uint16 Load16(const void *p) { + return ToHost16(UNALIGNED_LOAD16(p)); + } + + static void Store16(void *p, uint16 v) { + UNALIGNED_STORE16(p, FromHost16(v)); + } + + static uint32 Load32(const void *p) { + return ToHost32(UNALIGNED_LOAD32(p)); + } + + static void Store32(void *p, uint32 v) { + UNALIGNED_STORE32(p, FromHost32(v)); + } + + static uint64 Load64(const void *p) { + return ToHost64(UNALIGNED_LOAD64(p)); + } + + // Build a uint64 from 1-8 bytes. + // 8 * len least significant bits are loaded from the memory with + // BigEndian order. The 64 - 8 * len most significant bits are + // set all to 0. + // In latex-friendly words, this function returns: + // $\sum_{i=0}^{len-1} p[i] 256^{i}$, where p[i] is unsigned. + // + // This function is equivalent with: + // uint64 val = 0; + // memcpy(&val, p, len); + // return ToHost64(val); + // TODO(user): write a small benchmark and benchmark the speed + // of a memcpy based approach. + // + // For speed reasons this function does not work for len == 0. + // The caller needs to guarantee that 1 <= len <= 8. + static uint64 Load64VariableLength(const void * const p, int len) { + assert(len >= 1 && len <= 8); + uint64 val = Load64(p); + uint64 mask = 0; + --len; + do { + mask = (mask << 8) | 0xff; + // (--len >= 0) is about 10 % faster than (len--) in some benchmarks. + } while (--len >= 0); + return val & mask; + } + + static void Store64(void *p, uint64 v) { + UNALIGNED_STORE64(p, FromHost64(v)); + } + + static uint128 Load128(const void *p) { + return uint128( + ToHost64(UNALIGNED_LOAD64(p)), + ToHost64(UNALIGNED_LOAD64(reinterpret_cast(p) + 1))); + } + + static void Store128(void *p, const uint128 v) { + UNALIGNED_STORE64(p, FromHost64(Uint128High64(v))); + UNALIGNED_STORE64(reinterpret_cast(p) + 1, + FromHost64(Uint128Low64(v))); + } + + // Build a uint128 from 1-16 bytes. + // 8 * len least significant bits are loaded from the memory with + // BigEndian order. The 128 - 8 * len most significant bits are + // set all to 0. + static uint128 Load128VariableLength(const void *p, int len) { + if (len <= 8) { + return uint128(Load64VariableLength(static_cast(p)+8, + len)); + } else { + return uint128( + Load64VariableLength(p, len-8), + Load64(static_cast(p)+8)); + } + } + + // Load & Store in machine's word size. + static uword_t LoadUnsignedWord(const void *p) { + if (sizeof(uword_t) == 8) + return Load64(p); + else + return Load32(p); + } + + static void StoreUnsignedWord(void *p, uword_t v) { + if (sizeof(uword_t) == 8) + Store64(p, v); + else + Store32(p, v); + } +}; // BigEndian + #endif // UTIL_ENDIAN_ENDIAN_H_ diff --git a/be/src/olap/CMakeLists.txt b/be/src/olap/CMakeLists.txt index c494c8d0f6c6a6..6f45285f26ddaf 100644 --- a/be/src/olap/CMakeLists.txt +++ b/be/src/olap/CMakeLists.txt @@ -41,6 +41,7 @@ add_library(Olap STATIC hll.cpp in_list_predicate.cpp in_stream.cpp + key_coder.cpp lru_cache.cpp memtable.cpp merger.cpp @@ -63,6 +64,7 @@ add_library(Olap STATIC serialize.cpp storage_engine.cpp data_dir.cpp + short_key_index.cpp snapshot_manager.cpp stream_index_common.cpp stream_index_reader.cpp diff --git a/be/src/olap/key_coder.cpp b/be/src/olap/key_coder.cpp new file mode 100644 index 00000000000000..68f2ace8961bcb --- /dev/null +++ b/be/src/olap/key_coder.cpp @@ -0,0 +1,80 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include "olap/key_coder.h" + +#include + +namespace doris { + +template +KeyCoder::KeyCoder(TraitsType traits) + : _encode_ascending(traits.encode_ascending), + _decode_ascending(traits.decode_ascending) { +} + +// Helper class used to get KeyCoder +class KeyCoderResolver { +public: + ~KeyCoderResolver() { + for (auto& iter : _coder_map) { + delete iter.second; + } + } + + static KeyCoderResolver* instance() { + static KeyCoderResolver s_instance; + return &s_instance; + } + + KeyCoder* get_coder(FieldType field_type) const { + auto it = _coder_map.find(field_type); + if (it != _coder_map.end()) { + return it->second; + } + return nullptr; + } + +private: + KeyCoderResolver() { + add_mapping(); + add_mapping(); + add_mapping(); + add_mapping(); + add_mapping(); + add_mapping(); + add_mapping(); + + add_mapping(); + add_mapping(); + add_mapping(); + add_mapping(); + } + + template + void add_mapping() { + _coder_map.emplace(field_type, new KeyCoder(KeyCoderTraits())); + } + + std::unordered_map _coder_map; +}; + +const KeyCoder* get_key_coder(FieldType type) { + return KeyCoderResolver::instance()->get_coder(type); +} + +} diff --git a/be/src/olap/key_coder.h b/be/src/olap/key_coder.h new file mode 100644 index 00000000000000..bf7fa52562fabf --- /dev/null +++ b/be/src/olap/key_coder.h @@ -0,0 +1,221 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#pragma once + +#include +#include + +#include "common/status.h" +#include "gutil/endian.h" +#include "gutil/strings/substitute.h" +#include "olap/types.h" +#include "util/arena.h" + +namespace doris { + +using strings::Substitute; + +using EncodeAscendingFunc = void (*)(const void* value, size_t index_size, std::string* buf); +using DecodeAscendingFunc = Status (*)(Slice* encoded_key, size_t index_size, uint8_t* cell_ptr, Arena* arena); + +class KeyCoder { +public: + template + KeyCoder(TraitsType traits); + + void encode_ascending(const void* value, size_t index_size, std::string* buf) const { + _encode_ascending(value, index_size, buf); + } + Status decode_ascending(Slice* encoded_key, size_t index_size, uint8_t* cell_ptr, Arena* arena) const { + return _decode_ascending(encoded_key, index_size, cell_ptr, arena); + } + +private: + EncodeAscendingFunc _encode_ascending; + DecodeAscendingFunc _decode_ascending; +}; + +extern const KeyCoder* get_key_coder(FieldType type); + +template +class KeyCoderTraits { +}; + +template +class KeyCoderTraits::CppType>::value>::type> { +public: + using CppType = typename CppTypeTraits::CppType; + using UnsignedCppType = typename CppTypeTraits::UnsignedCppType; + +private: + // Swap value's endian from/to big endian + static UnsignedCppType swap_big_endian(UnsignedCppType val) { + switch (sizeof(UnsignedCppType)) { + case 1: return val; + case 2: return BigEndian::FromHost16(val); + case 4: return BigEndian::FromHost32(val); + case 8: return BigEndian::FromHost64(val); + case 16: return BigEndian::FromHost128(val); + default: LOG(FATAL) << "Invalid type to big endian, type=" << field_type + << ", size=" << sizeof(UnsignedCppType); + } + } + +public: + static void encode_ascending(const void* value, size_t index_size, std::string* buf) { + UnsignedCppType unsigned_val; + memcpy(&unsigned_val, value, sizeof(unsigned_val)); + // swap MSB to encode integer + if (std::is_signed::value) { + unsigned_val ^= (static_cast(1) << (sizeof(UnsignedCppType) * CHAR_BIT - 1)); + } + // make it bigendian + unsigned_val = swap_big_endian(unsigned_val); + + buf->append((char*)&unsigned_val, sizeof(unsigned_val)); + } + + static Status decode_ascending(Slice* encoded_key, size_t index_size, + uint8_t* cell_ptr, Arena* arena) { + if (encoded_key->size < sizeof(UnsignedCppType)) { + return Status::InvalidArgument( + Substitute("Key too short, need=$0 vs real=$1", + sizeof(UnsignedCppType), encoded_key->size)); + } + UnsignedCppType unsigned_val; + memcpy(&unsigned_val, encoded_key->data, sizeof(UnsignedCppType)); + unsigned_val = swap_big_endian(unsigned_val); + if (std::is_signed::value) { + unsigned_val ^= (static_cast(1) << (sizeof(UnsignedCppType) * CHAR_BIT - 1)); + } + memcpy(cell_ptr, &unsigned_val, sizeof(UnsignedCppType)); + encoded_key->remove_prefix(sizeof(UnsignedCppType)); + return Status::OK(); + } +}; + +template<> +class KeyCoderTraits { +public: + using CppType = typename CppTypeTraits::CppType; + using UnsignedCppType = typename CppTypeTraits::UnsignedCppType; + +public: + static void encode_ascending(const void* value, size_t index_size, std::string* buf) { + UnsignedCppType unsigned_val; + memcpy(&unsigned_val, value, sizeof(unsigned_val)); + // make it bigendian + unsigned_val = BigEndian::FromHost24(unsigned_val); + buf->append((char*)&unsigned_val, sizeof(unsigned_val)); + } + + static Status decode_ascending(Slice* encoded_key, size_t index_size, + uint8_t* cell_ptr, Arena* arena) { + if (encoded_key->size < sizeof(UnsignedCppType)) { + return Status::InvalidArgument( + Substitute("Key too short, need=$0 vs real=$1", + sizeof(UnsignedCppType), encoded_key->size)); + } + UnsignedCppType unsigned_val; + memcpy(&unsigned_val, encoded_key->data, sizeof(UnsignedCppType)); + unsigned_val = BigEndian::FromHost24(unsigned_val); + memcpy(cell_ptr, &unsigned_val, sizeof(UnsignedCppType)); + encoded_key->remove_prefix(sizeof(UnsignedCppType)); + return Status::OK(); + } +}; + +template<> +class KeyCoderTraits { +public: + static void encode_ascending(const void* value, size_t index_size, std::string* buf) { + decimal12_t decimal_val; + memcpy(&decimal_val, value, sizeof(decimal12_t)); + // encode integer + KeyCoderTraits::encode_ascending( + &decimal_val.integer, sizeof(decimal_val.integer), buf); + // encode integer + KeyCoderTraits::encode_ascending( + &decimal_val.fraction, sizeof(decimal_val.fraction), buf); + } + + static Status decode_ascending(Slice* encoded_key, size_t index_size, + uint8_t* cell_ptr, Arena* arena) { + decimal12_t decimal_val; + RETURN_IF_ERROR(KeyCoderTraits::decode_ascending( + encoded_key, sizeof(decimal_val.integer), (uint8_t*)&decimal_val.integer, arena)); + RETURN_IF_ERROR(KeyCoderTraits::decode_ascending( + encoded_key, sizeof(decimal_val.fraction), (uint8_t*)&decimal_val.fraction, arena)); + memcpy(cell_ptr, &decimal_val, sizeof(decimal12_t)); + return Status::OK(); + } +}; + +template<> +class KeyCoderTraits { +public: + static void encode_ascending(const void* value, size_t index_size, std::string* buf) { + const Slice* slice = (const Slice*)value; + CHECK(index_size <= slice->size) << "index size is larger than char size, index=" << index_size << ", char=" << slice->size; + buf->append(slice->data, index_size); + } + + static Status decode_ascending(Slice* encoded_key, size_t index_size, + uint8_t* cell_ptr, Arena* arena) { + if (encoded_key->size < index_size) { + return Status::InvalidArgument( + Substitute("Key too short, need=$0 vs real=$1", + index_size, encoded_key->size)); + } + Slice* slice = (Slice*)cell_ptr; + slice->data = arena->Allocate(index_size); + slice->size = index_size; + memcpy(slice->data, encoded_key->data, index_size); + encoded_key->remove_prefix(index_size); + return Status::OK(); + } +}; + +template<> +class KeyCoderTraits { +public: + static void encode_ascending(const void* value, size_t index_size, std::string* buf) { + const Slice* slice = (const Slice*)value; + size_t copy_size = std::min(index_size, slice->size); + buf->append(slice->data, copy_size); + } + + static Status decode_ascending(Slice* encoded_key, size_t index_size, + uint8_t* cell_ptr, Arena* arena) { + CHECK(encoded_key->size <= index_size) + << "encoded_key size is larger than index_size, key_size=" << encoded_key->size + << ", index_size=" << index_size; + auto copy_size = encoded_key->size; + Slice* slice = (Slice*)cell_ptr; + slice->data = arena->Allocate(copy_size); + slice->size = copy_size; + memcpy(slice->data, encoded_key->data, copy_size); + encoded_key->remove_prefix(copy_size); + return Status::OK(); + } +}; + +} diff --git a/be/src/olap/short_key_index.cpp b/be/src/olap/short_key_index.cpp new file mode 100644 index 00000000000000..69e6c8a4700fec --- /dev/null +++ b/be/src/olap/short_key_index.cpp @@ -0,0 +1,116 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include "olap/short_key_index.h" + +#include + +#include "util/coding.h" +#include "gutil/strings/substitute.h" + +using strings::Substitute; + +namespace doris { + +Status ShortKeyIndexBuilder::add_item(const Slice& key) { + put_varint32(&_offset_buf, _key_buf.size()); + _footer.set_num_items(_footer.num_items() + 1); + _key_buf.append(key.data, key.size); + return Status::OK(); +} + +Status ShortKeyIndexBuilder::finalize(uint32_t segment_bytes, + uint32_t num_segment_rows, + std::vector* slices) { + _footer.set_num_segment_rows(num_segment_rows); + _footer.set_segment_bytes(segment_bytes); + _footer.set_key_bytes(_key_buf.size()); + _footer.set_offset_bytes(_offset_buf.size()); + + // encode header + if (!_footer.SerializeToString(&_footer_buf)) { + return Status::InternalError("Failed to serialize index footer"); + } + + put_fixed32_le(&_footer_buf, _footer_buf.size()); + uint32_t checksum = 0; + put_fixed32_le(&_footer_buf, checksum); + + slices->emplace_back(_key_buf); + slices->emplace_back(_offset_buf); + slices->emplace_back(_footer_buf); + return Status::OK(); +} + +Status ShortKeyIndexDecoder::parse() { + Slice data = _data; + + // 1. parse footer, get checksum and footer length + if (data.size < 2 * sizeof(uint32_t)) { + return Status::Corruption( + Substitute("Short key is too short, need=$0 vs real=$1", + 2 * sizeof(uint32_t), data.size)); + } + size_t offset = data.size - 2 * sizeof(uint32_t); + uint32_t footer_length = decode_fixed32_le((uint8_t*)data.data + offset); + uint32_t checksum = decode_fixed32_le((uint8_t*)data.data + offset + 4); + // TODO(zc): do checksum + if (checksum != 0) { + return Status::Corruption( + Substitute("Checksum not match, need=$0 vs read=$1", 0, checksum)); + } + // move offset to parse footer + offset -= footer_length; + std::string footer_buf(data.data + offset, footer_length); + if (!_footer.ParseFromString(footer_buf)) { + return Status::Corruption("Fail to parse index footer from string"); + } + + // check if real data size match footer's content + if (offset != _footer.key_bytes() + _footer.offset_bytes()) { + return Status::Corruption( + Substitute("Index size not match, need=$0, real=$1", + _footer.key_bytes() + _footer.offset_bytes(), offset)); + } + + // set index buffer + _key_data = Slice(_data.data, _footer.key_bytes()); + + // parse offset information + Slice offset_slice(_data.data + _footer.key_bytes(), _footer.offset_bytes()); + // +1 for record total length + _offsets.resize(_footer.num_items() + 1); + _offsets[_footer.num_items()] = _footer.key_bytes(); + for (uint32_t i = 0; i < _footer.num_items(); ++i) { + uint32_t offset = 0; + if (!get_varint32(&offset_slice, &offset)) { + return Status::Corruption("Fail to get varint from index offset buffer"); + } + DCHECK(offset <= _footer.key_bytes()) + << "Offset is larger than total bytes, offset=" << offset + << ", key_bytes=" << _footer.key_bytes(); + _offsets[i] = offset; + } + + if (offset_slice.size != 0) { + return Status::Corruption("Still has data after parse all key offset"); + } + + return Status::OK(); +} + +} diff --git a/be/src/olap/short_key_index.h b/be/src/olap/short_key_index.h new file mode 100644 index 00000000000000..e0ebf0ef78a850 --- /dev/null +++ b/be/src/olap/short_key_index.h @@ -0,0 +1,197 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#pragma once + +#include +#include +#include +#include + +#include "common/status.h" +#include "gen_cpp/segment_v2.pb.h" +#include "util/faststring.h" +#include "util/slice.h" + +namespace doris { + +// Used to encode a segment short key indices to binary format. This version +// only accepts binary key, client should assure that input key is sorted, +// otherwise error could happens. This builder would arrange data in following +// format. +// index = encoded_keys + encoded_offsets + footer + footer_size + checksum +// encoded_keys = binary_key + [, ...] +// encoded_offsets = encoded_offset + [, ...] +// encoded_offset = variant32 +// footer = ShortKeyFooterPB +// footer_size = fixed32 +// checksum = fixed32 +// Usage: +// ShortKeyIndexBuilder builder(segment_id, num_rows_per_block); +// builder.add_item(key1); +// ... +// builder.add_item(keyN); +// builder.finalize(segment_size, num_rows, &slices); +// TODO(zc): +// 1. If this can leverage binary page to save key and offset data +// 2. Extending this to save in a BTree like struct, which can index full key +// more than short key +class ShortKeyIndexBuilder { +public: + ShortKeyIndexBuilder(uint32_t segment_id, + uint32_t num_rows_per_block) { + _footer.set_segment_id(segment_id); + _footer.set_num_rows_per_block(num_rows_per_block); + } + + Status add_item(const Slice& key); + + Status finalize(uint32_t segment_size, uint32_t num_rows, std::vector* slices); + +private: + segment_v2::ShortKeyFooterPB _footer; + + faststring _key_buf; + faststring _offset_buf; + std::string _footer_buf; + std::vector _offsets; +}; + +class ShortKeyIndexDecoder; + +// An Iterator to iterate one short key index. +// Client can use this class to iterator all items +// item in this index. +class ShortKeyIndexIterator { +public: + using iterator_category = std::random_access_iterator_tag; + using value_type = Slice; + using pointer = Slice*; + using reference = Slice&; + using difference_type = ssize_t; + + ShortKeyIndexIterator(const ShortKeyIndexDecoder* decoder, uint32_t ordinal = 0) + : _decoder(decoder), _ordinal(ordinal) { } + + ShortKeyIndexIterator& operator-=(ssize_t step) { + _ordinal -= step; + return *this; + } + + ShortKeyIndexIterator& operator+=(ssize_t step) { + _ordinal += step; + return *this; + } + + ShortKeyIndexIterator& operator++() { + _ordinal++; + return *this; + } + + bool operator!=(const ShortKeyIndexIterator& other) { + return _ordinal != other._ordinal || _decoder != other._decoder; + } + + bool operator==(const ShortKeyIndexIterator& other) { + return _ordinal == other._ordinal && _decoder == other._decoder; + } + + ssize_t operator-(const ShortKeyIndexIterator& other) const { + return _ordinal - other._ordinal; + } + + inline bool valid() const; + + Slice operator*() const; + + ssize_t ordinal() const { return _ordinal; } + +private: + const ShortKeyIndexDecoder* _decoder; + ssize_t _ordinal; +}; + +// Used to decode short key to header and encoded index data. +// Usage: +// MemIndex index; +// ShortKeyIndexDecoder decoder(slice) +// decoder.parse(); +// auto iter = decoder.lower_bound(key); +class ShortKeyIndexDecoder { +public: + // Client should assure that data is available when this class + // is used. + ShortKeyIndexDecoder(const Slice& data) : _data(data) { } + + Status parse(); + + ShortKeyIndexIterator begin() const { return {this, 0}; } + ShortKeyIndexIterator end() const { return {this, num_items()}; } + + // lower_bound will return a iterator which locates the first item + // equal with or larger than given key. + // NOTE: This function holds that without common prefix key, the one + // who has more length it the bigger one. Two key is the same only + // when their length are equal + ShortKeyIndexIterator lower_bound(const Slice& key) const { + return seek(key); + } + + // Return the iterator which locates the first item larger than the + // input key. + ShortKeyIndexIterator upper_bound(const Slice& key) const { + return seek(key); + } + + uint32_t num_items() const { return _footer.num_items(); } + + Slice key(ssize_t ordinal) const { + DCHECK(ordinal >= 0 && ordinal < num_items()); + return {_key_data.data + _offsets[ordinal], _offsets[ordinal + 1] - _offsets[ordinal]}; + } + +private: + template + ShortKeyIndexIterator seek(const Slice& key) const { + auto comparator = [this] (const Slice& lhs, const Slice& rhs) { + return lhs.compare(rhs) < 0; + }; + if (lower_bound) { + return std::lower_bound(begin(), end(), key, comparator); + } else { + return std::upper_bound(begin(), end(), key, comparator); + } + } + +private: + Slice _data; + + // All following fields are only valid after pares has been executed successfully + segment_v2::ShortKeyFooterPB _footer; + std::vector _offsets; + Slice _key_data; +}; + +inline Slice ShortKeyIndexIterator::operator*() const { + return _decoder->key(_ordinal); +} + +inline bool ShortKeyIndexIterator::valid() const { + return _ordinal >= 0 && _ordinal < _decoder->num_items(); +} + +} diff --git a/be/src/olap/types.h b/be/src/olap/types.h index d7597d36825601..0989d1a86599ce 100644 --- a/be/src/olap/types.h +++ b/be/src/olap/types.h @@ -104,24 +104,31 @@ struct CppTypeTraits { template<> struct CppTypeTraits { using CppType = bool; + using UnsignedCppType = bool; }; template<> struct CppTypeTraits { using CppType = int8_t; + using UnsignedCppType = uint8_t; }; template<> struct CppTypeTraits { using CppType = int16_t; + using UnsignedCppType = uint16_t; }; template<> struct CppTypeTraits { using CppType = int32_t; + using UnsignedCppType = uint32_t; }; template<> struct CppTypeTraits { using CppType = uint32_t; + using UnsignedCppType = uint32_t; }; template<> struct CppTypeTraits { using CppType = int64_t; + using UnsignedCppType = uint64_t; }; template<> struct CppTypeTraits { using CppType = int128_t; + using UnsignedCppType = unsigned int128_t; }; template<> struct CppTypeTraits { using CppType = float; @@ -131,12 +138,15 @@ template<> struct CppTypeTraits { }; template<> struct CppTypeTraits { using CppType = decimal12_t; + using UnsignedCppType = decimal12_t; }; template<> struct CppTypeTraits { using CppType = uint24_t; + using UnsignedCppType = uint24_t; }; template<> struct CppTypeTraits { using CppType = int64_t; + using UnsignedCppType = uint64_t; }; template<> struct CppTypeTraits { using CppType = Slice; diff --git a/be/src/olap/uint24.h b/be/src/olap/uint24.h index 7632b584c93d01..638ffae6e6a1ae 100644 --- a/be/src/olap/uint24.h +++ b/be/src/olap/uint24.h @@ -36,19 +36,26 @@ struct uint24_t { data[2] = value.data[2]; } - uint24_t(const int32_t& value) { + uint24_t(const uint32_t& value) { data[0] = static_cast(value); data[1] = static_cast(value >> 8); data[2] = static_cast(value >> 16); } + uint24_t& operator=(const uint32_t& value) { + data[0] = static_cast(value); + data[1] = static_cast(value >> 8); + data[2] = static_cast(value >> 16); + return *this; + } + uint24_t& operator+=(const uint24_t& value) { *this = static_cast(*this) + static_cast(value); return *this; } - operator int() const { - int value = static_cast(data[0]); + operator uint32_t() const { + uint32_t value = static_cast(data[0]); value += (static_cast(static_cast(data[1]))) << 8; value += (static_cast(static_cast(data[2]))) << 16; return value; diff --git a/be/src/util/CMakeLists.txt b/be/src/util/CMakeLists.txt index d2cc2b0152aec7..05cc4983a63f06 100644 --- a/be/src/util/CMakeLists.txt +++ b/be/src/util/CMakeLists.txt @@ -78,6 +78,7 @@ set(UTIL_FILES md5.cpp frontend_helper.cpp faststring.cc + slice.cpp ) if (WITH_MYSQL) diff --git a/be/src/util/slice.cpp b/be/src/util/slice.cpp new file mode 100644 index 00000000000000..fc583a2d228bef --- /dev/null +++ b/be/src/util/slice.cpp @@ -0,0 +1,28 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include "util/slice.h" + +#include "util/faststring.h" + +namespace doris { + +// NOTE(zc): we define this function here to make compile work. +Slice::Slice(const faststring& s) : // NOLINT(runtime/explicit) + data((char*)(s.data())), size(s.size()) { } + +} diff --git a/be/src/util/slice.h b/be/src/util/slice.h index 5c31072fe2e1e4..1eebe3a5f29fdd 100644 --- a/be/src/util/slice.h +++ b/be/src/util/slice.h @@ -29,6 +29,8 @@ namespace doris { +class faststring; + /// @brief A wrapper around externally allocated data. /// /// Slice is a simple structure containing a pointer into some external @@ -66,6 +68,8 @@ struct Slice { /// Create a slice that refers to the contents of the given string. Slice(const std::string& s) : // NOLINT(runtime/explicit) data(const_cast(s.data())), size(s.size()) { } + + Slice(const faststring& s); /// Create a slice that refers to a C-string s[0,strlen(s)-1]. Slice(const char* s) : // NOLINT(runtime/explicit) diff --git a/be/test/olap/CMakeLists.txt b/be/test/olap/CMakeLists.txt index 0a11d868f7466d..ab105eed67856f 100644 --- a/be/test/olap/CMakeLists.txt +++ b/be/test/olap/CMakeLists.txt @@ -61,3 +61,5 @@ ADD_BE_TEST(rowset/alpha_rowset_test) ADD_BE_TEST(olap_snapshot_converter_test) ADD_BE_TEST(txn_manager_test) ADD_BE_TEST(generic_iterators_test) +ADD_BE_TEST(key_coder_test) +ADD_BE_TEST(short_key_index_test) diff --git a/be/test/olap/key_coder_test.cpp b/be/test/olap/key_coder_test.cpp new file mode 100644 index 00000000000000..5d241bb0804806 --- /dev/null +++ b/be/test/olap/key_coder_test.cpp @@ -0,0 +1,287 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include "olap/key_coder.h" + +#include +#include +#include + +#include "util/debug_util.h" + +namespace doris { + +class KeyCoderTest : public testing::Test { +public: + KeyCoderTest() { } + virtual ~KeyCoderTest() { + } +}; + +template +void test_integer_encode() { + using CppType = typename CppTypeTraits::CppType; + + auto key_coder = get_key_coder(type); + + { + std::string buf; + CppType val = std::numeric_limits::min(); + key_coder->encode_ascending(&val, 1, &buf); + + std::string result; + for (int i = 0; i < sizeof(CppType); ++i) { + result.append("00"); + } + + ASSERT_STREQ(result.c_str(), hexdump(buf.data(), buf.size()).c_str()); + + { + Slice slice(buf); + CppType check_val; + key_coder->decode_ascending(&slice, sizeof(CppType), (uint8_t*)&check_val, nullptr); + ASSERT_EQ(val, check_val); + } + } + + { + std::string buf; + CppType val = std::numeric_limits::max(); + key_coder->encode_ascending(&val, sizeof(CppType), &buf); + + std::string result; + for (int i = 0; i < sizeof(CppType); ++i) { + result.append("FF"); + } + + ASSERT_STREQ(result.c_str(), hexdump(buf.data(), buf.size()).c_str()); + { + Slice slice(buf); + CppType check_val; + key_coder->decode_ascending(&slice, sizeof(CppType), (uint8_t*)&check_val, nullptr); + ASSERT_EQ(val, check_val); + } + } + + { + CppType val1 = random(); + CppType val2 = random(); + + std::string buf1; + std::string buf2; + + key_coder->encode_ascending(&val1, sizeof(CppType), &buf1); + key_coder->encode_ascending(&val2, sizeof(CppType), &buf2); + + if (val1 < val2) { + ASSERT_TRUE(memcmp(buf1.c_str(), buf2.c_str(), buf1.size()) < 0); + } else if (val1 > val2) { + ASSERT_TRUE(memcmp(buf1.c_str(), buf2.c_str(), buf1.size()) > 0); + } else { + ASSERT_TRUE(memcmp(buf1.c_str(), buf2.c_str(), buf1.size()) == 0); + } + } +} + +TEST(KeyCoderTest, test_int) { + test_integer_encode(); + test_integer_encode(); + test_integer_encode(); + test_integer_encode(); + test_integer_encode(); + test_integer_encode(); + + test_integer_encode(); +} + +TEST(KeyCoderTest, test_date) { + using CppType = uint24_t; + auto key_coder = get_key_coder(OLAP_FIELD_TYPE_DATE); + + { + std::string buf; + CppType val = 0; + key_coder->encode_ascending(&val, 1, &buf); + + std::string result; + for (int i = 0; i < sizeof(uint24_t); ++i) { + result.append("00"); + } + + ASSERT_STREQ(result.c_str(), hexdump(buf.data(), buf.size()).c_str()); + + { + Slice slice(buf); + CppType check_val; + key_coder->decode_ascending(&slice, sizeof(CppType), (uint8_t*)&check_val, nullptr); + ASSERT_EQ(val, check_val); + } + } + + { + std::string buf; + CppType val = 10000; + key_coder->encode_ascending(&val, sizeof(CppType), &buf); + + std::string result("002710"); + + ASSERT_STREQ(result.c_str(), hexdump(buf.data(), buf.size()).c_str()); + { + Slice slice(buf); + CppType check_val; + key_coder->decode_ascending(&slice, sizeof(CppType), (uint8_t*)&check_val, nullptr); + ASSERT_EQ(val, check_val); + } + } + + { + CppType val1 = random(); + CppType val2 = random(); + + std::string buf1; + std::string buf2; + + key_coder->encode_ascending(&val1, sizeof(CppType), &buf1); + key_coder->encode_ascending(&val2, sizeof(CppType), &buf2); + + if (val1 < val2) { + ASSERT_TRUE(memcmp(buf1.c_str(), buf2.c_str(), buf1.size()) < 0); + } else if (val1 > val2) { + ASSERT_TRUE(memcmp(buf1.c_str(), buf2.c_str(), buf1.size()) > 0); + } else { + ASSERT_TRUE(memcmp(buf1.c_str(), buf2.c_str(), buf1.size()) == 0); + } + } +} + +TEST(KeyCoderTest, test_decimal) { + auto key_coder = get_key_coder(OLAP_FIELD_TYPE_DECIMAL); + + decimal12_t val1(1, 100000000); + std::string buf1; + + key_coder->encode_ascending(&val1, sizeof(decimal12_t), &buf1); + + decimal12_t check_val; + Slice slice1(buf1); + key_coder->decode_ascending(&slice1, sizeof(decimal12_t), (uint8_t*)&check_val, nullptr); + ASSERT_EQ(check_val, val1); + + { + decimal12_t val2(-1, -100000000); + std::string buf2; + key_coder->encode_ascending(&val2, sizeof(decimal12_t), &buf2); + ASSERT_TRUE(memcmp(buf1.c_str(), buf2.c_str(), buf1.size()) > 0); + } + { + decimal12_t val2(1, 100000001); + std::string buf2; + key_coder->encode_ascending(&val2, sizeof(decimal12_t), &buf2); + ASSERT_TRUE(memcmp(buf1.c_str(), buf2.c_str(), buf1.size()) < 0); + } + { + decimal12_t val2(0, 0); + std::string buf2; + key_coder->encode_ascending(&val2, sizeof(decimal12_t), &buf2); + ASSERT_TRUE(memcmp(buf1.c_str(), buf2.c_str(), buf1.size()) > 0); + + std::string result("80"); + for (int i = 0; i < sizeof(int64_t) - 1; ++i) { + result.append("00"); + } + result.append("80"); + for (int i = 0; i < sizeof(int32_t) - 1; ++i) { + result.append("00"); + } + + ASSERT_STREQ(result.c_str(), hexdump(buf2.data(), buf2.size()).c_str()); + } +} + +TEST(KeyCoderTest, test_char) { + auto key_coder = get_key_coder(OLAP_FIELD_TYPE_CHAR); + + char buf[] = "1234567890"; + Slice slice(buf, 10); + + { + std::string key; + key_coder->encode_ascending(&slice, 10, &key); + Slice encoded_key(key); + + Arena arena; + Slice check_slice; + auto st = key_coder->decode_ascending(&encoded_key, 10, (uint8_t*)&check_slice, &arena); + ASSERT_TRUE(st.ok()); + + ASSERT_STREQ("1234567890", check_slice.data); + } + + { + std::string key; + key_coder->encode_ascending(&slice, 5, &key); + Slice encoded_key(key); + + Arena arena; + Slice check_slice; + auto st = key_coder->decode_ascending(&encoded_key, 5, (uint8_t*)&check_slice, &arena); + ASSERT_TRUE(st.ok()); + + ASSERT_STREQ("12345", check_slice.data); + } +} + +TEST(KeyCoderTest, test_varchar) { + auto key_coder = get_key_coder(OLAP_FIELD_TYPE_VARCHAR); + + char buf[] = "1234567890"; + Slice slice(buf, 10); + + { + std::string key; + key_coder->encode_ascending(&slice, 15, &key); + Slice encoded_key(key); + + Arena arena; + Slice check_slice; + auto st = key_coder->decode_ascending(&encoded_key, 15, (uint8_t*)&check_slice, &arena); + ASSERT_TRUE(st.ok()); + + ASSERT_STREQ("1234567890", check_slice.data); + } + + { + std::string key; + key_coder->encode_ascending(&slice, 5, &key); + Slice encoded_key(key); + + Arena arena; + Slice check_slice; + auto st = key_coder->decode_ascending(&encoded_key, 5, (uint8_t*)&check_slice, &arena); + ASSERT_TRUE(st.ok()); + + ASSERT_STREQ("12345", check_slice.data); + } +} + + +} // namespace doris + +int main(int argc, char **argv) { + testing::InitGoogleTest(&argc, argv); + return RUN_ALL_TESTS(); +} diff --git a/be/test/olap/short_key_index_test.cpp b/be/test/olap/short_key_index_test.cpp new file mode 100644 index 00000000000000..eba2ef84e54468 --- /dev/null +++ b/be/test/olap/short_key_index_test.cpp @@ -0,0 +1,97 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include "olap/short_key_index.h" + +#include + +#include "olap/lru_cache.h" +#include "olap/file_helper.h" + +namespace doris { + +class ShortKeyIndexTest : public testing::Test { +public: + ShortKeyIndexTest() { } + virtual ~ShortKeyIndexTest() { + } +}; + +TEST_F(ShortKeyIndexTest, buider) { + ShortKeyIndexBuilder builder(0, 1024); + + for (int i = 1000; i < 10000; i += 2) { + builder.add_item(std::to_string(i)); + } + std::vector slices; + auto st = builder.finalize(10000, 9000 * 1024, &slices); + ASSERT_TRUE(st.ok()); + + std::string buf; + for (auto& slice : slices) { + buf.append(slice.data, slice.size); + } + + ShortKeyIndexDecoder decoder(buf); + st = decoder.parse(); + ASSERT_TRUE(st.ok()); + + // find 1499 + { + auto iter = decoder.lower_bound("1499"); + ASSERT_TRUE(iter.valid()); + ASSERT_STREQ("1500", (*iter).to_string().c_str()); + } + // find 1500 lower bound + { + auto iter = decoder.lower_bound("1500"); + ASSERT_TRUE(iter.valid()); + ASSERT_STREQ("1500", (*iter).to_string().c_str()); + } + // find 1500 upper bound + { + auto iter = decoder.upper_bound("1500"); + ASSERT_TRUE(iter.valid()); + ASSERT_STREQ("1502", (*iter).to_string().c_str()); + } + // find prefix "87" + { + auto iter = decoder.lower_bound("87"); + ASSERT_TRUE(iter.valid()); + ASSERT_STREQ("8700", (*iter).to_string().c_str()); + } + // find prefix "87" + { + auto iter = decoder.upper_bound("87"); + ASSERT_TRUE(iter.valid()); + ASSERT_STREQ("8700", (*iter).to_string().c_str()); + } + + // find prefix "9999" + { + auto iter = decoder.upper_bound("9999"); + ASSERT_FALSE(iter.valid()); + } +} + +} + +int main(int argc, char** argv) { + ::testing::InitGoogleTest(&argc, argv); + return RUN_ALL_TESTS(); +} + diff --git a/gensrc/proto/segment_v2.proto b/gensrc/proto/segment_v2.proto index c66c2d88e023be..f7d95c6eaaa22f 100644 --- a/gensrc/proto/segment_v2.proto +++ b/gensrc/proto/segment_v2.proto @@ -117,3 +117,21 @@ message FileFooterPB { repeated MetadataPairPB file_meta_datas = 8; // meta data of file optional PagePointerPB key_index_page = 9; // short key index page } + +message ShortKeyFooterPB { + // How many index item in this index. + optional uint32 num_items = 1; + // The total bytes occupied by the index key + optional uint32 key_bytes = 2; + // The total bytes occupied by the key offsets + optional uint32 offset_bytes = 3; + // Segment id which this index is belong to + optional uint32 segment_id = 4; + // number rows in each block + optional uint32 num_rows_per_block = 5; + // How many rows in this segment + optional uint32 num_segment_rows = 6; + // Total bytes for this segment + optional uint32 segment_bytes = 7; +} + diff --git a/run-ut.sh b/run-ut.sh index 5c3a4a4bc6b721..59fab254b87421 100755 --- a/run-ut.sh +++ b/run-ut.sh @@ -252,6 +252,8 @@ ${DORIS_TEST_BINARY_DIR}/olap/txn_manager_test ${DORIS_TEST_BINARY_DIR}/olap/storage_types_test ${DORIS_TEST_BINARY_DIR}/olap/generic_iterators_test ${DORIS_TEST_BINARY_DIR}/olap/aggregate_func_test +${DORIS_TEST_BINARY_DIR}/olap/short_key_index_test +${DORIS_TEST_BINARY_DIR}/olap/key_coder_test # Running routine load test ${DORIS_TEST_BINARY_DIR}/runtime/kafka_consumer_pipe_test