diff --git a/be/src/gutil/endian.h b/be/src/gutil/endian.h index f6b2485b6d8bab..6b8a0bc772d414 100644 --- a/be/src/gutil/endian.h +++ b/be/src/gutil/endian.h @@ -32,6 +32,18 @@ inline uint64 gbswap_64(uint64 host_int) { #endif // bswap_64 } +inline unsigned __int128 gbswap_128(unsigned __int128 host_int) { + return static_cast(bswap_64(static_cast(host_int >> 64))) | + (static_cast(bswap_64(static_cast(host_int))) << 64); +} + +// Swap bytes of a 24-bit value. +inline uint32_t bswap_24(uint32_t x) { + return ((x & 0x0000ffULL) << 16) | + ((x & 0x00ff00ULL)) | + ((x & 0xff0000ULL) >> 16); +} + #ifdef IS_LITTLE_ENDIAN // Definitions for ntohl etc. that don't require us to include @@ -188,4 +200,145 @@ class LittleEndian { #define gntohll(x) ghtonll(x) #define ntohll(x) htonll(x) +// Utilities to convert numbers between the current hosts's native byte +// order and big-endian byte order (same as network byte order) +// +// Load/Store methods are alignment safe +class BigEndian { +public: +#ifdef IS_LITTLE_ENDIAN + + static uint16 FromHost16(uint16 x) { return bswap_16(x); } + static uint16 ToHost16(uint16 x) { return bswap_16(x); } + + static uint32 FromHost24(uint32 x) { return bswap_24(x); } + static uint32 ToHost24(uint32 x) { return bswap_24(x); } + + static uint32 FromHost32(uint32 x) { return bswap_32(x); } + static uint32 ToHost32(uint32 x) { return bswap_32(x); } + + static uint64 FromHost64(uint64 x) { return gbswap_64(x); } + static uint64 ToHost64(uint64 x) { return gbswap_64(x); } + + static unsigned __int128 FromHost128(unsigned __int128 x) { return gbswap_128(x); } + static unsigned __int128 ToHost128(unsigned __int128 x) { return gbswap_128(x); } + + static bool IsLittleEndian() { return true; } + +#elif defined IS_BIG_ENDIAN + + static uint16 FromHost16(uint16 x) { return x; } + static uint16 ToHost16(uint16 x) { return x; } + + static uint32 FromHost24(uint32 x) { return x; } + static uint32 ToHost24(uint32 x) { return x; } + + static uint32 FromHost32(uint32 x) { return x; } + static uint32 ToHost32(uint32 x) { return x; } + + static uint64 FromHost64(uint64 x) { return x; } + static uint64 ToHost64(uint64 x) { return x; } + + static uint128 FromHost128(uint128 x) { return x; } + static uint128 ToHost128(uint128 x) { return x; } + + static bool IsLittleEndian() { return false; } + +#endif /* ENDIAN */ + // Functions to do unaligned loads and stores in little-endian order. + static uint16 Load16(const void *p) { + return ToHost16(UNALIGNED_LOAD16(p)); + } + + static void Store16(void *p, uint16 v) { + UNALIGNED_STORE16(p, FromHost16(v)); + } + + static uint32 Load32(const void *p) { + return ToHost32(UNALIGNED_LOAD32(p)); + } + + static void Store32(void *p, uint32 v) { + UNALIGNED_STORE32(p, FromHost32(v)); + } + + static uint64 Load64(const void *p) { + return ToHost64(UNALIGNED_LOAD64(p)); + } + + // Build a uint64 from 1-8 bytes. + // 8 * len least significant bits are loaded from the memory with + // BigEndian order. The 64 - 8 * len most significant bits are + // set all to 0. + // In latex-friendly words, this function returns: + // $\sum_{i=0}^{len-1} p[i] 256^{i}$, where p[i] is unsigned. + // + // This function is equivalent with: + // uint64 val = 0; + // memcpy(&val, p, len); + // return ToHost64(val); + // TODO(user): write a small benchmark and benchmark the speed + // of a memcpy based approach. + // + // For speed reasons this function does not work for len == 0. + // The caller needs to guarantee that 1 <= len <= 8. + static uint64 Load64VariableLength(const void * const p, int len) { + assert(len >= 1 && len <= 8); + uint64 val = Load64(p); + uint64 mask = 0; + --len; + do { + mask = (mask << 8) | 0xff; + // (--len >= 0) is about 10 % faster than (len--) in some benchmarks. + } while (--len >= 0); + return val & mask; + } + + static void Store64(void *p, uint64 v) { + UNALIGNED_STORE64(p, FromHost64(v)); + } + + static uint128 Load128(const void *p) { + return uint128( + ToHost64(UNALIGNED_LOAD64(p)), + ToHost64(UNALIGNED_LOAD64(reinterpret_cast(p) + 1))); + } + + static void Store128(void *p, const uint128 v) { + UNALIGNED_STORE64(p, FromHost64(Uint128High64(v))); + UNALIGNED_STORE64(reinterpret_cast(p) + 1, + FromHost64(Uint128Low64(v))); + } + + // Build a uint128 from 1-16 bytes. + // 8 * len least significant bits are loaded from the memory with + // BigEndian order. The 128 - 8 * len most significant bits are + // set all to 0. + static uint128 Load128VariableLength(const void *p, int len) { + if (len <= 8) { + return uint128(Load64VariableLength(static_cast(p)+8, + len)); + } else { + return uint128( + Load64VariableLength(p, len-8), + Load64(static_cast(p)+8)); + } + } + + // Load & Store in machine's word size. + static uword_t LoadUnsignedWord(const void *p) { + if (sizeof(uword_t) == 8) + return Load64(p); + else + return Load32(p); + } + + static void StoreUnsignedWord(void *p, uword_t v) { + if (sizeof(uword_t) == 8) + Store64(p, v); + else + Store32(p, v); + } +}; // BigEndian + #endif // UTIL_ENDIAN_ENDIAN_H_ diff --git a/be/src/olap/CMakeLists.txt b/be/src/olap/CMakeLists.txt index c494c8d0f6c6a6..6f45285f26ddaf 100644 --- a/be/src/olap/CMakeLists.txt +++ b/be/src/olap/CMakeLists.txt @@ -41,6 +41,7 @@ add_library(Olap STATIC hll.cpp in_list_predicate.cpp in_stream.cpp + key_coder.cpp lru_cache.cpp memtable.cpp merger.cpp @@ -63,6 +64,7 @@ add_library(Olap STATIC serialize.cpp storage_engine.cpp data_dir.cpp + short_key_index.cpp snapshot_manager.cpp stream_index_common.cpp stream_index_reader.cpp diff --git a/be/src/olap/key_coder.cpp b/be/src/olap/key_coder.cpp new file mode 100644 index 00000000000000..68f2ace8961bcb --- /dev/null +++ b/be/src/olap/key_coder.cpp @@ -0,0 +1,80 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include "olap/key_coder.h" + +#include + +namespace doris { + +template +KeyCoder::KeyCoder(TraitsType traits) + : _encode_ascending(traits.encode_ascending), + _decode_ascending(traits.decode_ascending) { +} + +// Helper class used to get KeyCoder +class KeyCoderResolver { +public: + ~KeyCoderResolver() { + for (auto& iter : _coder_map) { + delete iter.second; + } + } + + static KeyCoderResolver* instance() { + static KeyCoderResolver s_instance; + return &s_instance; + } + + KeyCoder* get_coder(FieldType field_type) const { + auto it = _coder_map.find(field_type); + if (it != _coder_map.end()) { + return it->second; + } + return nullptr; + } + +private: + KeyCoderResolver() { + add_mapping(); + add_mapping(); + add_mapping(); + add_mapping(); + add_mapping(); + add_mapping(); + add_mapping(); + + add_mapping(); + add_mapping(); + add_mapping(); + add_mapping(); + } + + template + void add_mapping() { + _coder_map.emplace(field_type, new KeyCoder(KeyCoderTraits())); + } + + std::unordered_map _coder_map; +}; + +const KeyCoder* get_key_coder(FieldType type) { + return KeyCoderResolver::instance()->get_coder(type); +} + +} diff --git a/be/src/olap/key_coder.h b/be/src/olap/key_coder.h new file mode 100644 index 00000000000000..bf7fa52562fabf --- /dev/null +++ b/be/src/olap/key_coder.h @@ -0,0 +1,221 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#pragma once + +#include +#include + +#include "common/status.h" +#include "gutil/endian.h" +#include "gutil/strings/substitute.h" +#include "olap/types.h" +#include "util/arena.h" + +namespace doris { + +using strings::Substitute; + +using EncodeAscendingFunc = void (*)(const void* value, size_t index_size, std::string* buf); +using DecodeAscendingFunc = Status (*)(Slice* encoded_key, size_t index_size, uint8_t* cell_ptr, Arena* arena); + +class KeyCoder { +public: + template + KeyCoder(TraitsType traits); + + void encode_ascending(const void* value, size_t index_size, std::string* buf) const { + _encode_ascending(value, index_size, buf); + } + Status decode_ascending(Slice* encoded_key, size_t index_size, uint8_t* cell_ptr, Arena* arena) const { + return _decode_ascending(encoded_key, index_size, cell_ptr, arena); + } + +private: + EncodeAscendingFunc _encode_ascending; + DecodeAscendingFunc _decode_ascending; +}; + +extern const KeyCoder* get_key_coder(FieldType type); + +template +class KeyCoderTraits { +}; + +template +class KeyCoderTraits::CppType>::value>::type> { +public: + using CppType = typename CppTypeTraits::CppType; + using UnsignedCppType = typename CppTypeTraits::UnsignedCppType; + +private: + // Swap value's endian from/to big endian + static UnsignedCppType swap_big_endian(UnsignedCppType val) { + switch (sizeof(UnsignedCppType)) { + case 1: return val; + case 2: return BigEndian::FromHost16(val); + case 4: return BigEndian::FromHost32(val); + case 8: return BigEndian::FromHost64(val); + case 16: return BigEndian::FromHost128(val); + default: LOG(FATAL) << "Invalid type to big endian, type=" << field_type + << ", size=" << sizeof(UnsignedCppType); + } + } + +public: + static void encode_ascending(const void* value, size_t index_size, std::string* buf) { + UnsignedCppType unsigned_val; + memcpy(&unsigned_val, value, sizeof(unsigned_val)); + // swap MSB to encode integer + if (std::is_signed::value) { + unsigned_val ^= (static_cast(1) << (sizeof(UnsignedCppType) * CHAR_BIT - 1)); + } + // make it bigendian + unsigned_val = swap_big_endian(unsigned_val); + + buf->append((char*)&unsigned_val, sizeof(unsigned_val)); + } + + static Status decode_ascending(Slice* encoded_key, size_t index_size, + uint8_t* cell_ptr, Arena* arena) { + if (encoded_key->size < sizeof(UnsignedCppType)) { + return Status::InvalidArgument( + Substitute("Key too short, need=$0 vs real=$1", + sizeof(UnsignedCppType), encoded_key->size)); + } + UnsignedCppType unsigned_val; + memcpy(&unsigned_val, encoded_key->data, sizeof(UnsignedCppType)); + unsigned_val = swap_big_endian(unsigned_val); + if (std::is_signed::value) { + unsigned_val ^= (static_cast(1) << (sizeof(UnsignedCppType) * CHAR_BIT - 1)); + } + memcpy(cell_ptr, &unsigned_val, sizeof(UnsignedCppType)); + encoded_key->remove_prefix(sizeof(UnsignedCppType)); + return Status::OK(); + } +}; + +template<> +class KeyCoderTraits { +public: + using CppType = typename CppTypeTraits::CppType; + using UnsignedCppType = typename CppTypeTraits::UnsignedCppType; + +public: + static void encode_ascending(const void* value, size_t index_size, std::string* buf) { + UnsignedCppType unsigned_val; + memcpy(&unsigned_val, value, sizeof(unsigned_val)); + // make it bigendian + unsigned_val = BigEndian::FromHost24(unsigned_val); + buf->append((char*)&unsigned_val, sizeof(unsigned_val)); + } + + static Status decode_ascending(Slice* encoded_key, size_t index_size, + uint8_t* cell_ptr, Arena* arena) { + if (encoded_key->size < sizeof(UnsignedCppType)) { + return Status::InvalidArgument( + Substitute("Key too short, need=$0 vs real=$1", + sizeof(UnsignedCppType), encoded_key->size)); + } + UnsignedCppType unsigned_val; + memcpy(&unsigned_val, encoded_key->data, sizeof(UnsignedCppType)); + unsigned_val = BigEndian::FromHost24(unsigned_val); + memcpy(cell_ptr, &unsigned_val, sizeof(UnsignedCppType)); + encoded_key->remove_prefix(sizeof(UnsignedCppType)); + return Status::OK(); + } +}; + +template<> +class KeyCoderTraits { +public: + static void encode_ascending(const void* value, size_t index_size, std::string* buf) { + decimal12_t decimal_val; + memcpy(&decimal_val, value, sizeof(decimal12_t)); + // encode integer + KeyCoderTraits::encode_ascending( + &decimal_val.integer, sizeof(decimal_val.integer), buf); + // encode integer + KeyCoderTraits::encode_ascending( + &decimal_val.fraction, sizeof(decimal_val.fraction), buf); + } + + static Status decode_ascending(Slice* encoded_key, size_t index_size, + uint8_t* cell_ptr, Arena* arena) { + decimal12_t decimal_val; + RETURN_IF_ERROR(KeyCoderTraits::decode_ascending( + encoded_key, sizeof(decimal_val.integer), (uint8_t*)&decimal_val.integer, arena)); + RETURN_IF_ERROR(KeyCoderTraits::decode_ascending( + encoded_key, sizeof(decimal_val.fraction), (uint8_t*)&decimal_val.fraction, arena)); + memcpy(cell_ptr, &decimal_val, sizeof(decimal12_t)); + return Status::OK(); + } +}; + +template<> +class KeyCoderTraits { +public: + static void encode_ascending(const void* value, size_t index_size, std::string* buf) { + const Slice* slice = (const Slice*)value; + CHECK(index_size <= slice->size) << "index size is larger than char size, index=" << index_size << ", char=" << slice->size; + buf->append(slice->data, index_size); + } + + static Status decode_ascending(Slice* encoded_key, size_t index_size, + uint8_t* cell_ptr, Arena* arena) { + if (encoded_key->size < index_size) { + return Status::InvalidArgument( + Substitute("Key too short, need=$0 vs real=$1", + index_size, encoded_key->size)); + } + Slice* slice = (Slice*)cell_ptr; + slice->data = arena->Allocate(index_size); + slice->size = index_size; + memcpy(slice->data, encoded_key->data, index_size); + encoded_key->remove_prefix(index_size); + return Status::OK(); + } +}; + +template<> +class KeyCoderTraits { +public: + static void encode_ascending(const void* value, size_t index_size, std::string* buf) { + const Slice* slice = (const Slice*)value; + size_t copy_size = std::min(index_size, slice->size); + buf->append(slice->data, copy_size); + } + + static Status decode_ascending(Slice* encoded_key, size_t index_size, + uint8_t* cell_ptr, Arena* arena) { + CHECK(encoded_key->size <= index_size) + << "encoded_key size is larger than index_size, key_size=" << encoded_key->size + << ", index_size=" << index_size; + auto copy_size = encoded_key->size; + Slice* slice = (Slice*)cell_ptr; + slice->data = arena->Allocate(copy_size); + slice->size = copy_size; + memcpy(slice->data, encoded_key->data, copy_size); + encoded_key->remove_prefix(copy_size); + return Status::OK(); + } +}; + +} diff --git a/be/src/olap/short_key_index.cpp b/be/src/olap/short_key_index.cpp new file mode 100644 index 00000000000000..69e6c8a4700fec --- /dev/null +++ b/be/src/olap/short_key_index.cpp @@ -0,0 +1,116 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include "olap/short_key_index.h" + +#include + +#include "util/coding.h" +#include "gutil/strings/substitute.h" + +using strings::Substitute; + +namespace doris { + +Status ShortKeyIndexBuilder::add_item(const Slice& key) { + put_varint32(&_offset_buf, _key_buf.size()); + _footer.set_num_items(_footer.num_items() + 1); + _key_buf.append(key.data, key.size); + return Status::OK(); +} + +Status ShortKeyIndexBuilder::finalize(uint32_t segment_bytes, + uint32_t num_segment_rows, + std::vector* slices) { + _footer.set_num_segment_rows(num_segment_rows); + _footer.set_segment_bytes(segment_bytes); + _footer.set_key_bytes(_key_buf.size()); + _footer.set_offset_bytes(_offset_buf.size()); + + // encode header + if (!_footer.SerializeToString(&_footer_buf)) { + return Status::InternalError("Failed to serialize index footer"); + } + + put_fixed32_le(&_footer_buf, _footer_buf.size()); + uint32_t checksum = 0; + put_fixed32_le(&_footer_buf, checksum); + + slices->emplace_back(_key_buf); + slices->emplace_back(_offset_buf); + slices->emplace_back(_footer_buf); + return Status::OK(); +} + +Status ShortKeyIndexDecoder::parse() { + Slice data = _data; + + // 1. parse footer, get checksum and footer length + if (data.size < 2 * sizeof(uint32_t)) { + return Status::Corruption( + Substitute("Short key is too short, need=$0 vs real=$1", + 2 * sizeof(uint32_t), data.size)); + } + size_t offset = data.size - 2 * sizeof(uint32_t); + uint32_t footer_length = decode_fixed32_le((uint8_t*)data.data + offset); + uint32_t checksum = decode_fixed32_le((uint8_t*)data.data + offset + 4); + // TODO(zc): do checksum + if (checksum != 0) { + return Status::Corruption( + Substitute("Checksum not match, need=$0 vs read=$1", 0, checksum)); + } + // move offset to parse footer + offset -= footer_length; + std::string footer_buf(data.data + offset, footer_length); + if (!_footer.ParseFromString(footer_buf)) { + return Status::Corruption("Fail to parse index footer from string"); + } + + // check if real data size match footer's content + if (offset != _footer.key_bytes() + _footer.offset_bytes()) { + return Status::Corruption( + Substitute("Index size not match, need=$0, real=$1", + _footer.key_bytes() + _footer.offset_bytes(), offset)); + } + + // set index buffer + _key_data = Slice(_data.data, _footer.key_bytes()); + + // parse offset information + Slice offset_slice(_data.data + _footer.key_bytes(), _footer.offset_bytes()); + // +1 for record total length + _offsets.resize(_footer.num_items() + 1); + _offsets[_footer.num_items()] = _footer.key_bytes(); + for (uint32_t i = 0; i < _footer.num_items(); ++i) { + uint32_t offset = 0; + if (!get_varint32(&offset_slice, &offset)) { + return Status::Corruption("Fail to get varint from index offset buffer"); + } + DCHECK(offset <= _footer.key_bytes()) + << "Offset is larger than total bytes, offset=" << offset + << ", key_bytes=" << _footer.key_bytes(); + _offsets[i] = offset; + } + + if (offset_slice.size != 0) { + return Status::Corruption("Still has data after parse all key offset"); + } + + return Status::OK(); +} + +} diff --git a/be/src/olap/short_key_index.h b/be/src/olap/short_key_index.h new file mode 100644 index 00000000000000..e0ebf0ef78a850 --- /dev/null +++ b/be/src/olap/short_key_index.h @@ -0,0 +1,197 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#pragma once + +#include +#include +#include +#include + +#include "common/status.h" +#include "gen_cpp/segment_v2.pb.h" +#include "util/faststring.h" +#include "util/slice.h" + +namespace doris { + +// Used to encode a segment short key indices to binary format. This version +// only accepts binary key, client should assure that input key is sorted, +// otherwise error could happens. This builder would arrange data in following +// format. +// index = encoded_keys + encoded_offsets + footer + footer_size + checksum +// encoded_keys = binary_key + [, ...] +// encoded_offsets = encoded_offset + [, ...] +// encoded_offset = variant32 +// footer = ShortKeyFooterPB +// footer_size = fixed32 +// checksum = fixed32 +// Usage: +// ShortKeyIndexBuilder builder(segment_id, num_rows_per_block); +// builder.add_item(key1); +// ... +// builder.add_item(keyN); +// builder.finalize(segment_size, num_rows, &slices); +// TODO(zc): +// 1. If this can leverage binary page to save key and offset data +// 2. Extending this to save in a BTree like struct, which can index full key +// more than short key +class ShortKeyIndexBuilder { +public: + ShortKeyIndexBuilder(uint32_t segment_id, + uint32_t num_rows_per_block) { + _footer.set_segment_id(segment_id); + _footer.set_num_rows_per_block(num_rows_per_block); + } + + Status add_item(const Slice& key); + + Status finalize(uint32_t segment_size, uint32_t num_rows, std::vector* slices); + +private: + segment_v2::ShortKeyFooterPB _footer; + + faststring _key_buf; + faststring _offset_buf; + std::string _footer_buf; + std::vector _offsets; +}; + +class ShortKeyIndexDecoder; + +// An Iterator to iterate one short key index. +// Client can use this class to iterator all items +// item in this index. +class ShortKeyIndexIterator { +public: + using iterator_category = std::random_access_iterator_tag; + using value_type = Slice; + using pointer = Slice*; + using reference = Slice&; + using difference_type = ssize_t; + + ShortKeyIndexIterator(const ShortKeyIndexDecoder* decoder, uint32_t ordinal = 0) + : _decoder(decoder), _ordinal(ordinal) { } + + ShortKeyIndexIterator& operator-=(ssize_t step) { + _ordinal -= step; + return *this; + } + + ShortKeyIndexIterator& operator+=(ssize_t step) { + _ordinal += step; + return *this; + } + + ShortKeyIndexIterator& operator++() { + _ordinal++; + return *this; + } + + bool operator!=(const ShortKeyIndexIterator& other) { + return _ordinal != other._ordinal || _decoder != other._decoder; + } + + bool operator==(const ShortKeyIndexIterator& other) { + return _ordinal == other._ordinal && _decoder == other._decoder; + } + + ssize_t operator-(const ShortKeyIndexIterator& other) const { + return _ordinal - other._ordinal; + } + + inline bool valid() const; + + Slice operator*() const; + + ssize_t ordinal() const { return _ordinal; } + +private: + const ShortKeyIndexDecoder* _decoder; + ssize_t _ordinal; +}; + +// Used to decode short key to header and encoded index data. +// Usage: +// MemIndex index; +// ShortKeyIndexDecoder decoder(slice) +// decoder.parse(); +// auto iter = decoder.lower_bound(key); +class ShortKeyIndexDecoder { +public: + // Client should assure that data is available when this class + // is used. + ShortKeyIndexDecoder(const Slice& data) : _data(data) { } + + Status parse(); + + ShortKeyIndexIterator begin() const { return {this, 0}; } + ShortKeyIndexIterator end() const { return {this, num_items()}; } + + // lower_bound will return a iterator which locates the first item + // equal with or larger than given key. + // NOTE: This function holds that without common prefix key, the one + // who has more length it the bigger one. Two key is the same only + // when their length are equal + ShortKeyIndexIterator lower_bound(const Slice& key) const { + return seek(key); + } + + // Return the iterator which locates the first item larger than the + // input key. + ShortKeyIndexIterator upper_bound(const Slice& key) const { + return seek(key); + } + + uint32_t num_items() const { return _footer.num_items(); } + + Slice key(ssize_t ordinal) const { + DCHECK(ordinal >= 0 && ordinal < num_items()); + return {_key_data.data + _offsets[ordinal], _offsets[ordinal + 1] - _offsets[ordinal]}; + } + +private: + template + ShortKeyIndexIterator seek(const Slice& key) const { + auto comparator = [this] (const Slice& lhs, const Slice& rhs) { + return lhs.compare(rhs) < 0; + }; + if (lower_bound) { + return std::lower_bound(begin(), end(), key, comparator); + } else { + return std::upper_bound(begin(), end(), key, comparator); + } + } + +private: + Slice _data; + + // All following fields are only valid after pares has been executed successfully + segment_v2::ShortKeyFooterPB _footer; + std::vector _offsets; + Slice _key_data; +}; + +inline Slice ShortKeyIndexIterator::operator*() const { + return _decoder->key(_ordinal); +} + +inline bool ShortKeyIndexIterator::valid() const { + return _ordinal >= 0 && _ordinal < _decoder->num_items(); +} + +} diff --git a/be/src/olap/types.h b/be/src/olap/types.h index d7597d36825601..0989d1a86599ce 100644 --- a/be/src/olap/types.h +++ b/be/src/olap/types.h @@ -104,24 +104,31 @@ struct CppTypeTraits { template<> struct CppTypeTraits { using CppType = bool; + using UnsignedCppType = bool; }; template<> struct CppTypeTraits { using CppType = int8_t; + using UnsignedCppType = uint8_t; }; template<> struct CppTypeTraits { using CppType = int16_t; + using UnsignedCppType = uint16_t; }; template<> struct CppTypeTraits { using CppType = int32_t; + using UnsignedCppType = uint32_t; }; template<> struct CppTypeTraits { using CppType = uint32_t; + using UnsignedCppType = uint32_t; }; template<> struct CppTypeTraits { using CppType = int64_t; + using UnsignedCppType = uint64_t; }; template<> struct CppTypeTraits { using CppType = int128_t; + using UnsignedCppType = unsigned int128_t; }; template<> struct CppTypeTraits { using CppType = float; @@ -131,12 +138,15 @@ template<> struct CppTypeTraits { }; template<> struct CppTypeTraits { using CppType = decimal12_t; + using UnsignedCppType = decimal12_t; }; template<> struct CppTypeTraits { using CppType = uint24_t; + using UnsignedCppType = uint24_t; }; template<> struct CppTypeTraits { using CppType = int64_t; + using UnsignedCppType = uint64_t; }; template<> struct CppTypeTraits { using CppType = Slice; diff --git a/be/src/olap/uint24.h b/be/src/olap/uint24.h index 7632b584c93d01..638ffae6e6a1ae 100644 --- a/be/src/olap/uint24.h +++ b/be/src/olap/uint24.h @@ -36,19 +36,26 @@ struct uint24_t { data[2] = value.data[2]; } - uint24_t(const int32_t& value) { + uint24_t(const uint32_t& value) { data[0] = static_cast(value); data[1] = static_cast(value >> 8); data[2] = static_cast(value >> 16); } + uint24_t& operator=(const uint32_t& value) { + data[0] = static_cast(value); + data[1] = static_cast(value >> 8); + data[2] = static_cast(value >> 16); + return *this; + } + uint24_t& operator+=(const uint24_t& value) { *this = static_cast(*this) + static_cast(value); return *this; } - operator int() const { - int value = static_cast(data[0]); + operator uint32_t() const { + uint32_t value = static_cast(data[0]); value += (static_cast(static_cast(data[1]))) << 8; value += (static_cast(static_cast(data[2]))) << 16; return value; diff --git a/be/src/util/CMakeLists.txt b/be/src/util/CMakeLists.txt index d2cc2b0152aec7..05cc4983a63f06 100644 --- a/be/src/util/CMakeLists.txt +++ b/be/src/util/CMakeLists.txt @@ -78,6 +78,7 @@ set(UTIL_FILES md5.cpp frontend_helper.cpp faststring.cc + slice.cpp ) if (WITH_MYSQL) diff --git a/be/src/util/slice.cpp b/be/src/util/slice.cpp new file mode 100644 index 00000000000000..fc583a2d228bef --- /dev/null +++ b/be/src/util/slice.cpp @@ -0,0 +1,28 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include "util/slice.h" + +#include "util/faststring.h" + +namespace doris { + +// NOTE(zc): we define this function here to make compile work. +Slice::Slice(const faststring& s) : // NOLINT(runtime/explicit) + data((char*)(s.data())), size(s.size()) { } + +} diff --git a/be/src/util/slice.h b/be/src/util/slice.h index 5c31072fe2e1e4..1eebe3a5f29fdd 100644 --- a/be/src/util/slice.h +++ b/be/src/util/slice.h @@ -29,6 +29,8 @@ namespace doris { +class faststring; + /// @brief A wrapper around externally allocated data. /// /// Slice is a simple structure containing a pointer into some external @@ -66,6 +68,8 @@ struct Slice { /// Create a slice that refers to the contents of the given string. Slice(const std::string& s) : // NOLINT(runtime/explicit) data(const_cast(s.data())), size(s.size()) { } + + Slice(const faststring& s); /// Create a slice that refers to a C-string s[0,strlen(s)-1]. Slice(const char* s) : // NOLINT(runtime/explicit) diff --git a/be/test/olap/CMakeLists.txt b/be/test/olap/CMakeLists.txt index 0a11d868f7466d..ab105eed67856f 100644 --- a/be/test/olap/CMakeLists.txt +++ b/be/test/olap/CMakeLists.txt @@ -61,3 +61,5 @@ ADD_BE_TEST(rowset/alpha_rowset_test) ADD_BE_TEST(olap_snapshot_converter_test) ADD_BE_TEST(txn_manager_test) ADD_BE_TEST(generic_iterators_test) +ADD_BE_TEST(key_coder_test) +ADD_BE_TEST(short_key_index_test) diff --git a/be/test/olap/key_coder_test.cpp b/be/test/olap/key_coder_test.cpp new file mode 100644 index 00000000000000..5d241bb0804806 --- /dev/null +++ b/be/test/olap/key_coder_test.cpp @@ -0,0 +1,287 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include "olap/key_coder.h" + +#include +#include +#include + +#include "util/debug_util.h" + +namespace doris { + +class KeyCoderTest : public testing::Test { +public: + KeyCoderTest() { } + virtual ~KeyCoderTest() { + } +}; + +template +void test_integer_encode() { + using CppType = typename CppTypeTraits::CppType; + + auto key_coder = get_key_coder(type); + + { + std::string buf; + CppType val = std::numeric_limits::min(); + key_coder->encode_ascending(&val, 1, &buf); + + std::string result; + for (int i = 0; i < sizeof(CppType); ++i) { + result.append("00"); + } + + ASSERT_STREQ(result.c_str(), hexdump(buf.data(), buf.size()).c_str()); + + { + Slice slice(buf); + CppType check_val; + key_coder->decode_ascending(&slice, sizeof(CppType), (uint8_t*)&check_val, nullptr); + ASSERT_EQ(val, check_val); + } + } + + { + std::string buf; + CppType val = std::numeric_limits::max(); + key_coder->encode_ascending(&val, sizeof(CppType), &buf); + + std::string result; + for (int i = 0; i < sizeof(CppType); ++i) { + result.append("FF"); + } + + ASSERT_STREQ(result.c_str(), hexdump(buf.data(), buf.size()).c_str()); + { + Slice slice(buf); + CppType check_val; + key_coder->decode_ascending(&slice, sizeof(CppType), (uint8_t*)&check_val, nullptr); + ASSERT_EQ(val, check_val); + } + } + + { + CppType val1 = random(); + CppType val2 = random(); + + std::string buf1; + std::string buf2; + + key_coder->encode_ascending(&val1, sizeof(CppType), &buf1); + key_coder->encode_ascending(&val2, sizeof(CppType), &buf2); + + if (val1 < val2) { + ASSERT_TRUE(memcmp(buf1.c_str(), buf2.c_str(), buf1.size()) < 0); + } else if (val1 > val2) { + ASSERT_TRUE(memcmp(buf1.c_str(), buf2.c_str(), buf1.size()) > 0); + } else { + ASSERT_TRUE(memcmp(buf1.c_str(), buf2.c_str(), buf1.size()) == 0); + } + } +} + +TEST(KeyCoderTest, test_int) { + test_integer_encode(); + test_integer_encode(); + test_integer_encode(); + test_integer_encode(); + test_integer_encode(); + test_integer_encode(); + + test_integer_encode(); +} + +TEST(KeyCoderTest, test_date) { + using CppType = uint24_t; + auto key_coder = get_key_coder(OLAP_FIELD_TYPE_DATE); + + { + std::string buf; + CppType val = 0; + key_coder->encode_ascending(&val, 1, &buf); + + std::string result; + for (int i = 0; i < sizeof(uint24_t); ++i) { + result.append("00"); + } + + ASSERT_STREQ(result.c_str(), hexdump(buf.data(), buf.size()).c_str()); + + { + Slice slice(buf); + CppType check_val; + key_coder->decode_ascending(&slice, sizeof(CppType), (uint8_t*)&check_val, nullptr); + ASSERT_EQ(val, check_val); + } + } + + { + std::string buf; + CppType val = 10000; + key_coder->encode_ascending(&val, sizeof(CppType), &buf); + + std::string result("002710"); + + ASSERT_STREQ(result.c_str(), hexdump(buf.data(), buf.size()).c_str()); + { + Slice slice(buf); + CppType check_val; + key_coder->decode_ascending(&slice, sizeof(CppType), (uint8_t*)&check_val, nullptr); + ASSERT_EQ(val, check_val); + } + } + + { + CppType val1 = random(); + CppType val2 = random(); + + std::string buf1; + std::string buf2; + + key_coder->encode_ascending(&val1, sizeof(CppType), &buf1); + key_coder->encode_ascending(&val2, sizeof(CppType), &buf2); + + if (val1 < val2) { + ASSERT_TRUE(memcmp(buf1.c_str(), buf2.c_str(), buf1.size()) < 0); + } else if (val1 > val2) { + ASSERT_TRUE(memcmp(buf1.c_str(), buf2.c_str(), buf1.size()) > 0); + } else { + ASSERT_TRUE(memcmp(buf1.c_str(), buf2.c_str(), buf1.size()) == 0); + } + } +} + +TEST(KeyCoderTest, test_decimal) { + auto key_coder = get_key_coder(OLAP_FIELD_TYPE_DECIMAL); + + decimal12_t val1(1, 100000000); + std::string buf1; + + key_coder->encode_ascending(&val1, sizeof(decimal12_t), &buf1); + + decimal12_t check_val; + Slice slice1(buf1); + key_coder->decode_ascending(&slice1, sizeof(decimal12_t), (uint8_t*)&check_val, nullptr); + ASSERT_EQ(check_val, val1); + + { + decimal12_t val2(-1, -100000000); + std::string buf2; + key_coder->encode_ascending(&val2, sizeof(decimal12_t), &buf2); + ASSERT_TRUE(memcmp(buf1.c_str(), buf2.c_str(), buf1.size()) > 0); + } + { + decimal12_t val2(1, 100000001); + std::string buf2; + key_coder->encode_ascending(&val2, sizeof(decimal12_t), &buf2); + ASSERT_TRUE(memcmp(buf1.c_str(), buf2.c_str(), buf1.size()) < 0); + } + { + decimal12_t val2(0, 0); + std::string buf2; + key_coder->encode_ascending(&val2, sizeof(decimal12_t), &buf2); + ASSERT_TRUE(memcmp(buf1.c_str(), buf2.c_str(), buf1.size()) > 0); + + std::string result("80"); + for (int i = 0; i < sizeof(int64_t) - 1; ++i) { + result.append("00"); + } + result.append("80"); + for (int i = 0; i < sizeof(int32_t) - 1; ++i) { + result.append("00"); + } + + ASSERT_STREQ(result.c_str(), hexdump(buf2.data(), buf2.size()).c_str()); + } +} + +TEST(KeyCoderTest, test_char) { + auto key_coder = get_key_coder(OLAP_FIELD_TYPE_CHAR); + + char buf[] = "1234567890"; + Slice slice(buf, 10); + + { + std::string key; + key_coder->encode_ascending(&slice, 10, &key); + Slice encoded_key(key); + + Arena arena; + Slice check_slice; + auto st = key_coder->decode_ascending(&encoded_key, 10, (uint8_t*)&check_slice, &arena); + ASSERT_TRUE(st.ok()); + + ASSERT_STREQ("1234567890", check_slice.data); + } + + { + std::string key; + key_coder->encode_ascending(&slice, 5, &key); + Slice encoded_key(key); + + Arena arena; + Slice check_slice; + auto st = key_coder->decode_ascending(&encoded_key, 5, (uint8_t*)&check_slice, &arena); + ASSERT_TRUE(st.ok()); + + ASSERT_STREQ("12345", check_slice.data); + } +} + +TEST(KeyCoderTest, test_varchar) { + auto key_coder = get_key_coder(OLAP_FIELD_TYPE_VARCHAR); + + char buf[] = "1234567890"; + Slice slice(buf, 10); + + { + std::string key; + key_coder->encode_ascending(&slice, 15, &key); + Slice encoded_key(key); + + Arena arena; + Slice check_slice; + auto st = key_coder->decode_ascending(&encoded_key, 15, (uint8_t*)&check_slice, &arena); + ASSERT_TRUE(st.ok()); + + ASSERT_STREQ("1234567890", check_slice.data); + } + + { + std::string key; + key_coder->encode_ascending(&slice, 5, &key); + Slice encoded_key(key); + + Arena arena; + Slice check_slice; + auto st = key_coder->decode_ascending(&encoded_key, 5, (uint8_t*)&check_slice, &arena); + ASSERT_TRUE(st.ok()); + + ASSERT_STREQ("12345", check_slice.data); + } +} + + +} // namespace doris + +int main(int argc, char **argv) { + testing::InitGoogleTest(&argc, argv); + return RUN_ALL_TESTS(); +} diff --git a/be/test/olap/short_key_index_test.cpp b/be/test/olap/short_key_index_test.cpp new file mode 100644 index 00000000000000..eba2ef84e54468 --- /dev/null +++ b/be/test/olap/short_key_index_test.cpp @@ -0,0 +1,97 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include "olap/short_key_index.h" + +#include + +#include "olap/lru_cache.h" +#include "olap/file_helper.h" + +namespace doris { + +class ShortKeyIndexTest : public testing::Test { +public: + ShortKeyIndexTest() { } + virtual ~ShortKeyIndexTest() { + } +}; + +TEST_F(ShortKeyIndexTest, buider) { + ShortKeyIndexBuilder builder(0, 1024); + + for (int i = 1000; i < 10000; i += 2) { + builder.add_item(std::to_string(i)); + } + std::vector slices; + auto st = builder.finalize(10000, 9000 * 1024, &slices); + ASSERT_TRUE(st.ok()); + + std::string buf; + for (auto& slice : slices) { + buf.append(slice.data, slice.size); + } + + ShortKeyIndexDecoder decoder(buf); + st = decoder.parse(); + ASSERT_TRUE(st.ok()); + + // find 1499 + { + auto iter = decoder.lower_bound("1499"); + ASSERT_TRUE(iter.valid()); + ASSERT_STREQ("1500", (*iter).to_string().c_str()); + } + // find 1500 lower bound + { + auto iter = decoder.lower_bound("1500"); + ASSERT_TRUE(iter.valid()); + ASSERT_STREQ("1500", (*iter).to_string().c_str()); + } + // find 1500 upper bound + { + auto iter = decoder.upper_bound("1500"); + ASSERT_TRUE(iter.valid()); + ASSERT_STREQ("1502", (*iter).to_string().c_str()); + } + // find prefix "87" + { + auto iter = decoder.lower_bound("87"); + ASSERT_TRUE(iter.valid()); + ASSERT_STREQ("8700", (*iter).to_string().c_str()); + } + // find prefix "87" + { + auto iter = decoder.upper_bound("87"); + ASSERT_TRUE(iter.valid()); + ASSERT_STREQ("8700", (*iter).to_string().c_str()); + } + + // find prefix "9999" + { + auto iter = decoder.upper_bound("9999"); + ASSERT_FALSE(iter.valid()); + } +} + +} + +int main(int argc, char** argv) { + ::testing::InitGoogleTest(&argc, argv); + return RUN_ALL_TESTS(); +} + diff --git a/gensrc/proto/segment_v2.proto b/gensrc/proto/segment_v2.proto index c66c2d88e023be..f7d95c6eaaa22f 100644 --- a/gensrc/proto/segment_v2.proto +++ b/gensrc/proto/segment_v2.proto @@ -117,3 +117,21 @@ message FileFooterPB { repeated MetadataPairPB file_meta_datas = 8; // meta data of file optional PagePointerPB key_index_page = 9; // short key index page } + +message ShortKeyFooterPB { + // How many index item in this index. + optional uint32 num_items = 1; + // The total bytes occupied by the index key + optional uint32 key_bytes = 2; + // The total bytes occupied by the key offsets + optional uint32 offset_bytes = 3; + // Segment id which this index is belong to + optional uint32 segment_id = 4; + // number rows in each block + optional uint32 num_rows_per_block = 5; + // How many rows in this segment + optional uint32 num_segment_rows = 6; + // Total bytes for this segment + optional uint32 segment_bytes = 7; +} + diff --git a/run-ut.sh b/run-ut.sh index 5c3a4a4bc6b721..59fab254b87421 100755 --- a/run-ut.sh +++ b/run-ut.sh @@ -252,6 +252,8 @@ ${DORIS_TEST_BINARY_DIR}/olap/txn_manager_test ${DORIS_TEST_BINARY_DIR}/olap/storage_types_test ${DORIS_TEST_BINARY_DIR}/olap/generic_iterators_test ${DORIS_TEST_BINARY_DIR}/olap/aggregate_func_test +${DORIS_TEST_BINARY_DIR}/olap/short_key_index_test +${DORIS_TEST_BINARY_DIR}/olap/key_coder_test # Running routine load test ${DORIS_TEST_BINARY_DIR}/runtime/kafka_consumer_pipe_test