From 90c0e6687ed753ef3b5ad247bffcb9d0e7264f35 Mon Sep 17 00:00:00 2001 From: huangkangping Date: Tue, 4 Jun 2019 15:16:41 +0800 Subject: [PATCH 1/5] Add segment v2 api --- be/src/olap/rowset/segment_v2/column_reader.h | 66 ++++++ be/src/olap/rowset/segment_v2/column_writer.h | 74 +++++++ be/src/olap/rowset/segment_v2/common.h | 31 +++ be/src/olap/rowset/segment_v2/options.h | 47 +++++ be/src/olap/rowset/segment_v2/ordinal_index.h | 69 ++++++ be/src/olap/rowset/segment_v2/page_builder.h | 79 +++++++ be/src/olap/rowset/segment_v2/page_decoder.h | 83 ++++++++ be/src/olap/rowset/segment_v2/short_index.h | 72 +++++++ .../doris_storage_optimization.md | 196 ++++++++++++++++++ docs/resources/segment_v2.png | Bin 0 -> 40991 bytes gensrc/proto/segment_v2.proto | 103 +++++++++ 11 files changed, 820 insertions(+) create mode 100644 be/src/olap/rowset/segment_v2/column_reader.h create mode 100644 be/src/olap/rowset/segment_v2/column_writer.h create mode 100644 be/src/olap/rowset/segment_v2/common.h create mode 100644 be/src/olap/rowset/segment_v2/options.h create mode 100644 be/src/olap/rowset/segment_v2/ordinal_index.h create mode 100644 be/src/olap/rowset/segment_v2/page_builder.h create mode 100644 be/src/olap/rowset/segment_v2/page_decoder.h create mode 100644 be/src/olap/rowset/segment_v2/short_index.h create mode 100644 docs/documentation/cn/extending-doris/doris_storage_optimization.md create mode 100644 docs/resources/segment_v2.png create mode 100644 gensrc/proto/segment_v2.proto diff --git a/be/src/olap/rowset/segment_v2/column_reader.h b/be/src/olap/rowset/segment_v2/column_reader.h new file mode 100644 index 00000000000000..412c2a93bd48ac --- /dev/null +++ b/be/src/olap/rowset/segment_v2/column_reader.h @@ -0,0 +1,66 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. 
The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#ifndef DORIS_BE_SRC_OLAP_ROWSET_SEGMENT_V2_COLUMN_READER_H +#define DORIS_BE_SRC_OLAP_ROWSET_SEGMENT_V2_COLUMN_READER_H + +#include <cstddef> + +#include "olap/rowset/segment_v2/common.h" +#include "runtime/vectorized_row_batch.h" + +namespace doris { + +namespace segment_v2 { + +// ColumnReader reads the entries of one column by ordinal position. +// next_batch() is pure virtual, so this class is used through subclasses. +class ColumnReader { +public: + ColumnReader() { } + + // Virtual so that a subclass can be destroyed through a ColumnReader pointer. + virtual ~ColumnReader() { } + + // NOTE(review): presumably returns false on failure, like the seek functions below -- confirm. + bool init(); + + // Seek to the first entry in the column. + bool seek_to_first(); + + // Seek to the given ordinal entry in the column. + // Entry 0 is the first entry written to the column. + // If provided seek point is past the end of the file, + // then returns false. + // Declared without 'override': ColumnReader has no base class to override from. + bool seek_to_ordinal(rowid_t ord_idx); + + // Fetch the next vector of values from the page into 'dst'. + // The output vector must have space for up to n cells. + // + // return the size of entries. + // + // In the case that the values are themselves references + // to other memory (eg Slices), the referred-to memory is + // allocated in the dst column vector's arena. + virtual size_t next_batch(const size_t n, doris::ColumnVector *dst) = 0; + + // NOTE(review): presumably the ordinal of the entry the reader is positioned at -- confirm. + // "oridinal" is a misspelling of "ordinal"; the name is kept so existing callers compile. + size_t get_current_oridinal(); + + // Call this function every time before next_batch. + // This function will preload pages from disk into memory if necessary. 
+ bool prepare_batch(size_t n); + + // release next_batch related resource + bool finish_batch(); +}; + +} // namespace segment_v2 + +} // namespace doris + +#endif // DORIS_BE_SRC_OLAP_ROWSET_SEGMENT_V2_COLUMN_READER_H diff --git a/be/src/olap/rowset/segment_v2/column_writer.h b/be/src/olap/rowset/segment_v2/column_writer.h new file mode 100644 index 00000000000000..95883e83a56345 --- /dev/null +++ b/be/src/olap/rowset/segment_v2/column_writer.h @@ -0,0 +1,74 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +#ifndef DORIS_BE_SRC_OLAP_ROWSET_SEGMENT_V2_COLUMN_WRITER_H +#define DORIS_BE_SRC_OLAP_ROWSET_SEGMENT_V2_COLUMN_WRITER_H + +#include <vector> + +#include "gen_cpp/doris.pb.h" +#include "olap/rowset/segment_v2/options.h" +#include "util/slice.h" + +namespace doris { + +namespace segment_v2 { + +// ColumnWriter is used to write data of a column. +// It keeps a copy of the BuilderOptions and a non-owning pointer to the column schema. +class ColumnWriter { +public: + // builder_options is copied; column_schema is stored as a raw pointer and is not deleted here. + explicit ColumnWriter(BuilderOptions builder_options, ColumnSchemaPB* column_schema) + : _builder_options(builder_options), + _column_schema(column_schema) { } + + // Virtual because the class declares virtual member functions and may be deleted via a base pointer. + virtual ~ColumnWriter() { } + + bool init(); + + // close the writer + bool finish(); + + // Caller will loop all the ColumnWriter and call the following get page api + // to get page data and get the page pointer + // NOTE(review): the element type was lost in this copy of the patch; doris::Slice is + // assumed from the sibling get_*_page(doris::Slice*) functions -- confirm. + bool get_data_pages(std::vector<doris::Slice>* data_buffers); + + // Get the dictionary page of a column written in dictionary encoding mode. + virtual bool get_dictionary_page(doris::Slice* dictionary_page); + + // Get the bloom filter pages of a bloom filter indexed column. + // NOTE(review): element type assumed to be doris::Slice, see get_data_pages -- confirm. + virtual bool get_bloom_filter_pages(std::vector<doris::Slice>* bf_pages); + + // Get the bitmap page of a bitmap indexed column. + virtual bool get_bitmap_page(doris::Slice* bitmap_page); + + // Get the statistic page of a column that keeps statistics. + virtual bool get_statistic_page(doris::Slice* statistic_page); + + bool write_batch(doris::RowBlock* block); + + size_t written_size() const; + + int written_value_count() const; + +private: + BuilderOptions _builder_options; + ColumnSchemaPB* _column_schema; +}; + +} // namespace segment_v2 + +} // namespace doris + + +#endif // DORIS_BE_SRC_OLAP_ROWSET_SEGMENT_V2_COLUMN_WRITER_H diff --git a/be/src/olap/rowset/segment_v2/common.h b/be/src/olap/rowset/segment_v2/common.h new file mode 100644 index 00000000000000..abbc2baafd4ea2 --- /dev/null +++ b/be/src/olap/rowset/segment_v2/common.h @@ -0,0 +1,31 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. 
See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#ifndef DORIS_BE_SRC_OLAP_ROWSET_SEGMENT_V2_COMMON_H +#define DORIS_BE_SRC_OLAP_ROWSET_SEGMENT_V2_COMMON_H + +#include <cstdint> + +namespace doris { + +namespace segment_v2 { + +// 32-bit unsigned row id / ordinal type used by the segment_v2 readers, writers and indexes. +typedef uint32_t rowid_t; + +} // namespace segment_v2 + +} // namespace doris + +#endif // DORIS_BE_SRC_OLAP_ROWSET_SEGMENT_V2_COMMON_H diff --git a/be/src/olap/rowset/segment_v2/options.h b/be/src/olap/rowset/segment_v2/options.h new file mode 100644 index 00000000000000..f159a21addcec5 --- /dev/null +++ b/be/src/olap/rowset/segment_v2/options.h @@ -0,0 +1,47 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. 
See the License for the +// specific language governing permissions and limitations +// under the License. + +#ifndef DORIS_BE_SRC_OLAP_ROWSET_SEGMENT_V2_OPTIONS_H +#define DORIS_BE_SRC_OLAP_ROWSET_SEGMENT_V2_OPTIONS_H + +#include "gen_cpp/segment_v2.pb.h" + +namespace doris { + +namespace segment_v2 { + +// Plain option bundle; ColumnWriter stores a copy of it. +// No member has a default value here, so callers must set every field they rely on. +struct BuilderOptions { + // NOTE(review): presumably the size at which a data page is considered full (bytes?) -- confirm. + size_t data_page_size; + + // NOTE(review): presumably the size limit of a dictionary page (bytes?) -- confirm. + size_t dict_page_size; + + // NOTE(review): presumably whether a positional (ordinal) index is written -- confirm. + bool write_posidx; + + // Encoding to use; the enum comes from gen_cpp/segment_v2.pb.h. + EncodingTypePB encoding; + + // Compression to use; the enum comes from gen_cpp/segment_v2.pb.h. + CompressionTypePB compression_type; + + // NOTE(review): presumably whether the column may contain NULLs -- confirm. + bool is_nullable; + + // NOTE(review): presumably whether a dictionary page is produced for the column -- confirm. + bool has_dictionary; +}; + +} // namespace segment_v2 + +} // namespace doris + +#endif // DORIS_BE_SRC_OLAP_ROWSET_SEGMENT_V2_OPTIONS_H diff --git a/be/src/olap/rowset/segment_v2/ordinal_index.h b/be/src/olap/rowset/segment_v2/ordinal_index.h new file mode 100644 index 00000000000000..be6d229630eea5 --- /dev/null +++ b/be/src/olap/rowset/segment_v2/ordinal_index.h @@ -0,0 +1,69 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +#ifndef DORIS_BE_SRC_OLAP_ROWSET_SEGMENT_V2_ORDINAL_INDEX_H +#define DORIS_BE_SRC_OLAP_ROWSET_SEGMENT_V2_ORDINAL_INDEX_H + +#include <memory> + +#include "gen_cpp/segment_v2.pb.h" +#include "olap/rowset/segment_v2/common.h" +#include "util/slice.h" + +namespace doris { + +namespace segment_v2 { + +// Declared ahead of OrdinalIndexReader, which returns it by unique_ptr. +class OrdinalIndex; + +// Reads an ordinal index from its serialized form (see init()). +class OrdinalIndexReader { +public: + // 'data' is the serialized index. NOTE(review): presumably returns false on a parse failure -- confirm. + bool init(const Slice& data); + + // NOTE(review): presumably the number of index entries -- confirm. + size_t count(); + + // Compare the entry at idx_in_block with row_id. + // NOTE(review): presumably a three-way (<0, 0, >0) result -- confirm. + int compare_key(int idx_in_block, const rowid_t row_id); + + // NOTE(review): the template argument was lost in this copy of the patch; OrdinalIndex is + // assumed because OrdinalIndex is constructed from an OrdinalIndexReader -- confirm. + // The name mirrors ShortKeyIndexReader::get_short_key_index() and is kept for compatibility. + std::unique_ptr<OrdinalIndex> get_short_key_index(); +}; + +// Builds an ordinal index entry by entry; finish() returns the result as a Slice. +class OrdinalIndexWriter { +public: + bool init(); + + bool add_entry(doris::Slice* key, rowid_t rowid); + + // Was declared as 'dorsi::Slice', a misspelling of the doris namespace. + doris::Slice finish(); +}; + +// Cursor over the entries exposed by an OrdinalIndexReader. +class OrdinalIndex { +public: + OrdinalIndex(OrdinalIndexReader* reader); + + // *matched is an out-parameter. NOTE(review): presumably set when an entry equal to row_id exists -- confirm. + bool seek_at_or_after(const rowid_t row_id, bool* matched); + + bool seek_at_or_before(const rowid_t row_id, bool* matched); + + // Writes the page pointer of the current entry into *page_pointer. + void get_current_page_pointer(PagePointerPB* page_pointer); + +private: + bool _seeked; + size_t _cur_idx; +}; + +} // namespace segment_v2 + +} // namespace doris + +#endif // DORIS_BE_SRC_OLAP_ROWSET_SEGMENT_V2_ORDINAL_INDEX_H diff --git a/be/src/olap/rowset/segment_v2/page_builder.h b/be/src/olap/rowset/segment_v2/page_builder.h new file mode 100644 index 00000000000000..aad5d421fc1f2d --- /dev/null +++ b/be/src/olap/rowset/segment_v2/page_builder.h @@ -0,0 +1,79 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. 
See the License for the +// specific language governing permissions and limitations +// under the License. + +#ifndef DORIS_BE_SRC_OLAP_ROWSET_SEGMENT_V2_PAGE_BUILDER_H +#define DORIS_BE_SRC_OLAP_ROWSET_SEGMENT_V2_PAGE_BUILDER_H + +#include +#include + +#include "util/slice.h" + +namespace doris { + +namespace segment_v2 { + +// PageBuilder is used to build page +// Page is a data management unit, including: +// 1. Data Page: store encoded and compressed data +// 2. BloomFilter Page: store bloom filter of data +// 3. Ordinal Index Page: store ordinal index of data +// 4. Short Key Index Page: store short key index of data +// 5. Bitmap Index Page: store bitmap index of data +class PageBuilder { +public: + virtual ~PageBuilder() { } + + // Used by column writer to determine whether the current page is full. + // Column writer depends on the result to decide whether to flush current page. + virtual bool is_page_full() = 0; + + // Add a sequence of values to the page. + // Returns the number of values actually added, which may be less + // than requested if the page is full. + // + // vals size should be decided according to the page build type + virtual int add(const uint8_t* vals, size_t count) = 0; + + // Get the dictionary page for under dictionary encoding mode column. + virtual bool get_dictionary_page(doris::Slice* dictionary_page); + + // Get the bitmap page for under bitmap indexed column. + virtual bool get_bitmap_page(doris::Slice* bitmap_page); + + // Return a Slice which represents the encoded data of current page. + // + // This Slice points to internal data of this builder. + virtual Slice finish(rowid_t page_first_rowid) = 0; + + // Reset the internal state of the page builder. + // + // Any data previously returned by finish may be invalidated by this call. + virtual void reset() = 0; + + // Return the number of entries that have been added to the page. 
+ virtual size_t count() const = 0; + +private: + DISALLOW_COPY_AND_ASSIGN(PageBuilder); +}; + +} // namespace segment_v2 + +} // namespace doris + +#endif // DORIS_BE_SRC_OLAP_ROWSET_SEGMENT_V2_PAGE_BUILDER_H diff --git a/be/src/olap/rowset/segment_v2/page_decoder.h b/be/src/olap/rowset/segment_v2/page_decoder.h new file mode 100644 index 00000000000000..70ef087492e219 --- /dev/null +++ b/be/src/olap/rowset/segment_v2/page_decoder.h @@ -0,0 +1,83 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#ifndef DORIS_BE_SRC_OLAP_ROWSET_SEGMENT_V2_PAGE_DECODER_H +#define DORIS_BE_SRC_OLAP_ROWSET_SEGMENT_V2_PAGE_DECODER_H + +#include "runtime/vectorized_row_batch.h" + +namespace doris { + +namespace segment_v2 { + +// PageDecoder is used to decode page page. +class PageDecoder { +public: + virtual ~PageDecoder() { } + + // Call this to do some preparation for decoder. + // eg: parse data page header + virtual bool init() = 0; + + // Seek the decoder to the given positional index of the page. + // For example, seek_to_position_in_page(0) seeks to the first + // stored entry. + // + // It is an error to call this with a value larger than Count(). + // Doing so has undefined results. 
+ virtual void seek_to_position_in_page(size_t pos) = 0; + + // Seek the decoder forward by a given number of rows, or to the end + // of the page. This is primarily used to skip over data. + // + // Return the step skipped. + virtual size_t seek_forward(size_t n) { + size_t step = std::min(n, count() - current_index()); + DCHECK_GE(step, 0); + seek_to_position_in_page(current_index() + step); + return step; + } + + // Fetch the next vector of values from the page into 'dst'. + // The output vector must have space for up to n cells. + // + // return the size of entries. + // + // In the case that the values are themselves references + // to other memory (eg Slices), the referred-to memory is + // allocated in the dst column vector's arena. + virtual size_t next_batch(const size_t n, doris::ColumnVector *dst) = 0; + + // Return the number of elements in this page. + virtual size_t count() const = 0; + + // Return the position within the page of the currently seeked + // entry (ie the entry that will next be returned by next_vector()) + virtual size_t current_index() const = 0; + + // Return the first rowid stored in this page. + virtual rowid_t get_first_rowid() const = 0; + +private: + DISALLOW_COPY_AND_ASSIGN(PageDecoder); +}; + +} // namespace segment_v2 + +} // namespace doris + +#endif // DORIS_BE_SRC_OLAP_ROWSET_SEGMENT_V2_PAGE_DECODER_H diff --git a/be/src/olap/rowset/segment_v2/short_index.h b/be/src/olap/rowset/segment_v2/short_index.h new file mode 100644 index 00000000000000..8d2b6669591ff7 --- /dev/null +++ b/be/src/olap/rowset/segment_v2/short_index.h @@ -0,0 +1,72 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. 
You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#ifndef DORIS_BE_SRC_OLAP_ROWSET_SEGMENT_V2_SHORT_INDEX_H +#define DORIS_BE_SRC_OLAP_ROWSET_SEGMENT_V2_SHORT_INDEX_H + +#include <memory> + +#include "olap/rowset/segment_v2/common.h" +#include "util/slice.h" + +namespace doris { + +namespace segment_v2 { + +// Declared ahead of ShortKeyIndexReader, which returns it by unique_ptr. +class ShortKeyIndex; + +// Reads a short key index from its serialized form (see init()). +class ShortKeyIndexReader { +public: + // 'data' is the serialized index. NOTE(review): presumably returns false on a parse failure -- confirm. + bool init(const Slice& data); + + // NOTE(review): presumably the number of index entries -- confirm. + size_t count(); + + // Compare the entry at idx_in_block with key. + // NOTE(review): presumably a three-way (<0, 0, >0) result -- confirm. + int compare_key(int idx_in_block, const Slice& key); + + // NOTE(review): the template argument was lost in this copy of the patch; ShortKeyIndex is + // assumed from the function name and from ShortKeyIndex(ShortKeyIndexReader*) -- confirm. + std::unique_ptr<ShortKeyIndex> get_short_key_index(); +}; + +// Builds a short key index entry by entry; finish() returns the result as a Slice. +class ShortKeyIndexWriter { +public: + bool init(); + + bool add_entry(doris::Slice* key, rowid_t rowid); + + // Was declared as 'dorsi::Slice', a misspelling of the doris namespace. + doris::Slice finish(); +}; + +// Cursor over the entries exposed by a ShortKeyIndexReader. +class ShortKeyIndex { +public: + ShortKeyIndex(ShortKeyIndexReader* reader); + + // *matched is an out-parameter. NOTE(review): presumably set when an entry equal to key exists -- confirm. + bool seek_at_or_after(const doris::Slice& key, bool* matched); + + bool seek_at_or_before(const doris::Slice& key, bool* matched); + + // Row id stored in the entry the cursor currently points at. + rowid_t get_current_rowid(); + + // Move the cursor one entry backward / forward. + // NOTE(review): presumably returns false when there is no such entry -- confirm. + bool prev(); + + bool next(); + +private: + bool _seeked; + size_t _cur_idx; +}; + +} // namespace segment_v2 + +} // namespace doris + +#endif // DORIS_BE_SRC_OLAP_ROWSET_SEGMENT_V2_SHORT_INDEX_H diff --git a/docs/documentation/cn/extending-doris/doris_storage_optimization.md b/docs/documentation/cn/extending-doris/doris_storage_optimization.md new file mode 100644 index 00000000000000..90c36c74fae5a1 --- /dev/null +++ b/docs/documentation/cn/extending-doris/doris_storage_optimization.md @@ -0,0 +1,196 @@ +# Doris存储文件格式优化 # + +## 文件格式 ## + +![](../../../resources/segment_v2.png) +
图1. doris segment文件格式
+ +文件包括: +- 文件开始是8个字节的magic code,用于识别文件格式和版本 +- Data Region:用于存储各个列的数据信息,这里的数据是按需分page加载的 +- Index Region: doris中将各个列的index数据统一存储在Index Region,这里的数据会按照列粒度进行加载,所以跟列的数据信息分开存储 +- Footer信息 + - FileFooterPB:定义文件的元数据信息 + - 4个字节的footer pb内容的checksum + - 4个字节的FileFooterPB消息长度,用于读取FileFooterPB + - 8个字节的MAGIC CODE,之所以在末位存储,是方便不同的场景进行文件类型的识别 + +### DataPage ### + +DataPage分为两种:nullable和non-nullable的data page。 + +nullable的data page内容包括: +``` + + +----------------+ + | value count | + |----------------| + | bitmap length | + |----------------| + | null bitmap | + |----------------| + | data | + |----------------| + | checksum | + +----------------+ +``` + +non-nullable data page结构如下: + +``` + +----------------+ + | data | + |----------------| + | checksum | + +----------------+ +``` + +其中各个字段含义如下: + +- value count + - 表示page中的行数 +- bitmap length + - 表示接下来bitmap的字节数 +- null bitmap + - 表示null信息的bitmap +- data + - 存储经过encoding和compress之后的数据 + - 需要在数据的头部信息中写入:is_compressed + - 各种不同编码的data需要在头部信息写入一些字段信息,以实现数据的解析 + - TODO:添加各种encoding的header信息 +- checksum + - 存储page粒度的校验和,包括page第一个字节和之后的实际数据 + + +### Bloom Filter Pages ### + +针对每个bloom filter列,会在page的粒度相应的生成一个bloom filter的page,保存在bloom filter pages区域 + +### Ordinal Index Page ### + +针对每个列,都会按照page粒度,建立行号的稀疏索引。内容为这个page的起始行的行号到这个page的指针(包括offset和length) + +### Short Key Index page ### + +我们会每隔N行(可配置)生成一个short key的稀疏索引,索引的内容为:short key->行号(ordinal) + +### Column的其他索引 ### + +该格式设计支持后续扩展其他的索引信息,比如bitmap索引,spatial索引等等,只需要将需要的数据写到现有的列数据后面,并且添加对应的元数据字段到FileFooterPB中 + +### 元数据定义 ### +FileFooterPB的定义为: + +``` +message ColumnPB { + optional uint32 column_id = 1; // 这里使用column id,不使用column name是因为计划支持修改列名 + optional string type = 2; // 列类型 + optional string aggregation = 3; // 是否聚合 + optional uint32 length = 4; // 长度 + optional bool is_key = 5; // 是否是主键列 + optional string default_value = 6; // 默认值 + optional uint32 precision = 9 [default = 27]; // 精度 + optional uint32 frac = 10 [default = 9]; + optional bool is_nullable = 11 [default=false]; 
// 是否有null + optional bool is_bf_column = 15 [default=false]; // 是否有bf词典 + optional bool is_bitmap_column = 16 [default=false]; // 是否有bitmap索引 +} + +// page偏移 +message PagePointerPB { + required uint64 offset; // page在文件中的偏移 + required uint32 length; // page的大小 +} + +message MetadataPairPB { + optional string key = 1; + optional bytes value = 2; +} + +message ColumnMetaPB { + optional ColumnMessage encoding; // 编码方式 + + optional PagePointerPB dict_page; // 词典page + repeated PagePointerPB bloom_filter_pages; // bloom filter词典信息 + optional PagePointerPB ordinal_index_page; // 行号索引数据 + optional PagePointerPB page_zone_map_page; // page级别统计信息索引数据 + + optional PagePointerPB bitmap_index_page; // bitmap索引数据 + + optional uint64 data_footprint; // 列中数据的大小 + optional uint64 index_footprint; // 列中索引的大小 + optional uint64 raw_data_footprint; // 原始列数据大小 + + optional CompressKind compress_kind; // 列的压缩方式 + + optional ZoneMapPB column_zone_map; //文件级别的过滤条件 + repeated MetadataPairPB column_meta_datas; +} + +message FileFooterPB { + optional uint32 version = 2 [default = 1]; // 用于版本兼容和升级使用 + repeated ColumnPB schema = 5; // 列Schema + optional uint64 num_values = 4; // 文件中保存的行数 + optional uint64 index_footprint = 7; // 索引大小 + optional uint64 data_footprint = 8; // 数据大小 + optional uint64 raw_data_footprint = 8; // 原始数据大小 + + optional CompressKind compress_kind = 9 [default = COMPRESS_LZO]; // 压缩方式 + repeated ColumnMetaPB column_metas = 10; // 列元数据 + optional PagePointerPB key_index_page; // short key索引page +} + +``` + +## 读写逻辑 ## + +### 写入 ### + +大体的写入流程如下: +1. 写入magic +2. 根据schema信息,生成对应的ColumnWriter,每个ColumnWriter按照不同的类型,获取对应的encoding信息(可配置),根据encoding,生成对应的encoder +3. 调用encoder->add(value)进行数据写入,每隔K行,生成一个short key index entry,并且,如果当前的page满足一定条件(大小超过1M或者行数为K),就生成一个新的page,缓存在内存中。 +4. 不断的循环步骤3,直到数据写入完成。将各个列的数据依序刷入文件中 +5. 生成FileFooterPB信息,写入文件中。 + +相关的问题: + +- short key的索引如何生成? + - 现在还是按照每隔多少行生成一个short key的稀疏索引,保持每隔1024行生成一个short key的稀疏索引,具体的内容是:short key -> ordinal + +- ordinal索引里面应该存什么? 
+ - 存储page的第一个ordinal到page pointer的映射信息 +- 不同encoding类型的page里存什么? + - 词典压缩 + - plain + - rle + - bshuf + +### 读取 ### + +1. 读取文件的magic,判断文件类型和版本 +2. 读取FileFooterPB,进行checksum校验 +3. 按照需要的列,读取short key索引和对应列的数据ordinal索引信息 +4. 使用start key和end key,通过short key索引定位到要读取的行号,然后通过ordinal索引确定需要读取的row ranges, 同时需要通过统计信息、bitmap索引等过滤需要读取的row ranges +5. 然后按照row ranges通过ordinal索引读取行的数据 + +相关的问题: +1. 如何实现在page内部快速的定位到某一行? + + page内部的数据是经过encoding的,无法快速进行行级数据的定位。不同的encoding方式,在内部进行快速的行号定位的方案不一样,需要具体分析: + - 如果是rle编码的,需要通过解析rle的header进行skip,直到到达包含该行的那个rle块之后,再进行反解。 + - binary plain encoding:会在page中存储offset信息,并且会在page header中指定offset信息的offset,读取的时候会先解析offset信息到数组中,这样子就可以通过各个行的offset数据信息快速的定位block某一行的数据 +2. 如何实现块的高效读取?可以考虑将相邻的块在读取的时候进行merge,一次性读取? + 这个需要在读取的时候,判断block是否连续,如果连续,就一次性的读取 + +## 编码 ## + +现有的doris存储中,针对string类型的编码,采用plain encoding的方式,效率比较低。经过对比,发现在百度统计的场景下,数据会因为string类型的编码膨胀超过一倍。所以,计划引入基于词典的编码压缩。 + +## 压缩 ## + +实现可扩展的压缩框架,支持多种压缩算法,方便后续添加新的压缩算法,计划引入zstd压缩。 + +## TODO ## +1. 如何实现嵌套类型?如何在嵌套类型中进行行号定位? +2. 如何优化现在的ScanRange拆分导致的下游bitmap、column statistic统计等进行多次? 
diff --git a/docs/resources/segment_v2.png b/docs/resources/segment_v2.png new file mode 100644 index 0000000000000000000000000000000000000000..72f30ac2c17852df9017f752e83a96045d00f296 GIT binary patch literal 40991 zcmb@tbyQSgv^RWaU;u{_q!a`sln&_zQ9!yoMWjnQ9Y8^(6r@8y>7l!aE@|l+r5owy zJ9zJX?|a|%t@r!qTMHJ8nddyuKF`_DZ~yjh?-Q!3EQ=3+00#g7|E1h>bpU`O002V> z2MT^d@sQjP`~z`Omwg5l4N$Fv|6p25DMicen4GB{NloSa2 zJ|%Y8IZ}^_O8s!$H8Zu=RhhT7;gCP>gcH^ zJW8|?GZ*4Hv6CSZH;0eFB}~u6r3J)`4&K{~cr-VO+`IHM@AyR8cA72;z#N*d+#63d zJZ)_NG7pz7b->W|a_Ajo?{wgv@l@?a@qI{%VHwaAPshZi7%cz$OHMpM04xw7eyjW@ zEdeg~BaMo;qpi_+Kho_16Xy=nWN_A0=u<9a4k3Uf_g~0$kR}$OC*lG)ME&ljbJhc$ zjTktzUeIzJRe{>Nt}RRr)hI`A`D z+E=1lywJVY54g?yvBmz&1yoF*EnyBdzj0Xt^xl(36=aP60@u%bxq$qQ1#lGlpflcG z%@a~zaA3oiv;&kDE?Uk*Xtp<5k7DJ?f+0>kIV}r3*#Izp9LNXV3tBi`soTBUFWS9M z!+`DQqc+}M9CdazjT&qSP}orcx(1dX%-C4~gaHZ1^{6kvHY}NU)8goh3#^ig9`>oD zF!!1dk2$|W9APcbQGe?n>dR?DfRx->vj+ze2p@$5n#)y33{%NiM;IG0xY?ZX022ev zbsbcu2i}{lPwWGr<^}>No{R0m6M%W%31APZJ~VyKjAttg?q#HRY1$?>I{G;Z5L{@X zX)MT27wAvD`QVjx_9NZ=h)**u4hjgeFDF;c;Glk_OKI+q=7M8u@(R)=-;e^RX=7pF z=Y!p6tN^0&NQr_h797pONMDQ%kUx;9C3H%5zYECjXymF2z&+rIs_dhr9X?U_u5)CAPZ#E+>!tRwko7oI|d(UF`0nv~C2{ z3}B;;={>@P3O*BMZo?u-QGfn0zA2q;^ha3LMMyScea6cS;t zjJN$yPTZsHq?QCK;Xz*}kg<7%B3HC3|rX1z-ueCNx z&uwppHV!^vuZRB6=kUY$it1RVl;wX5r|tsORiV42gYIAe*$)J*1M z6}i|kj^`)1j&ptN4_iJ&xxYChbpfP+RaAY>BSPH`aPjo|w;Wb;n54U6c6MN)Y7JI0 z%X%T++bg7#AAFkO+wgLqvWCm>JWzFSwDRt({D!c(Uyc|%RtvatZh|^e9JXCPpy0KX zZwmuL_Q`7|RNs=8aM3@X2}=VIwoZkD+KlIFmu~EM>VVw==W7Hh4ADq$YN={95M_L< zXG=ZNC@zZL#_b9ug$2&;OGLxhx?DFjSpV~ zhI&l~&ZE?<(fNHQV=rp?s=SoA^}{^3(?=VnIu42lOoVNm+REx?^L|s6&`Tn}1s+8p z3@}Pkv|tU<&GwhH#Ju#|6IZ66ys~&33f-p7MP(YQ%b4{nc>d4(cmyODWaekQ9y6I( zrhZsaU{4oKQ42SJQupiSc&A3mv5DV`MVp_*{AQDWp=SbhB95``rtEkp)Hw z`JV30P*POODlL!J)1*|1VOOCZQ*MImEaN9YzgNXYKBCB(~|fHxSIwTa@=2hM1+M=?G=L<|35G? 
z1c!o{Tpz@tT9u4vbGH%Bh=>mTOe2JLQX9my&15b%7BF^c%F7rN5^*tbRDjs}_S>@$ zb<6<5t)A{V>O(dG5f@QO6~~=6Ivp^Oaae1T5<{f_JY$u_q>5$Dptk>q&<_AZtINA^ zjQ?pY1Vr&6FogbZXOj!t&}!rE=~HZEp+aj(H+b-pWUc65=`BZ(=Ouc_UUZK3~28 zAt33_gKjn*9$7%q^VcwNO^FX?+e`prMvZgae{~>_%kVTo>?Ru= znbwF71H-=v%pp$pXYB#`;HJr>!3Md+3%D?1H2vca{PJ(qJ5i%ED)&jVwx3a#>DSkx zPtt}OpLkUd3c&1tEXp7FT#(CCi8@~6yIhbvr%Miyd=6j_!FwCj)G39T;t`^gaIKUg zI2>5CU^!dk{DDY%A(CHC3KtO}sOuZ^dR0)BHV3XbY&#N^I8=*g2sC- zgX>6*pA2>hvGbx2Y5|S(fFXwo5a!NLHIpKt_<^Ra3$7e5jSBh!LuYwnDyMWwV^Y<< zK7Ixz(R*(y3MGSR6ZWd@-M;6g6CrYponI_6V7+@0K*hB8ZsBqzk;hVBA^oM4?4nNY z3qqJ^kZG%l7)PYpi|<0{ot;oiBd1}Hw5i?zsE{@q@dev&CCRp*RF|!Fq>In&B_`Yx ztL1$&5fh~vX@s_vhsq^dk6dw}bT!MCi55-)G#$IxNq_3jQjIW%*MN4K6mqD1_d}5@ zr(hy(8O2?|#Ba{^KuNb#+tm0)UTjEf0zat}44l$_v0yK7&IkUX4Ia_g2PkCIax>?}*>>^^2M%a=(j*fH)0<$C4ZBX|;AVCtbcO z4EX-(F$!t8asnN`8mi;BAF~k?0-Q?Ir0s9m)-OkvNTY{<>(&q>$D7pi4;1rSUb{%; z%EU-L2?u7?i@!OM_xZQlPO4oUhD$Z6h5@G(z+ZZ{A1MS!C}jPB8Tz=0g4eGjvY`}KoBQM~0QJV2QMB_qj21FXghANFaZfW7;cJr_se`=H<)r<+_PPoSck~J+M-5UOo}<(Qa$)6gCHvE{j^gB$*3-_Ji9*$HirHCoYG!zoIBCDVU9bRBXb~673Y;Gi zp(HNFh&$-=)uRQ|*BLR;68g`T>yv^3TX1)Fk6pv}S;&L8&u7%tYi!HPIpEa@BU|tF zUo6h-^o=wRnie_6eX+Pte0%+{Ax>q0F%H5dibmK9;#oul>B2W9^)C`F5fa#fm=?bh zCn~w0Ax!?dcHAr(X-@{hFwOq#2gVkF!Zcm70?vSq)}J14&rc0JjzIW*g2_wO)0>Js zt9;^&FFcXcviZz_&NT4i6BM8TWH){D@L|Nz5_8JvAmn4BZ5&;r$J-h|$x)FM)0UQB zFl&riOJmV96*+ZZ^%FPU6y;(92mD%Vn}PsAAE0S%)~s#7PZl!1FIg7Oa`y>hjHKkA zPw%Y#YR((2467$81(ydL*{^Zt?zPE4r(7~B8{>NU><#Y(rcOCB$`>W#Bf_ZR80cS+ z%Si6F;D~j?-!|WYy)TxL+G11lmh+^ohzA$D!b_h&cOJ}wiSr+5Ng7e~OIM3RGTXd_ zzxYT1;NUQQ#rGGCqYsJ1|O!!|J`LCdoI?JOi{!!MF@5;SzKu zF-C9|Wqk!i`AL|_5Ct<{V;%?+vGND375kb(!1x`|^Mw9nUK7O{cpX0amMiSaBgZH4 zolt0$58Ey{BUZ)DVNyb382zOR%oA16K zcI1~M??o)=gjeJuzv`e=z9uSw>>zAMP^I*t-4?^YV1AO8~YL6E{ zCV>YCKF35V9_oiKb8ozM+AeNlw(`KV`}S&3k^#8UVWFeV|0- z{4fP)v8_z-fdLJYV8miRWtXvNeA(e9RFMECWSL4hYD-f1ciN?=+(_9OL?f<%!k9a6zrtZJ~K#Jeq5QYK-ve%ho?7g+lzWm4qN5Eo(L4FkHmP3TOUf-=%UucRUhDtev|LT@}6+?EP|J!F9T z&*kvN_I%%4JWsRi+AU98$KN$hKPXI>>p<_0{@T(8! 
z#8m|#>d@I)y07cCFRLuJxK(=<=B;qch4m1Vb8}+W`(HuTt*`947gwct8w+8+BSjdg zqlveNrDaa7Zs{+Bja%X~P-j|ZH)bV>ZP%(3P;`0mv9Ebn(h^A7lu8H~fS4KqqUMe5 z@b7>Vmcs)@yCMV(ZC=+GIg)&oqYTbs|1gK(N`@>0G%4}=L!lzC3EfZ^EW$vsGy z(0rcABTzY_@wsXv#(aW-?&P}KUzYV`$c}XhhgPC%kFhtOh?KIw*vi+OHprRjxTMBx z=$Bhd8VdIZ9o8ObXu;%Swz_g4W`h??!cgVZ)B+lG=)ONHQj}J<9~)VW#zfsD0iA$;$@uojSkSj@^b!a~(zJFU=E@+<_<3CFiX>|}n-7o>q?J}ICG_VWc5 z&EXVyLM@3ZxA68p-$#x7Ul&XMzC)hDJzQ-(t5dTSI3mPg6^~v?BZmK)3XDz zbA%hx0fOLYJ*X2$m3|GdRp}wEG@iFES$mJlOH_-HvIQ-m970s#08nW%_mb*?&~^{7 zEgNe0;`9>d`t<@e=CUPGqpVTCQlxhM#*#z{lO}`1m3cok_3Z-0sh1qF!jl2wJ~6_j zbSzT{VMKV(0jO_g>Mkw$8q4l|`Vc^NxuM{V|6wn6l|C=F(?=ZL+Ir;DVd8HS9OTKv zB(+VzNrsc^&69j^A~J+a$?ZgA)t6e@hC<&BqfXd=tppqBGNrAt>x6d#PTWO2jdm)a zo!kIr{LGp7!&S;9)-%0`YJE(gWcr1h!biSwqvCr{&v$*hAUF<#%+H}g*{{W`~czDcr;!MisXrH0ni7`P6z!0KJ!u_E{C`llE{Lie_){BdhM08 z5m1VGA5w@jrN=;hb{kI$tLL?g>YYK@cGyv34uC!)m4l$3Ewr>~VN-jeH^vGC{z^eo zNhpNgF2%?DW{KAHc9?<)DP5=i>AR#0OdTkpU?d;_HrpTCX5zsfR-_UV$C`i0i0$tnlyVj9Z7l(DUNB1|LLu;8w8A9#Xg=pijF z?by)dV>O;KUsO~ie)jbo%wP!?JlfZ+`??fOB;3z~oq71fA6#66sSwML;_rfRs~kyv zz*rg+$%y#mKmsV^H;e_(U^{tucwp0z3@HW_jjEWG{-K_r2T7Rl+qLFfhkzg~{CXO^ z4&wH1w`S6@`6`aB76wRDk?d)K&+q}N%+3MvkZg8ApM|!`qKDlirh}0s2$VA$v?*ev z!6!Zn#Pyq(wFRyhe%+x?vj2f7?f6S(9`d2r6!@7g6_E5_L;j!vuIh?l_(#WD=bpk7 zf}$@B5se*0fURd17c(D7ncN|4%KeyJxS|^MepWTNsos#OX*; z0&yOAfT@EOJL#Tp4K_;Om^EDfC4>(BOAQa9@!*1&vKU-1r77%5_h!_VimZyz?7EqJ zypQl!@MAUy_v}$Fq0%$v{`|vJpTIfCmbw~cP;U_|Dt*z|0ctxT_19Pd`Bpr4?vn>a z1n0XINL(ZqJa}xOsLo&7?v5LT>Hg(7bV%CzxGg*Wl zJyIza-cYjsv`vOo$-QWKE{`MuIZY@&{{xnz=&S?Zgy4ck|`&*lg{lwNV-gDY(Xg=4|N072j}r3GbX9fYcq+8xA**k5h>7GY`-AYEkdqaPLd6fyL4Kd^Bl~X*!^$6&YT@t-k86UyW z>7+AP3-9sCh`CqEZ&cvvvKo^mFAzWpOb!|r`<8h`PB}C?!>4(+Qx#9zjG|8!K%PN$ z!*t+~@xd!oS81$2O;;pYav9v=?xw}TSeqi+F;=MildOpLisYqh=C!+kHqnMfn#y?H zFw5@yI+~rNcH;*iDbvMHoMB|zp*2GbeJf$_TEfB3tn8XHG{n_6Z*;fUCP1{^S)9<$;l6TxBX8A%JdBH6$C3w1eRVZ3% zFmwOalp2L7`WVZJAviBKp`RqO1>ERVQiSAV*TR6MFGfY^iYcY#i(l5`^S(rJuikgj zBQ4SvWjtn;>p@COdpFlvHlCPb7!FH_*%BV{i0x 
zNq_Fpc8-#{f^3b^dt}d-He_#H8m?b#6uaRULY%@q;(XY(TGIC3j{1SHWbS%zz$lbM zQ_D}rNdBkLvc-*pmt^&jEhI1S_q2pBf&>pG6YIwKkW8C~S(8$f!S#FNt3qe@xol!a zv(+1v`h^0*x?Z=~8r2JNlFZO&UU`7wKwGJ95jW7IbOO@W$fM2!IVl~8rqowTkKSg# zo2ApB8wn~gjS&ZBZym3-ousHx5t0wS*l z)QQ{f6lH&`G>~FFd5OXnGm3wE>zBf0b9?I4V;X={YJ9$el+O#)&Sgo*E&v?6ekz*A z`+WW!HNUMN-?viVL<>Iic|FZEux<#N% z+(pKMjbi`U$=vZTBhZy?kcI1C(UAr5_s$mi+ypLs(^rzhjzT;{AEcPs(xBe#B41Yt zSYrI#vKFNQx3jT4R}`#bg08JUU2m*Kkk*MG|_duA4ClXUmQ!GYP{j@1;pe+kZ)E(yOV6^Q!KUu%VaA+_7&CI!!; zE+Y{N=$7LK3Z38PFD3rpiNp5OK_G9_`a$?FXLdqjg!keWAl$)ZKQk65NjvrI~jT|-t~2L?F~)p&1<5*3q;>@*lBqD zt|r$hJNqZOY_=|n>o!=H&nYD0`hqZm3U(eEx}XBEi;NrVN+Q!DH}eqMfU)0E<>(XN zy*8RtbjPYoI)V(Y_$A}CC5*G)yn_Wu;c^`V&rRATDxaTp2phGZUgzy~)?~5l=7A^b zIfDwSCKPQCfLgQep&u?|?^1c1o}s5>m&cx?thp(G>h(1tBNdn{WDkvI=^>fp@m!c& zuI$4a=Xj{w6#x}xDnVYNjFu}|dr$E3l9;U+v6bM$+|(GRVrKzb_AKeRh>pTgj~H5A zVn1SR(6+~L9B&In`BPTHk40xMz{59l*qmbQ1TIjrJRQDFY;izg*dGAZXEM#yNh4r< z`8~M#so}rWPh!nK8NLQX+jtM^+JcI$Fca-8-;ixl4kJ`Kyrw8vPU=5xJ%pCbwEcA? z0m%^^y4`bD9I@c?%ZJruW#A?Q!4Gfn+HGvEJEWc<2>Z{JhgxLww=<7Gr0`jp#-N~& z!%gP=$YqoI%PO>+hlfg%sKv~{Tp9P^{Jh}LI#P@3@0Z8NSsotl?nyI8@?}rb~U@@B~8)J@hF;+nsFkUs(?tPF4V4#+z5?h<=eQ#1RHeah<*;6s4 z2SBUmZS9V~JS6kLpKTt|N9(!qt^>La_Oh}e{85i70->a^>lMfAm2m^df<8$e>=cT3 z3syo<{`n5GqMffDu4PF^6uy>cEh1RdXvqLq($CJGmS>PD_xc9k<_4xWsBob|6*#~@ zW9&xjSTO98ojIu;K_#@`r_l6g8uKR|a%qn>uTp#Ay@fEZdDwT%mR8fypnh|9Z1|5t z1UHG!*WLKb1IZ~}t5=rOAh$b8ha1LBoCOZ5d%uHaagMKpy=c{U!&vadDe753Mr6Gcp^h(x0Ja&HncL6 z0p|D?u%8*aQwwW`Z=#`yj!2)r0xvV~(w8~byZwdG>N+xcP=$d^9y=znZMNZ&l#uLQ z{=h^_0Z8TmxC5DqYyyL=*o=)Mf$n0VT|zL_4gLdW+Vm8O{xMo{0SICPX-_Ra*_EM# zp!&>1Zs+T)+Ovz#xPbAmmKlT7LL__xUeR!J>k!V%-_k}!*@opVFS zq*=yO3SiSSuY{Q$9B0CUC%l)oO#38p)L)kQ8_&Ef; zU0B1Kt5C|VgcCx^{Z%Hw6r@J~3%Zf3dz>e(cp4J-$aJ=OZ`3a=Ggmm%(x-n7YbaM7 z+3{f^%H0}E_+kHMG@FE3jiBuCov=>xZ}K6Nr}kwG+6KJah?b@aVgQ?BC6W16lDs0jyM8$wsLrOe1al~G$B(FXsF~MTh$t~8af`?z#j?X^!Z#*h3a3lWau|j)~Z{GKZ%Bvql)8hZ)Z~cN(Vixv5*3?CkHqe3im#5R5&IGp_5Ae-MlU1 zQ)|XGu9%Qelv=Lb!&;EzgUF8U`T2!z9)=(_L*|2V2?f0Cnw1CPLE9t^O_eYcZ 
zYsm3+V@#ud<9gxvE#s9RMFN1by!CVs^44Q4-S=c{YixC%grVmhSQsDLhNSQwaSVWD0;yeRUQB}CUvI4$vvZ3*}TTpna_ z-rK-#>Of5LQ5q~chvffCWW9BOfiXlj&ot=sm9-=knFY6VZwm95vd(~>YG)#WQ~{fA z@DTSX-tekP!YVNKUXUB{#uCEUexT-k2u&K6s(gBFdxHvkgDRu8EH77LIK?i1#dW1k zou950Vuy>aZEP0pZr|2WcQ;vI5_Wo|?fvTsn=ZGsVsZ=S<4Z3t9-RKcEWyj3;shDq zy)A039J;%?g?jmA`44-YHqpzd7>K_)KQA2>2v06G`!#uTD^vC!ar?8yV`F3K%!$;j zATS6G8@=5iMxsXJdXPe(;e2saobPuK%{2;}Q~VnStU}fF6J7_QvG3GFR&5CSeMSL4 zHo`3%r>YW(5C`J0$Hw~lPS{zC>&?+EGg#COe&|O8Byt7;*t<*;o~tN~KJ1Aiw&KT7 z<**i@#)5n>RG1Zv*x1-QFSFOuS}t}tijDY7+}BaU1SCUJ0T#>HJ1oUUYE6!&LyA(Y zLRRWSp?|@BRV1>r)q@xTYz|5d$(SwCYcYZew~Jw$F%g;&f z{(px0U^=FuNT@fYCTX5$Jn3yZf&h-=fq$5^X&s8VM_*L%NPTGA(`#T%8`22O&dtq% z9frIb>E-1v9bzW{x?+uWyNVE9O><0O8$$}F?Xm#^ptPYA|H;J}1m@*9gWz2cbm^_0 z=dzd^!zpqUUcP$FxxwDQ%NqL{H%;~Jzw+NsT!h#0ai`gMKXd)VC)LtW#h1>HrxBBi%kfT=AdF~GsL^DeMs?&o3pS>Zb{XB zplO-kEAW&V#0Ts@wEBT+rF3xGb)cfcM15GnPxS!FdN~0o2Z3tK+VJlEwtu0z zxbe8|XxzZ4m;yj?;lOOb!>x!hnS}8s;K{9268;}#C$oo&YhfzyH%<$~?n&i7OD{v- z>^xNo_c|`3Hil1Al6t3EcgpO`K8w`~f{%(i;Wd*aX39TGfsGm$zYWJcwz-Jfj=$rF zCy+iRI^43_m%LPT37Cj-TvM-r+@+KDmfr?7U-&dp`|QFEcW?n)K~b(y96`6!s!I4Y zz(sg6+f~Ps-WSh(7qTh*tR#%Y^&HPZ2!@ZS4RE{FO=^V0p5!iM4i9S(I6LHKYD?Q- zbUK29i(%-fXkPl%oeaBtGG)d0amcV`2KAx5dtm1Gv@DO;9t`0hS<}@^W=DTNfBn%r ziLCHhk<3s!pq*f=4Z86oxk?V(d-pQ(Gs4 z?~i4TYzqII9E?MtZjS*tPXBralX@|fLj|JldAJN)PYlSC+Q$yDOYRfuh|Ay2zpjhZ zJZ45xs9VcftJqKD*^7uX~4*TVickEyB!KzH>%Ctf3(9pYo zTH6gtz>F^;gkp>wY%Hbv(|Ylz_f~PyN(`t}TTzy$3|;q*X2sVEM3X^~biexKz9B2i zChK?p(sC0$GfYSb10jp*;I3`uIj#RA$vGk|jn90KAei`um;iQMucNC_bg#|F&5xw4 zUyl@oMGBo}2_JWfd~L7%)^X&awoQcUk~?8;^82eSHQ`Sgg!70)@#l+MEx_wzqRWd) z6*PNfB;};nz2MTxd#FOy+E|y_;3}pU3D+AMWZ?55yFN1%`NW4E<#yp|8ILi?Fc6%d zVaun^KV#K|qrlxE@8?62hq<0jAl{)F@xN`BGXeJrzWv-@fcxK7Qq%@Z;u|04#RlHe z34DaK+}4R_4ugk?x3^7sUD1zXbJejl_vG)t{$$2{6Sq8*kny3k29*~oMLt#>HZ*jbyn5%^Xg+=8tBkYj*=gD|Fj$P0Fer>JaQ>JhQj;CCpY>- zaKL!mO+i%|!4GP5MlcT!ja}5joru$5D-WuJp=i|sl6zRe1%sw<7 zd-5*%mhJ7-A5?ENH?SaAwl|1R#Xp$We=gL*Ll}$}p6L}$%}eI3u2jB3J{Ny}%UX*l 
z2M0NS?NEh%cbCgeL!ZYqt`EIZxzZnkBR5VIu^>)C@PhHYV?p8UbI8STD<^~6yW-cY zg&~MuH`QLIoeZ%dnoC$G5x5tEqj&nxDK zGlR77=d~>{SaA@I<*h}scJ*`nb<>$zqEbyUk!zMyTrDSJNAtjO=`6J5LJYNY zD$-Oi;m{A%7{P=jPX37^i?r3Pd<(nW2Oqg($!CQJTsd0vlMJiFdrH5R_JchST3VqZ zg&UO1qz0xVQOR;H=iKh#($-Ay@hWiu#RRAX-zHY@cg zt5t*R_Y905->*swEXu5kUUsY4D!pzCwQ$6|$VwMu_o&#F4P>5g{gY#k zWu3)+&;%8JPd@W)iv#0_t)_9}|0wpLRXed3WWui4D@V`F1HrIc&&Zv{ly@s?~ zCVy(T4q__9`~MHygYIqoO?^^hV$#{HMr#k7Z#bDZG4dW1TjpDC(OPbqmfbr4zY{*C zRAH^oga2;!`*nZ(7ow?2T*3M*!}|Ao+>idnyVOa1In@OP3O~XA`LKbulZV{i)BD_` z6H%#6X0ze=-#eRb@(YU8i=zIT*NO^q)ojeS@L_`#0<5y&uKigr8-TB2qW3qJmoIjy zbUkFxGG?&A9y&jmL*vOmD(8X@sK?qd5yC;->vSj-0+2i}N(wD*g_?BHtT>%pp$3#? z6#58f_59J1V7H;1UVum+VXvy8w(*mwVB#0}Un4&DUf+d0!Fml+v#{0YskiD-&~nQd z@oOa0!jpj=heh4KTb^Qy*Fm~`!FVI8YRniFY7=}9B;2W~B30|f2o4|)_3Rn$U*{8e za9;uAG+Gk!kmU2QplN&4(~HdnfQ%Re%p{w`x}JfvgIdAcJB!U&!Tsh=q@c)vL1c36Dr2cxPZ?!UpYBN7u!ERJ%wAG$oKAzbt#(*1SNdoWj6|j9Ha!oVy&AY z5Xq)z+~jDjS0yO!InziuFOCX;GPBZNQ0`KAs#aZgKUD#%J&JF&ENed^_>N8!vTk2C zxCg|YU4S(*)?V8iRK;`_6^)C~5a6+xd8Fdo=#Ir1*-eLoijka=BQY=vN|mYM zy0a7R^_%b?LHpt!$E{c0OeX_)yj$w6SlCjyV7eOt^&z~ecN9OP+YM9(+SoZ;-) zcy7D-)_CsrPrr{LNUa@Ab5&3U<9>m{LI{XfE`R&xt0o%H<+)7=YCUYN@^eXocuLgL0Db>Squuyjk1mW#abk{QLOxhI7S>2I@-``{jKQ*q)K^3q` z^bIN4Vd!{um=&SYcsMy~@H$EaY(#9{4-C+5jw}&0PEK2q2>@I2sO2xeGHg7K8yFBwk;kMP0!Kz7p=i5vBWDRey$BRP-a7)dM|r+ z!M+JJhnq>enslb)BVbtv&Ek8*X^G>`onH649(qLkQSq%%OmGZWH(r%EcZJ8B7dOuT zxVF9_!p?otI~Z^16`=+8xUolTW6`{O%1$xlun*GhL!iMQgtM*z-_x3@C~Aq5(#2IV z_tx1i@Ck;&yVu102h3B;X@ri?Lo76-gwN=S{3z8Q0N2w=vgDjbtj|R)|8zjRe_vdt z-qlp~?#(lbepDfTn?+^%I)FuN!~)$J3WM<003BnmEUncSNdzyz3^6nb+ zUZTK2ZSr8C-(3&hRO$M(bZEdliPPYhksE^Nr}vbJ=>}4SG=Q^0M?Y3tLwdFN(qQmHX93e1orFLc{OU0Z~yLWnT;z+)mTHr~p z#A9&ze85=ZP2GL$u$iP7zvyZht}uJy#Q&CH7P&UXP1JdJcYh zXufTJ8Tw?#OK^hFhWpH`xjOu<7UNNr@8Ctd2#xo3<;a?kL_Po6^~ftWxTb9JUqvCm ztL_t{d>y#{V{OaXJ-2|J83GZ*%pRVb-)xVHo+p0cdD;VKBk0Jeow~wM4`!80TBbZEin6 zQ;sD<@LI;;37~u*^{4!mNMyn}A4c`AkahcvOrMd^$qgs^@ACfSESD{EN 
zqL_J)>HDwdZ_bh%m$-68uYINV)xNQG}zlfsZJH84@^VkO47tEnpV=@I`V_lq;%F~}u*;#C3VRcD^E04XB zd-^#y3mCC!8REta*m5I3rGEK^fp7s#duHLTguW-v#3H?43zuSGb$M*_;ccaI06n!< z*?2U&fT^$R#G~C^q5VYw$~Cv<$tgaRc61Hp7Zu%VKdf&`t_75WY2vJkNPI3J1Z_ipZACZ z=zE^6XxV%GC~+axyMxrrNGxcHIwj3_2_GRx!4Zv3k!H6jn?R9zf zw2-wPu~gLK_k(gA_VP7bz~}cJWf2?cF`O1vg2a(khA$5Rvcuk9xxR+guxHL zPshbKH2(bCJzM*F--0^TsDs^!_mugmHJw5AHWC9EzmJMGj72HGEhOH3+D6(Q^itLJ zOg5+2?yo;K4abx^|hHwH~C0IW+!@QAe4x!kcl%L7EiS zuXacTpEX@;lB4QWm+@Bu1RrL}4|T@yn2LM`oy6ZZ{omvg7^bZCp0kfGl?B!<@|;vl zlMLWzNwNWP)_vo_P|FWmD0!xrX(n%`V3c-dHfMiZlX!{l7rwk_E1Sab7VKV}t+WMS z;$8e-GG$LS<8kcyvMo)>u1G7V6iah_y3#X&ms#=@I;ib1vxhG}?>|-IV9j05qA7c& zz)wcww=JEa`|MwDl9bFFtX{ciFH0VwK6`moalyJuwvLEvUoXV-S^v>T0*HI*#k@pt z)YCrJ-$|JEKb<<35?*)2*|GVLCWrK~@ay%BLv;kR@}QkRdQJ~+cK4cT-nI7pFFLos zH}&5QhV*%}qd|pk< zR?U#qfW01BLdS^PDJ6CD{xMiMKIZPC!bZ&8&rM2Wy~F559IdC=5{~Kh^B!*a3(POi zj$Q9~yrA+LarFIrTaP$ARvu!r-OfQe#PB(kjYRafD={0GpptmM#I8QI-Y-=#v{p%? z;?^&71O!`#Q+zc7@88*fZKE+S{+KH+ap0Msbf#+++-NZ?hwjDR95a|&K6%$>Gn0wD z`bzp_{LL%9i_SNHvKitkm-nAO^=hd>^2fi7Ip3}UbZ3UC8_o6^SZ}A9eJ7o1Hx#{! 
z=QaJ=l3*PPnY5ve5UdiOXAscVt`ZYYfAW7}UVDA{*$DrCO6$ z?PXT8{_A;2hIkA3K6P96A^-@)GTp!o7d|t^5RVIxDXeHW2}!r@zW>aq`6>aKNlf3N z$*pUAz&aG`Xc&3S*9_k5>dkdjW=PqwM%gInZ?DAfgyHVvhZ3(hEoPp5S59GjxiW6- z4M%*Z@H@+wCVu?CW*uj}?@8$r{@NAh87LND@@j$b5oczZ9xGP&h{NKh{>hXqUW}IJ zs~-su7IeBHAj>OG773k~(@j})lp|StP<&s)KNItcc%9l$aWz)Qc&ZE+ih3E|O;z!T zUh!!iRJ0`xrE-^vmV`<_1j$OqU(od@%NCY$Y*yS7ccP!$9RJPeNpD}#whdziOCFT~faq9O5ltM{E9SRL`2LJdt1zblaP z2mRA`G*EhzULoau|HkSqJ8_y0VUr*;?UVhSeQoUTp2|c?y=9~EE=C-{LM1jURIVeC zffY@U)ZBSrwrUf2on_JL&0{foj=ug@YF8GGEK6XhAw5{ikZOqg&n3^ho3(HNTOLvF z=Mw*_XO5aH?Q~uZRo~`dj?XNlQX3B2()TA%aG{K&tQnT0h2rewcgOYnvKhXyMfA?I za7*&*z0}srgtinWOtg-Ns>WjjA<<#2|It74D&n-lhqayu(a>O4%aGOZ>;=!$SP}12}{xSQ`<85Y^ zP3#G(xGNRgo{v|duY395c7P4&(duUY|7w^L-xJf;a=;qqZj$L2{(sU!ziIntD)2=J z7f-cybHAybmU&+IwiqiR44HZ+R6$c5F@3gwh%0J2DASi*r3VhXK{Pd5h z{`Bo-DaRB)u0%m=Vw&p0p5RgPn>mjZjuJ)sk^nL^Aep$Tq#yV0 z=K8H{B#g=|G&~W}@wmurd=LOqg@D)ltgpj!I~xi`cz#RiE*NSg->WJq#Jzjab30zN zJxR0J$DLjC8UCQUMVuI?{!InF&u(M|y`lENZGpA1G^un;-z5VCE(dY>z<8(6Ki!h<6oNg^BFwai zf+sqsHe*Y8#_Sy0svG1J-Vt6G9+6Lgi|PT5hCWPk=21pa=v1}^1o}0{DC4-$&jiTN z-&a(q6dBQ>`N2+J(-T#fkm@%D5%lO!W|2u$H+v#;!{37e_pcQ`qiBBX!|vhW&(mpe zaL?p}cnqfaZtM)%{li_RTX7QPN*fjgV^ zD4}pGi4b9w>ICR|c?6z)-+umZ*j0Yx$v7L}U}9tWX}lI$`?QzDN~r5chmp(aSf|r3 zKs%)F-nUQdD|it{Iv3S%Od9$sI1R{bZWYi%=cK$X{bZt&%u-TPMV#hlp2E#v#`Qpr z*p;Bj#6QoS1_ssL`>|0QO%c684P8`D5B(!n{ky!8zlx-`@G;PnjM)KOw=WAh<$u?? 
z*wSbGy^3wh7m)qRO*ub3QzrywGBc{9GKw>CvpC@$i&yX`IMQmQM++pkM%*77YF2KF^!5i^U1CN|n zQ{7>q((OkME;!D@a9)s^@reG<^SPG|rUM+@5>cZ2a=yd z`uZ1b?PBB9*uL=8!hi3x*wTlTtI~diPGRkV?a45e35KRm2Zw zLB{(3F!!EeQ8aD0XwSe13p1j!5r5Q&21EFvON$w3%V zK{80rNpj8^b`8(_yzlev{ax2S=j{FC{2Cp4x~saYtL}T%TI((e2Z`DeNWrb?jrF(Y zfr^yoj}FK}v8ifWMM3|kt$XxeAL>!Nt?Or1KBaSh&&MfX0Mfa;Gk{CY@M`sK(+ey> zZ1pJpk+}@TFCui^PQIgV-dB)gLdx~(0pqG>;R|91uGgnRuh!l|AJ3f~uQ8a1m9?gj zoJeAm(TS;?yE7-Uv?f>joVPbvlFd~;2ftWQ)6vFbNw4eGQd2Nno|eOpomDk;K5@@5TKC47z#;l+_~(c>Aa(k4 zbZbYf=h5aCNLpO68KvZg`JJ8C1l}xF0?8*naTJxdLogkg%1~jDK#uvvOGjHJ3#x+V zSeBp_Dg+4HUN&OEh8N2`-FAq^C&m<>e=OduK^5^(tgL^fxJ6FTNOpbwJ&<}rZ1Lne z;UdnPAG-@Y=HC94^*e3{5No6BgOeb|WYqdKkTU(APdZ(5x-G@wIc{zBO73!O(PKU` z;`cdk^dOzS>x8Zr*R+pQK$6J2@hD*R^^W9&E747Ssk9#-#_vh>G2MenT5U&KKlGR^2Tl2>?|7B zmjMb-7pC}cLDKWMkMe=aUcQ8d+A@JMrnDJu*k}Cv>cnrD_SXAwm9Uu)hY$wt3t^*5TR>+ zjzJTresslJ#pBu^;i{&}9bUK~KfK0&dJLe6&Po5eI^4r3Lo-Pn>sxgo{t51dr7Zu*-d zq`H@G-Hg?B+2BYFO&v*sp?UF4f}dKbMky>c-(nzuxzH~J&Xh|x;H3#G);}7QaRb>L zkA89iSqKw6_P`LE8Mfv*(%Voc+)iQj^iJT9t8}^kxizymSEw&ZcGHa~YUO3X|H2;8 za`BlX;948l87dI>4h)QfX^Q4!iK0o0GePc)!P5r+Eq&w#C19LdE9u(&soH+>`DRhH zLGF+0FRW~W-BqRsG&MDLnIiLl@q8GN`jQ6gPeSBh^k4;$( z-6`Z>G4n3ne9KtlnI}g^s>Q7M$GP#EJS0TCt>;!oWKyYfxj{}(Gb;D;fLsB;Pynkn z8$(jwO;OH}j~0o^6fC4(Q4E^*>0xu%{Pf&Zrg#%{c>AD$BFVJ`zww7ZSompY`B>E;K`$ix9N(}duA_Yl22#CDD-8Y zeqCt$Zb5MsrSwcRT7}8tw=^XRa6Ww-(wsI&PVaxsI`GYkQmAwNH?@<8%}m+;Vy^k? 
zG42mG4u5IXgf-Qp4vGj_{O0ornU^9yKGR{{TJ57`_H3jiV5-x<>+T*bl$|`pbT`=G z4znUu1USm{CI>|-eTCFqV_qF{U*0J)k9xW}u+|<@#L7jJC|CXfsoqZ{o~x1wAe_LQ zl%`B}uHPH`%%mUxN1g+hce$~A6_5Ia@#XSDl%^eH`*9%&u0ID3SmEi-FAfueOq6ik z+$|~3IXcF{j@cpLM;}pKo*2mS?o|~Ou_&59r-jx0o$BpZeCnvqmmpCM29)_?mj~ z=gt2l^igso0E>=kuo5yBDsP2e5gkC?~26S)Hb1r2e>WTSF_V z#p{XZv~~9qI1kc$8MSjEaFtL zg&88ZOr66)E@zO2dw6+OzLV?CYQUO=Lyxs~$uVavI0#Qoih+gN5SD79Ji&rbehEpO zf3J^%kfAq1>&e4i?T-U_S&c>b78x_*RU@o)xK}bv~WOjmmwr= zsw#C7D6K1P4*-FYoxwvR>V&Z@A`S- z*yXvo09QJSoN4R}c{SrpwYWB8)gQdMD_cDjAJvp^NCkp8QyWp_FGicCtCuZgqi@>H zM8+8-qh=2D*FQ8#m<{q>|MvVpnd3%pm+VtH9pk#tO1e9L48X5d86vR(P5FYou%V`u z(e`yNe%`U2javD?Q^WvptHIazLgDVz=o@TiMgRG$yk2@2xzCvOXGu!H_v0+E)ip!djaz>j`5Eo zmdC+n4QbfDAngfXtK{Oz;O2e-@OvU|6$U*7H#=1;d&dt zl))a?(sa^+#KivoRaSUx+@%%&0dHBbRYpwi(Z8SR)l6DPAU}`#Twh99=RUMxHGxqD z;%j{L+R%FRUpfc)#Ph=1RWG@)-Pl7WIrgKIf9zn%G77CoMh%u4Ni#^!TiR{mdo� z>!{oPnAvFQulM{~I%ecKiTLt5TS%kQIQOP6z3@MYcYnr~S65T{Je$i>sx8zSc2VE+ zM72gj|IM^l=Vx-3`4^-YE5`#{&s27-bO%CLnAiMWCO9SJczi(V`9;ix9115KQG{_6 zEZWZ*drz?$hRT!w3V^SPeVI0dPf%Z#N%3KW+P@*P2tTtjkgoGUv{df4I|8uv(4DDI zWA3MJkWj3uQdTeuSO-r1P68qfp~XKfK;cXac7O|^i|i_C1Qci>{$C^Drr`0H_H*ke+0GY|B{u zP2FnZGgcM7!C=8x;Se7Q_`gE;pmy0rPOLIHa+y=w)(H6+$7FBbIjH=@ z2EYRzp3(9PpH4olVpoXdA{sVsiS}+n|v#Ze%1a zVdcRTRK|N#Vkrexrxj}>9F!nV`eZhA$dwanUxr#=O+rGVP`g}WzA*Z^45nG!k}11` zsE(TN#-81Ar~r44HuyKMifjMTJL}>UjZG6QsbDlfs(nc% zy%IbB3DPsYl?sRi^IhaK)f(^#lvuw4l(VM+x1ceXt8@;KtclQDx1$zbYJorI-@%R-+#o$fQClD6{Wue`cqAfN)G$pTXatB8dUf;yE21-CU~os>ZoIIf&kGza&?J03H3! 
z-WT=1k@i|P9De2%s5!yuPjP*y{ih%jsmkDq02=s8F;+<8Q>Ix{6ccdlL@ObaecOe# zjdq$gC)oS*AK)evMZugliPC?PQ-HVgIebD>)~$vLT6%HK?l$=)$p6uhB;aN11%4D` z8=}W5S$NB{2J?t#HOO_UTy@14)FB{~`UGy#T8YNQa&gDOzNR>;#g+&{l@mhkxagTE zH@6hOX145kzWe}AR_@;T;Y9bCB4yt^D!gc7G*dkTyzjEqeZhy&aYVP57V((2z%vuZ z3G*U-+pHAw68>@~C{@_Sva9j-NxuS&w{%x7SnTCD8`@+zdWIQ>NxVqXn!{=W+fUt0 z4B#Pk`n?^qI|MHhE_b7WW*z#J!Ti)1R>|qi@!SS@F6~rn>AXz^j|{QMu2_a+Lcp=g zi8Hb7a7}MlTZ*=&F|;SK)m~W57K&GW4#x_NV__U2@>*#_ZWq`dJ6MmBN8;xfJb>?c$aiIb+(2v~37+Qv2=LD;j+nceIr#IjZN42eU#0Ih}y%}@`2dOx$z6h90ZQ$X!ou8XXo-h70xkC-_GDPgFpL0*eG~>3y%2nCFbj%_R zC00&Ltfm7xC<-{n^4d^I^Keboa8n@_e@xVZ-+b=nEdp>W8wGzPU&Fj9e@*G3SUu63 zEQr2wJ1>4q7phngveUpE^vr(kYzo#BjW+n11e7RQ-e1aK=#$W`DyAAovg(tDr*Gz> z@9>N|E+Kz9@qlLDCdz#nQ*gEIB=l;_qlar_d_O-Krdu~LJUPhjewLgP#3PN#D15i} z%Y`Y5{lU&_^e7w4joT}=EhBAw^y}A{dPIj?#M0Dh@;8+}9%XRJ*RWt8(l~x@8rx=b z7JvNJ&Esgd>>-j?wJgKdr}iJ_Mh&qN-{p1{aqX|vmXALVu&;J~lth2^`s1Dyj`^@y ze6H}F)?*u9O@}ZU6x$^1wwE+lS!I9dP}8MmWVKw;le}Bk{lgomC3KEKAn8Wd+-gi)6muaJt_L>noQ}p zdy;}fs6H>H7oPWdWMWcRKB>EJ7?$JPfFS)okrac=fbP8!AKbiQc7r&R-J*K$#Vjvg ze&ZKeS0+D4)_-|O-SYwAX5V);O=It#ah#4a7Rs%K&Axe7aJlqJHUF=mgVUxByfc5H zsu_GL4X=LA`X_=*;`y^HoXgB9ZT!jYxhC{JP2c(xm}i- zo!wyAKb>zz=*-!_B)tE2BS&TAIG+T4GEO>WpL(KCQcU~RX@u9Pp`jD>ow@d}zPnaV zW{KftLlafi!*tn+h#Dx*iyn1I44Ei*%sojA+}jF9j}GFviFE9yRWYXv+QTs$qgrm) zv$Us}$7Z_=M(Gv=E9tuh`s=Wg>glT~o-ilzIX!sMo^<>TJ@?f_y9VvB=ceER+iN0< zmfUDIYOeE)Ih5#CH%hw;RFi-yB3JOv)|QxB=OSvIXh`U{ky85pNlq`h>pK1bOrpcO zbQfljB3G>5;vkHpFo)2Q>ON-tsSbx4Gi~#=(uiC~+7{LPm8!63@+sFq-`9y*)@5p1 zB0dnC`!t1cvX?8}6ZOfn{t^oNOiy^1< zT*d3;D%g1}UKEaC)JBuLVmGIVvEK*c{6_CBBj7Jhy0~CT8t4z&0c);_^eRjRxfO;2WzE_yo>K ze=ynI8|>+H=*Cdd*%Yu^ZpVKH64MCy(MtJvNviD{TOl=TAv;N*1YRg@soi&Ffm5Fr zv!rrb?!}4R!s^;`e^bYty23&}x5XYnj;*ON>$k1mU8bgaNM&+IZ*6I*c+8bm5|=H_ zoch0oqyD94x(!T)Qa(?v&{wR!SFd^-Vs=(xW8$=vV`$ZyEX-@<_;7{wk{T=w5);A_ zUVF<&s1;CYKTYZ3+I`o-57Yj(`jc<`gwwIt0gDrxX2I zoPjpNN}B*BHTGiO*W%-e6&%2Ftc90f6!VY;VpzPsQGTa0af=KtQm4&h1ZEIJpcSYo zg@QMz#%we|Sxl-kF4{t}paNWmp zzPdt}Fnr^I3=?e_W3ZT4B0V8fs9 
zQ+yYQIDjGeRHO`jH0kmn9e+r_V;j}daCq6Y$TJVhM(EQt;PGq4s(2ED6j<{1N?L2mgYcJ~kPr&0oeaFn z44iOI{)+mZ0o7Ki3LGj&<0%E}!#qvf8VeV7b<6e7$wg5HON$~WZ`GCFP`Pg1f%%e0 z9`spj`>I6?n+)z((?E@FmlqxHz?Q{s?g^h)=E5j@0w0|$|a-ST(ZQoTz+j*oS1VY~8^5PV+1yrkWC6)^(msyS5imN=gq z>2D+$JqGh|-{O-JGlj}*vjO0W@I^Ifykn)uc-{qId$+bPO3h7!01m7Y$Wr*kmHT*L zGH#gcmFb`bEDkOZyp$88j)QR4YZJ9WyQs^&r~R%4872>DWcKP_VzL_@1<0=O;G--GJn0i9r(5-#ss8To`oiDm?o_`)Y0f&`rh@vm%Iev_VxNFcA*pU@ zLmL~DgB=y7U}6ZxT(xO z1%8fYT?PzI=-l1eByc{6+OGdtU{xsu36RGBwFD@(@LCzmd6VSA_=niQ5Uh}*N-8J= z)RhYuPLgX%(lAM^y4gE?(PM5hi2iTwgx}H19!M}(|Br(Lqs#xNK>^w2bhH-oG2TRy zW980yEFk{h#sWvLHBoqLD3XE;7`b=-Hb7(f@?vQ4!}D0;vDn7~*75SZn%(wIU}pj# z%|qmfz`y)Vbe{HOPWIlutd+(zZBaq8GwV6BT!<3*`UGZzeZv_Dq{lUls`F|bbPup9 z8T|k^dAzaZhxh>Np{6OPhoyP;4F+BF`X~`iqw@^@4&7>RIOq~~ zyL`syi{$aIaG;r(1x6y*T$?SjQ$5@NzwI{p&n4tXNETzgL=37YY_n7RWKJ}~D)BH!V$Kyw8pz2o$3*gGhrI`` zX>fGDLYFF3v0l=x0>Rcb^L9X2c#{U8OXq`POU9;C@+i{BYG`^z5Hk)6u}= z1g^^jER4ixOIJjV8zPAwpXgb$AnaE_VV5Cruz&(>NUOcvpM@bS>{0CV)mU895W4E3 z|2$Xe27I;*fH}cvNoE&)x3vlc=Fkwu@eokUlHH&*B)S{;zYQwG!p0Da$VAgm`B+pA znq9&#{rdT}L%dKdDsN4=7cB6=GS*jYamXL6({6N012Lw+D2~@>gIoMIcA{3YE9z2l z29x~H%D(LqH|$pqzS~aB$bEq4P}BTxa}j{EELD3vJ7f~46QAki#})hb_;LM|jcM`V zkfivc6HQ;9tJICf*GzpUS{FlS#6Tm=sC6{-?7#v)zf}0-^GNq?I~@ZeEgED`IN3nb z-yU+D|C5K@>r4XXK!3#gMfre-kw4N(>=|y8BvF}(}k4szDU%p&c%co+R!a|8Sjz% z4NU@7=P*ra=Z~Tv0H&$`;faN5oWr?L#uol?F$euVq16U2ZPFfdWvKk_j8?nz)xme< zcaA=33q!PrULVaA0A6!w|C-I>Eo({Oy~g~h^!J1qev|&wOK?o-)gFM>9Q`b$YVXTE z2`vR!h(hu?7Bam5WW2NQhEw_vH6xh8icflF`M@m&I0Z=d8Aa(k0V=~{LwEb5^D``3 z)5u%s>9W&yw7Weg%d^5Bn=x4Iy!}B2TKWFUT-%MuNJkAP&sY)lWN>vRhpxnxz zgaa=T10R}s-J2g;$XrUnEy2{9wf@7NPN>{3Qj-RGd}-h&V#Ol0gDot|6o6#gFN3#m z!EK$UHkQ~^Z?lhAo(stzqW}BXM%w6X(q^vwAMwgq zv9F?~a4JX5(du#jVd1N}{i(|INgD)aa(mNTuVfW@_1(ke`Bzs^h~ubsC4HNF9eJ4x zW+o?21!3;=TO--~yY-?7!y|j5Q4(L%DKQubKXn@7#%t``?&}c$nAH1p_T;1~)#;0r zCOzx{30&zuwXYkj;?7H>-Tn42gGx z^_IOTgOa$mOSwu(iCMc z;@qmWe>CqmPe?X*jU3$J*t#^p`Z_9ek6`}^0j zo=H<1;fgDB`;RKquY>DfM{F5e7yglL?NM>lsZy&8pFvz~OxT+JWby7QY`~u_tK4(f 
zQM~oZ_B#SJU2at-0QdgBByX+Hl$6zB%hQJ?2na73d$q6j>mG}OgjIUCPoj8V?~Up% zP*GPe*?EU^r4rH%{jMl`E8HJiHpZoQYTCf zYjN%y8O&>s*o$O7#txSl8b91J%cW>s1d8Sbh3<5z6JS zTHdJGOVQbtDKd`b-W0AC;(FX!1#?o#V%0XsPFlA%_xsD%bkqL8F+;?hMBRT|-^9OT zz7iw6(b~2FGil&K@&QIL&ctlyi_2`Rkt=eypyc|i zo1ZpKR$MT@R9gg&&8Iuc#mYQeV41|VhgD0!;uP%79vwdXUkZ}A!YJSG0FCle%I@a9 z$W6Ie(i5X`aXwomM{Vu6_WmvTOf#bIMMWuHy;wFQs0`S8rJX;y6X1ao1%CcZiVY+7+Ja(A`uLAkHX zlCBSL0Pv-ts=UU=)3fc%W)(0jtFQ-_Gc60J>K0OYgASA6C$RapR;2!cP2>A%G_UjC)VD_u-S>q+PP^ym!dn~=oJTXl0I zG+382F_S@cxjg^xqq5@FdkG6s<72hk4hg)x|Bo=5u@wMg)lUF0>c_Y<)~e>$;~J~= zS`euH!g=?{x+jAWT@3b}UA(yBwq<1TNl^=(g+Te|lFf9pobj z=w>NV$hZAX_a;YiW=CR~NsIb#!To~ZiGT*o3z;O82~v49(}*M}>fH9IBi^DJK*NE- zA86=97ne*Bo|WS3sLO`vvQA)gZ+eWDk>YIG&0mo@xQfjD+&BnnSY0J2MC5e7sR*RZ z;F>`>w02HxohA;1kl@qWw^*4b*gexyWw;0S8i@=!6ciP)S*ZF|AHHN`#1vwEz`3z? z<~e*&+hqJ?ch0brfV<)=mGhr|lODK=(e~Wdj%asqed#%_YA5gK(+__dEE>eZ;l>rjRE-)4^1AC&h8s zJcn@CAJm3(-y*`pb!zM~E0)Z^L!x4``Yq@h<@f4zK}TPotv=b*G4?w}6`bFSq)Eno z)J}n@K0$rrJ~d{^|8S}ytqvJR2wmQdm2TJ`(6Jg`7F>`qZo{DF$RfgHDkc8OjrMYY~1x}eIv`Z zg_Xrcp1PT=x^n-?bzj-gziN}1S?1A;nti_wG8Mt2H`8-sUiCGN%E?Cq!JEe5UM9j> zT78%IJtN%OJzbCUf*K9!7s{^_pgAwdS1_`S{RM^$`c=U{UUn$yCVZs zNH6H}9r!9OB%jQ0xDcKF1ijQSSv5;>p%-=7&1^I}Dc-p##?H7H&u6&4!Puyn8J*AIQ8if{ zs%tu(HroN7BPZfhWq_e)1`5UK1k?e^r-Z0kL0x>+zI#u52{`J&I8D^v@5WR!kg_69zCl(5p30U>ITK6It0SGo}^3F zl2>@{9w*&A&R+&(0n#u1d7s$OjY`H`+kh0W3!D&n3NXOddUw$OeW$jJTM{H-q<2I8 z5hyA(yTg^(@BiEb%=e%c)z<7)9DN>fnSB%)hKX_F6~V~1qRv!4E@sLDxP=FVqKdqB zY+&mH30K4LEAs6h%<4}QUAQXi?`SOnx}*V{Eb!Eje4l>fg_9X1pU#lPOd>=JStX0b zJFVocX=e?m^U zkG;^u;g-yh`V5xSVAy~MqOmm$k5(d<+*JZ!W8FvxijDi!{Lx`e^X<o2G$9SM!xn*@_|q)6rwB#xK_i>h>9P^mIEh~=vm*`M2Q2`BKdwe~vZPx{ z{wG93foK<=qc!?LO7T4DRy~h8fJ|9%**ye@5bF{Chds`XX&%eYjV;bZvQtRf-S+SK z+B7R?vE}Lh>`0yGbx*%h{ZZ8M9~J+RR5Wz+HmGh9jqE)sgKBI$I*pBnPwLC+P92tP z&rW`~pZUGHVU2J&9a1C2P$^c@o^-2DWTn)Ne%VqkJl=^(sXk>dm%cD2eIOHPe4ot) z18S%}(kKuerbItoqaNDmAN3!rT{^HYJe!=UJDuz(biSE~iPDeLg=+VIw~#p`&axwG zj}9(HVImwi41fc-k*M?7r$}G3Oxn 
z4SOE@r(^aoa2m-(r*1wg6vOAjPVLQ(%?F9F+b=xZOGxG&h{vUERvsNr?`lDyLa< z;2#MWq7VN=p;KD}pUBZRc;mBF@|cZbT9S8}pKRoPdhtX?i#XkPW{&n}S)9Wa9`!{v zPd|SaSb+-#9Iz)oPRG2aK;_}@zeW{Ev6AC|gdhiMYUh;4on@OjcLm)c(G9EPTt%9| zS?hVSsVK>OFJR>ymt}n$%3FBZHOv7e$9~_j?uS%ml?RV=o%-)O@j(t4p&4c7vs}s& z4yDJbff5-O-F)Jt(9#N9k}e)PiS5U@I2eUq2WB|_JKOTa81}PaRc0*lsWaS7Bo)L3 zE9pShTxk5T567e}`LoqwKfDO6Ow)`dQN;yJ;aN_y9gk2+oHM;V*A^@Y{VlKe8x+gV z++>vdj>cYZw*YO+u#$gUT|pfLpr>`$4g@-@0l*) zgBW9>gE{nvyNs|}WW>|Ffov>fk!VjFC)0>?y9zwP4?L(wLl z<7r}OLF90PWck^INY~n)jN)$QtKf<7LvF(k+biwtP+n=KoeeGIz)p%-1i|p+hATl&873~sDoX`JRp~b^5HyQy zkl%V}pa}o3H%aJL(2&|!;@^EA^iE)QZCA9ZKPt2zzEAh{SZudJ1r&@2PAqDa?h)Qi z!~y*If)_)j^Fz5^LnB2llr>BlJE8=T)$kS^7)sW?&BMb~RZg)F^k~@PK-$l>_6{Ex z5*UkS8Q>{eS@V;vJiLdp`b3z$C=KlQQT~RCcA{1DT_4S?V_m4@9kh%1y&irmupb@A5LL{QyH-f|I-g>KhtwQbn|OT z3;BKa>xcjhFR%mu*YKM7CC*0iPKpqOTK{E?R3MC-*jO(C=9kcLDivgPbeLr>4k|=O zt6fKyq=UWcCWK562NeF`*CS1ALD`sOEISMA2mSx<)V{>fNOyPf3&5{62=LsRUEzK7 z=X1ZL7%%}*W%2*h!`q(v@4eZil7HI)`GImhohmM{Y(;~awzLzPJq(ISI(aacEDIu+ zCFHLMu^SwjiTFak9as9`5Utkj<`6hIcRck{W4n%|!|VkDn(&mJ_5Hz!&@>QSfs;QP z*ClB=|C!fK7fixdJ0#>1r-1jImuO;Psg0w=#N#60A>6sJWS#GM0NA_XS@b zZ|_R|1&H=8$7?N$Uwr7sD?~N*vhcjvk<;uZVXzmC5GCYV}3WcbHFMz z7FT$mO@Mf7hQD%acAVjCMk*kgjUfTJGLRqA99VhI^GVha>Y>UPOo^br6Qs4ZaV*49 z7W)vu$D)Vxx7Q?|@SO*UH^zL;J9@QoOgO)Oh*JfG-MEJ=_joUWXwFK~d*$>fLT4u| z2t4gZWb%lc%-)aS3+MxZn<=!+Z1V~RVXapZ_@O=D+2a0iTDQ(Zmg*z44JJlf$db$K)Y!>hq`(d7tQPbZJV14nqE?sx(5oZUL)oyqBtkoLXY*!2$0c?|X(R$H}4Gm8P5Q{jixd|8`CONF}OZY-a zC%8#Pd;yrd%p7PStL0}4rVh6mC(=5WN_e~J^;bPYv`*4`#s{2>tS*jMm=6SR^^LgZ zsqO`D`C(crRTy~wZjb<4HjKtigq|G#1I9Fn&N4&p0j)e`VB;PU$TgNWNJ{y?n%CB_94Li856vZ|j5Ng`WtFjr|+paaS`gy?($H zPmYWW{*=(0t1a4}w$4S4pCRQ3<+&b4@r(L>oi#%`qk8!KxNvvQqH!>ikyo@YI<;Z? 
zUp>7DX@8m;?P0wHHjIVXd6+di($0?OeD_8LUo>X1A~h)XhwDy;SyDY1Gv3ftBs^xE zcnmFHLoq*vao_Pb0Fyk%?{3mF(Wk2;fM8pXVo-J*3Y|*4E&>~Hc})d1zNDv%!Ym9y zX#VX7@h{*AH3k|;pSN_8*2&^Rxk&KR4$BRMA!;@r6tKgx{G0*GMfhi7o*nu4qm(pM zrY)af{f9=2v)`KY{imbo+iSLd>>lp7!U{O1;TY#hmeG0jpgu4Vt`S75q)Q=7AQL(R z7J+oBBhWCor%2UnI1NAB2~d7`1?3$BD5Z-co?8?RKL2rG|4+vfKtAeK07cJ z9Q}7o!3ZJ07C%~x-RDdVFL;LzMB>8M%LRSUc)RD$l?)YTGrj_q|G7LLV%>#nb$=p= zOB~=spqK;5NSwOvQ_=bS(ogd>tq!&M1|Y)7?95!&^bB|yjKgnnkO~(QhovhWSJw&Q zAhy+{jApr@)0wT|G~Y23u!G%s!}Kze{6PRSu z|Md(ENy{|l9U3exWFoqV6anh9K@I^V|1w1&WO?$i>c5<`|Hik8+{4>C`l|~c?+60- zOm?}q5=uBCUd=ImZKeTaM> zmS!s?@UZtZW}fUWG7R>Q7USU*Wolevy}(9>dxx`sTI};K7Ykaj)WHAJv>T{1^q(dY zFx(`fOZYz6g@Ywaaur3REfD;u*Hq9UF;S!F-Da9PUfFndf-`|cIA`5TL+Aa z1@_gN>>szeVu6u^&~}Wp2$g30Rt>Jwuf`%6oqM)>^wm0HI-<7Cb7HI#Kexd$g51?K z))nU;eJckXitkN&w2RNB`5)L{#I#ck#S{rj+4lNXT{n8q!S~OL*hAiPPB;vo`5|us zs&sxx0a?ZE5ElMUN&q}Vd$1QFsgvJqOFYnu)HXA*z=<4R%Vm4q&ukPist8iM+cysH zD_8Xb0{q3o7cs0V0KIndhGL5it>=-~e6{YNc#FeT`quUy?p(`Fw@(eiElO;#B-H|i zGHOq2)pGtBPeo!4d35g3!bYWG=8?HX59X@Q~)30huDiY=M_xH*P`jV)bZ1`q_e&7-laN;{guL) zx|5gyM*vMxQbcy48YR#CC65NrkZ5BB=KiM3HU*+YBb^QHe{h_ue6p4+dD>RCCb_p+ z*RlcZGR@Tk=8O(b3ylxD&T2Nd#?S6wLxI^(l(Z;Tf*?~Rog2vqTMrRU_4PaAI^F+- zdr|pxmkXQ%b@venxjw)P#Ws@cqx}75BMMI!4_1KSsop~v%o&O7oqX_EsmVCz=YnUZ zN8NS)F(6dVMPp;$yT(nDl6#CUm_!k#*1G}kSV6z){JPfFdB`}Ni)iZ($w0ewV76CU zJYrQj&63DnRYzz~q+#L|kC7%D>K6hPu~BY<_5{LAOdk*G>t-{hK$71NKtpA|l#=OE z4&d?LcXrt(Lu_j>FreuN_6u`sH`YG|0cON7h^E1;+D(Q1iBbRqWK_OkyfQkLB?ll%X|s7nne@NEzEOc*Ee-2B$vi#-gLvD>Z{fzZqW;-9jO@KgVNk!T}z##!9f4$F&r^(4X6=Bp(dwIjGqDvm6vKh2=DfeM?6!@Er z;W0}<3OFkDOgx{B;l{!0k>D=}lWE|Q#09qBMZB2o?gl(#?Ig!+_Q8X%fVE*Ywb)f% zs#e8(!FNgU>{PSqF3!X=58bJ`^-ZC983*fjwKoVtX?_T@xu z*twD+ctEb$7k0^cNPTtT3UJ0r;*?gCOA;vY6M@>%+V#@ptgqlAFprHFrke}%Y~Iz{ z-#2p8%H-6>LoM#CkliDLbxoH1$dGfQqm8-Z8~h^WjmBk_!1)l^zq=zv4_)>9ntAWE zA0JaHN#nG^X>(|wDgEGCnbT_)#-T#l{BJiM@(Vwxls)hhiC@>O;yPQM*Hyq~T@x|f 
zV2yeQoNCKHK7tq-&`DdR2xOVlLpT5yaTSU;9)K*<()on+Y$<_R#!5af=_bQMSBMMrlD`(jzih>V4Ku@HGn4$z$CObMQ_#r$7VvIEfImzm#5-64NKk6}j|-pp^PjxBaCmI`iLmdvqO zWSuvdT&D2$+CKw<>Yytxbd<+noBq7{Zej7stH;4h7Vm??F8=FpLvuj^fJrri1#jzC9caV8F1 z9r+C>UTr8|p$RryBQK$VdyDPucfj;p#>M7}a{Iet3bp5a>;0lA5R-=kY5U(?(j36R^GPL%!M8M8*cGxCIxN0+SLG3tK=kLKmqBLrF zc{f7}%iSRm@=ffN1%V*#oD7H{pMr%?!VVLKi+!uBZ!X^pm;$dnnbRR(Mg(}}h1$bX z-=VR@Qr`hI69M>>+f%s#P+1&=lq?h5)_3W_dg+{eRs23|6ySGsZC zW}*89F!i385)$-!4X*-N(T*uj>`mq&P`r41qr>1VA}#eGLfa)>N`PX2dx!pLd*_TVbDO@-#|S!J-V%kJEKeu{ zGo@~kpRYDaXX}f~e#I+Cb$$ol9RM)UqS!CDGCtj#GdlfNXzVvtK5YI#@<@!->6XEj z6e-}BbP2)?`)@RA`=5G0|0a2wrtP;aISf{nBw%{$I}lKCuhjXZg59?~wAjPux37WA_Isc|O~LlW2-_2aXJOddD^D^pgbk6g&+&!0 z976rD_W&Q?q{S2j52@h354!5e#bSI6KI7~A)Ns__l zeg<`skpIeqpNpx_PWBU?6HDl%#Zj9QAWQNvo^$>a4TU2I%Y?%gS}*a1iv0+`t1?g_ zaiR5H|C26DU@6`>C!J3&w06AZSTRGa8}^ei&p?P<1nA_o(wihqTu!#?jxRyB2+~k% z!+@=JQU7moYaSrL08&NGx4OHA1L0utPhSWT`k^d-`aC-rt^f2OIVT!Em-^IiA*9^# z^t<~QBE@s=J{}f5<3kkLCHgGmNn=EA5bfR7|!`rKl>ix)nm4P9C z(W*UXP#qJBcI2(Sh=p(6OvQmFAB;-zKojSyVCXK3a(bb`-GQL;$yQ;0Z}3%3#jQht zp@H1gD&o4%NFd08j+8>KrGtA8f@>icr8caLlB*yD-0EHMVhoyreM|t6wbypP!()x4 z0&mUL_GI;9z<+NKS$ap09SM-r5D^qrk(oW>#q?8{46+)x?!qDe z6$00T4?KF!rAlO;ljgoYbMIPa@QYV{*U%>y^Pg3I(ja0-{3z>=O3WiLX!Ih4IT;yH zoq(Z6jbnmk6vM*YBO|(T;Ba5e5n`;e_)-`n^fy3VJ zbiaPNS`j|5ZZf(deit;;l2CrW{|&m59Rz1h4}sIbbS#2H-nYlp7`f){yB#UpB#1zw z>fek4lB~!Ix?7z(=rJ}j|D&|)3~MUeqL)A@p^X|ul%}WzP!JpuK_Q?_taK?O7=pdi zQEDP!Ly)lyIMjdxC|wk!NCHT41VKT16VL>b(0fVVzRvsJydUr1%deE1+>?9HIcx8= z*Ey@qNyIrqF%_GqwJB1RnK-gbIM3m7cfVmxP2VxZQNAryi9p3HBJB$5yE^waH+h`c z?CzdV5#jPi;>?**LwT~sN=h$3Y+f%^i*ao{s*BXbBqM;M2eH#ad7)tx+osX`uTB|% zriNacyHMbAD3V2$vt0pW*rB}U*c#neJyB{#oD4en*Y+G^_%Cz(bgp>S3uzf1K>7{O zxJiI#@qT}CkXz#3r4j9gxL{m1*=|4d{AKM)-e7)ITflI1Kv1+t3dhghMcSsRSG^^e zIUap$N3{TLwyIcYWQyyIf#XM z27R|ac^S#wHf&SU)Zb*6aZjWBcgz7M#YN;BB#Q$!sv$^gC}T9CKKOZ8p=;&)uQ1{~ zidT=A3*p5xiK9rCM97zx>?dch=<^g4x5NXXj~HY{B=3bw;fx!SVQ!y4ZGlSV4YVE1 z-u^f%#IZMJU3%Va%F0u|ISFs&1G_=Dq2pCCQ4KLxx<4)=;!Ypme@@8#psc9}1Q+cv 
z^mjbBg4v-xP(lu_N*FA2lE;~NgMG{NPk?jyJE)@N5xQdF|N0R~c$9SqwjcR1LmTkJ z)*K<1@rL^ti5q+u3wB1{EZ|i;Jt?o_^HXVxv0JdJEhcQ@d0L7^GAN@ zQR}6{jqZ5a5ORHnC0LT5aJJ<8c0FP|5dbKE$4ApkdFhO$h%km=Cfxy2*nm0ndZBbq z@OdTdy=-A@939xxgDJ2%aE8_L_&$d%9^jo@^|55yohI`zCcMMPpl~W}zT; zju8CYk!^2+2D+?Vp8%=;r{Epv28Uw+^zNKXudal%EB>u%1-2+c3 zL%*ZJjw|VQr^{>yiyZQ5p;*2UC>$db5hCrCoH(X`1H+RASV{FWK=)ZO zVE2a#zTLLmpmF|`Of25JQ|8XBb#fg}#0k5C6=t$n%!SWb*l56HawT2BS>S!&v<-1S z%_8-oJ|!5gaBu$k_e!h;F)GbL8Nbe_se&U|(QvXL9FcUwCKN8wgYbkxv~>Z|Ycc(M zlW!ZZ17!ri*Y;(0R1mXzHgH-Q7VrV=-=$IUCk5? z&3v!JvED$;h)r}pejf>`qWWCuc7hcu&%WAZMnY4?2;Mk*AIRWAfIaY9H&^4v!K65S8!x`UxB~poppX?vm}+-4I+Rbjubd6Z2u%7Mn^cNkIOpU04No z16=44gF9_$N72;Yuvq1?Da}nQS_jcd+)>Xz>c|z2^t2>pKVBX>k>|s<<7Q+QfA%I> zI%Rih@=o3A{F733@!WcbU*Z5k;hFxQ_sLprX;YS0_xHHf(9d$(Dh7`t&X;eK#;l3z zSWgkxN7&;T>)PEtaz-K}JuRXa@LBOz4@%MwWSD&jH*L<4dF_+Bb%7Tq;bV}!T-_mA zHGMe2BC}pJftbyCOjej1+n(0>q`2hT?>S$1_=4v#;xdj(%o2h#y zE(X0|T@>lBv1P9ke$_MbPgfZdW9GQ#;PMs&^W*4$-JMNp(epukv*@#%gLwEYYYS8M z``K|{)K{FHd+4T;>2}yOAZJ!@frKk*w+QWpdG|mn6jiu2(K^4cN7_5RPc`)V>j=v{ z9LsKXtNbg=%4w*ZP|V5b4%BuRjR}5${cTOGxuYl9F^Tf2`&`RS49xaEIB3I1@pfVI zm!Utw>iVnmuC5bt4c^)xC*yYcJp1MNCVS1SR)*8C8n28?rfQ>woQ9&F0>Pc4`$oQH9*R|HiJPg&4B*YA)do*zHiWeE zxl>)G>f|4U=?oM(IbhjXQd+8hEb-+P)(_QE5G82d820;+^Gzo5@i70U2Mb0%FSUjQ zeOb(v4&-Qo_U{vY_<%%wK%#OSb&A!%UWGe-6lySQO}2ckT*K&@4J9)s zRT|$;jy0Bp8<7)~^T+0r0>0fAO#NjUG8aR3M9S}o8=T_fysT9nP!3hn=$+-=qKB%E z1GiKJXEy-D=|D?bJ_`W0^r0F4zz0^5$r6ymI1RRQ@lHd~x7sA@QbSe-qF5()oc+M( zSaxl>q)s3In@QDs!9+_*mXH1oc7TnekX?A%J4Rq`*s3B_EvdCqCQHjvLtW+scvJbmA?zQ^ge$N)nmSSEAD^?eKMUG=^$R3q)t;J=5pCC9ryEElJvf zeb5mQ;nrTcv=kz`(E;VH{x>hE2z{iJQjZX#H==OiZbqs^g~db9cgSHqL3$;_d99HI zaN#bqZ@r5dO$s`YSLhnU)u4Tk-)I^B9BtP!Qr5>;vkw}i0EXvd)cq3PmnM6$TiB~{ zr_F^dkw(xUc1xg&b=Z=rXJhRuxU)LgEbDzGr?_8|9#wpt6FOJzLQ%&J9YK%Q+8$fl zU*jZM{_~|z51k^e76)&HioGRmi(Vy&EUl4lhmrr)nQndK1gGzYzP(U4)GeC}qk@=CH7S9DLM& zgHVx%SH))EwXKxG6Tss74H2wiQJ-q_34b8W!IWHd97^4-)nZs&7L)xcwd2;ayJYm> zFw9!2fG1|MxI8nJ^Hi4wb=AP{Q;Gzuz1XzI-zEg}jFoZF8@7u%i}#V#tyO)nLy^Lz 
zzOpN4SflcVZ~3L!xhr!KMtiK1V{vu!F{Q*&Y_S@5x5xOVsFonSDA4wD(eiw5?^I1x z+uZVvr5sk#%8aazU`941I8qpkOJ{xvc`~8nN9&gKB-xR}s^juJIg9LdxM#W4XUTK1 zPHbLCkDAWc061Ta9~bl{DZs3QlUp~|Q>RX;B9qWOcg&HBF}ETr&sGm|J9lo*Vda+I z27&HvVeGv$>is|kfv5bs>El-eY?{o+Y16B)pY4cyqjWiEjOWJ`R;#?DVXAGI>z7{_ z_l@wxsm~e B1(5&% literal 0 HcmV?d00001 diff --git a/gensrc/proto/segment_v2.proto b/gensrc/proto/segment_v2.proto new file mode 100644 index 00000000000000..f02ac348648452 --- /dev/null +++ b/gensrc/proto/segment_v2.proto @@ -0,0 +1,103 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. +// Define file format struct, like data header, index header. 
+ +package doris.v2; + +message ColumnSchemaPB { + optional uint32 column_id = 1; + optional string type = 2; + optional string aggregation = 3; + optional uint32 length = 4; + optional bool is_key = 5; + optional string default_value = 6; + optional uint32 precision = 9 [default = 27]; + optional uint32 frac = 10 [default = 9]; + optional bool is_nullable = 11 [default=false]; + optional bool is_bf_column = 15 [default=false]; // is bloom filter indexed column + optional bool is_bitmap_column = 16 [default=false]; +} + +// page position info +message PagePointerPB { + required uint64 offset = 1; // offset in segment file + required uint32 length = 2; // size of page in byte +} + +message MetadataPairPB { + optional string key = 1; + optional bytes value = 2; +} + +enum EncodingTypePB { + PLAIN_ENCODING = 1; + PREFIX_ENCODING = 2; + RLE = 4; + DICT_ENCODING = 5; + BIT_SHUFFLE = 6; + UNKNOWN_ENCODING = 1000; +} + +enum CompressionTypePB { + DEFAULT_COMPRESSION = 0; + NO_COMPRESSION = 1; + SNAPPY = 2; + LZ4 = 3; + ZLIB = 4; + ZSTB = 5; + LZO = 6; + UNKNOWN_COMPRESSION = 1000; +} + +message ZoneMapPB { + optional bytes min = 1; + optional bytes max = 2; + optional bool null_flag = 3; +} + +message ColumnMetaPB { + optional EncodingTypePB encoding = 1; + + optional PagePointerPB dict_page = 2;// dictionary page for DICT_ENCODING + repeated PagePointerPB bloom_filter_pages = 3; // bloom filter pages for bloom filter column + optional PagePointerPB ordinal_index_page = 4; // ordinal index page + optional PagePointerPB page_zonemap_page = 5; // page zonemap info of column + + optional PagePointerPB bitmap_index_page = 6; // bitmap index page + + optional uint64 data_footprint = 7; // data footprint of column after encoding and compress + optional uint64 index_footprint = 8; // index footprint of column after encoding and compress + optional uint64 raw_data_footprint = 9; // raw column data footprint + + optional CompressionTypePB compress_type = 10; // compress type for 
column + + optional ZoneMapPB column_zonemap = 11; // column zonemap info + repeated MetadataPairPB column_meta_datas = 12; +} + +message FileFooterPB { + optional uint32 version = 1 [default = 1]; // file version + repeated ColumnSchemaPB schema = 2; // tablet schema + optional uint64 num_values = 3; // number of values + optional uint64 index_footprint = 4; // total idnex footprint of all columns + optional uint64 data_footprint = 5; // total data footprint of all columns + optional uint64 raw_data_footprint = 6; // raw data footprint + + optional CompressionTypePB compress_type = 7 [default = LZO]; // default compression type for file columns + repeated MetadataPairPB file_meta_datas = 8; // meta data of file + optional PagePointerPB key_index_page = 9; // short key index page +} From 0a702ea92e20e2c6f8d3b2eb3fd3e96f5efaae59 Mon Sep 17 00:00:00 2001 From: huangkangping Date: Tue, 4 Jun 2019 15:32:20 +0800 Subject: [PATCH 2/5] add api comments --- be/src/olap/rowset/segment_v2/ordinal_index.h | 13 ++++++++++++- be/src/olap/rowset/segment_v2/short_index.h | 15 +++++++++++++++ 2 files changed, 27 insertions(+), 1 deletion(-) diff --git a/be/src/olap/rowset/segment_v2/ordinal_index.h b/be/src/olap/rowset/segment_v2/ordinal_index.h index be6d229630eea5..98a71744336c62 100644 --- a/be/src/olap/rowset/segment_v2/ordinal_index.h +++ b/be/src/olap/rowset/segment_v2/ordinal_index.h @@ -29,12 +29,16 @@ namespace segment_v2 { class OrdinalIndexReader { public: + // parse the data bool init(const Slice& data); + // return the entry number of the index size_t count(); + // compare the row_id in idx_in_block to the row_id int compare_key(int idx_in_block, const rowid_t row_id); + // get the OrdinalIndex from the reader std::unique_ptr get_short_key_index(); }; @@ -42,8 +46,10 @@ class OrdinalIndexWriter { public: bool init(); - bool add_entry(doris::Slice* key, rowid_t rowid); + // add a rowid -> page_pointer entry to the index + bool add_entry(rowid_t rowid, const 
PagePointerPB& page_pointer); + // return the index data dorsi::Slice finish(); }; @@ -51,10 +57,15 @@ class OrdinalIndex { public: OrdinalIndex(OrdinalIndexReader* reader); + // seek the the first entry when the rowid is equal to or greater than row_id + // if equal, matched will be set to true, else false bool seek_at_or_after(const rowid_t row_id, bool* matched); + // seek the the first entry when the rowid is equal to or less than row_id + // if equal, matched will be set to true, else false bool seek_at_or_before(const rowid_t row_id, bool* matched); + // return the current seeked index related page pointer void get_current_page_pointer(PagePointerPB* page_pointer); private: diff --git a/be/src/olap/rowset/segment_v2/short_index.h b/be/src/olap/rowset/segment_v2/short_index.h index 8d2b6669591ff7..cd963c3861810e 100644 --- a/be/src/olap/rowset/segment_v2/short_index.h +++ b/be/src/olap/rowset/segment_v2/short_index.h @@ -28,12 +28,16 @@ namespace segment_v2 { class ShortKeyIndexReader { public: + // parse index data bool init(const Slice& data); + // return the entry number of the index size_t count(); + // compare the short key in idx_in_block to the key int compare_key(int idx_in_block, const Slice& key); + // get the ShortKeyIndex from the reader std::unique_ptr get_short_key_index(); }; @@ -41,8 +45,10 @@ class ShortKeyIndexWriter { public: bool init(); + // add a short key -> rowid entry to the index bool add_entry(doris::Slice* key, rowid_t rowid); + // return the index data dorsi::Slice finish(); }; @@ -50,14 +56,23 @@ class ShortKeyIndex { public: ShortKeyIndex(ShortKeyIndexReader* reader); + // seek the the first entry when the short key is equal to or greater than key + // if equal, matched will be set to true, else false bool seek_at_or_after(const doris::Slice& key, bool* matched); + // seek the the first entry when the short key is equal to or less than key + // if equal, matched will be set to true, else false bool seek_at_or_before(const 
doris::Slice& key, bool* matched); + // return the current row id of current index entry rowid_t get_current_rowid(); + // Seek the index to previous one + // If the current index is 0, return false bool prev(); + // Seek the index to next one + // If the current index is tee last one, return false bool next(); private: From daff2f5e93c39605f92234e86e2e5eb08fa21afd Mon Sep 17 00:00:00 2001 From: huangkangping Date: Tue, 4 Jun 2019 15:50:23 +0800 Subject: [PATCH 3/5] Modify doris_storage_optimization markdown --- .../cn/extending-doris/doris_storage_optimization.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/docs/documentation/cn/extending-doris/doris_storage_optimization.md b/docs/documentation/cn/extending-doris/doris_storage_optimization.md index 90c36c74fae5a1..54fae85fc0ff29 100644 --- a/docs/documentation/cn/extending-doris/doris_storage_optimization.md +++ b/docs/documentation/cn/extending-doris/doris_storage_optimization.md @@ -15,6 +15,8 @@ - 4个字节的FileFooterPB消息长度,用于读取FileFooterPB - 8个字节的MAGIC CODE,之所以在末位存储,是方便不同的场景进行文件类型的识别 +文件中的数据按照page的方式进行组织,page是编码和压缩的基本单位。现在的page类型包括以下几种: + ### DataPage ### DataPage分为两种:nullable和non-nullable的data page。 From 9c812d673753967f2eb97b98f1cce3d2ec4bb072 Mon Sep 17 00:00:00 2001 From: huangkangping Date: Wed, 5 Jun 2019 15:40:28 +0800 Subject: [PATCH 4/5] Optimize api design --- be/src/olap/rowset/segment_v2/column_reader.h | 14 ++++++++------ be/src/olap/rowset/segment_v2/column_writer.h | 19 ++++++++++--------- be/src/olap/rowset/segment_v2/ordinal_index.h | 13 +++++++------ be/src/olap/rowset/segment_v2/page_builder.h | 7 ++++--- be/src/olap/rowset/segment_v2/page_decoder.h | 9 +++++---- 5 files changed, 34 insertions(+), 28 deletions(-) diff --git a/be/src/olap/rowset/segment_v2/column_reader.h b/be/src/olap/rowset/segment_v2/column_reader.h index 412c2a93bd48ac..fa49654884571e 100644 --- a/be/src/olap/rowset/segment_v2/column_reader.h +++ b/be/src/olap/rowset/segment_v2/column_reader.h @@ -19,6 +19,7 @@ #define 
DORIS_BE_SRC_OLAP_ROWSET_SEGMENT_V2_COLUMN_READER_H #include "runtime/vectorized_row_batch.h" +#include "common/status.h" namespace doris { @@ -28,16 +29,16 @@ class ColumnReader { public: ColumnReader() { } - bool init(); + doris::Status init(); // Seek to the first entry in the column. - bool seek_to_first(); + doris::Status seek_to_first(); // Seek to the given ordinal entry in the column. // Entry 0 is the first entry written to the column. // If provided seek point is past the end of the file, // then returns false. - bool seek_to_ordinal(rowid_t ord_idx) override; + doris::Status seek_to_ordinal(rowid_t ord_idx) override; // Fetch the next vector of values from the page into 'dst'. // The output vector must have space for up to n cells. @@ -47,16 +48,17 @@ class ColumnReader { // In the case that the values are themselves references // to other memory (eg Slices), the referred-to memory is // allocated in the dst column vector's arena. - virtual size_t next_batch(const size_t n, doris::ColumnVector *dst) = 0; + virtual doris::Status next_batch(size_t* n, doris::ColumnVector* dst, MemPool* mem_pool) = 0; + // Get current oridinal size_t get_current_oridinal(); // Call this function every time before next_batch. // This function will preload pages from disk into memory if necessary. 
- bool prepare_batch(size_t n); + doris::Status prepare_batch(size_t n); // release next_batch related resource - bool finish_batch(); + doris::Status finish_batch(); }; } // namespace segment_v2 diff --git a/be/src/olap/rowset/segment_v2/column_writer.h b/be/src/olap/rowset/segment_v2/column_writer.h index 95883e83a56345..d85ac5f669d507 100644 --- a/be/src/olap/rowset/segment_v2/column_writer.h +++ b/be/src/olap/rowset/segment_v2/column_writer.h @@ -22,6 +22,7 @@ #include "gen_cpp/doris.pb.h" #include "util/slice.h" +#include "common/status.h" namespace doris { @@ -34,32 +35,32 @@ class ColumnWriter { : _builder_options(builder_options), _column_schema(column_schema) { } - bool init(); + doris::Status init(); // close the writer - bool finish(); + doris::Status finish(); // Caller will loop all the ColumnWriter and call the following get page api // to get page data and get the page pointer - bool get_data_pages(std::vector* data_buffers); + doris::Status get_data_pages(std::vector* data_buffers); // Get the dictionary page for under dictionary encoding mode column. - virtual bool get_dictionary_page(doris::Slice* dictionary_page); + doris::Status get_dictionary_page(doris::Slice* dictionary_page); // Get the bloom filter pages for under bloom filter indexed column. - virtual bool get_bloom_filter_pages(std::vector* bf_pages); + doris::Status get_bloom_filter_pages(std::vector* bf_pages); // Get the bitmap page for under bitmap indexed column. - virtual bool get_bitmap_page(doris::Slice* bitmap_page); + doris::Status get_bitmap_page(doris::Slice* bitmap_page); // Get the statistic page for under statistic column. 
- virtual bool get_statistic_page(doris::Slice* statistic_page); + doris::Status get_statistic_page(doris::Slice* statistic_page); - bool write_batch(doris::RowBlock* block); + doris::Status write_batch(doris::RowBlock* block); size_t written_size() const; - int written_value_count() const; + size_t written_value_count() const; private: BuilderOptions _builder_options; diff --git a/be/src/olap/rowset/segment_v2/ordinal_index.h b/be/src/olap/rowset/segment_v2/ordinal_index.h index 98a71744336c62..cbbd235e35166e 100644 --- a/be/src/olap/rowset/segment_v2/ordinal_index.h +++ b/be/src/olap/rowset/segment_v2/ordinal_index.h @@ -19,6 +19,7 @@ #include "util/slice.h" #include "gen_cpp/segment_v2.pb.h" +#include "common/status.h" #ifndef DORIS_BE_SRC_OLAP_ROWSET_SEGMENT_V2_ORDINAL_INDEX_H #define DORIS_BE_SRC_OLAP_ROWSET_SEGMENT_V2_ORDINAL_INDEX_H @@ -30,7 +31,7 @@ namespace segment_v2 { class OrdinalIndexReader { public: // parse the data - bool init(const Slice& data); + doris::Status init(const Slice& data); // return the entry number of the index size_t count(); @@ -44,13 +45,13 @@ class OrdinalIndexReader { class OrdinalIndexWriter { public: - bool init(); + doris::Status init(); // add a rowid -> page_pointer entry to the index - bool add_entry(rowid_t rowid, const PagePointerPB& page_pointer); + doris::Status add_entry(rowid_t rowid, const PagePointerPB& page_pointer); // return the index data - dorsi::Slice finish(); + doris::Slice finish(); }; class OrdinalIndex { @@ -59,11 +60,11 @@ class OrdinalIndex { // seek the the first entry when the rowid is equal to or greater than row_id // if equal, matched will be set to true, else false - bool seek_at_or_after(const rowid_t row_id, bool* matched); + doris::Status seek_at_or_after(const rowid_t row_id, bool* matched); // seek the the first entry when the rowid is equal to or less than row_id // if equal, matched will be set to true, else false - bool seek_at_or_before(const rowid_t row_id, bool* matched); + 
doris::Status seek_at_or_before(const rowid_t row_id, bool* matched); // return the current seeked index related page pointer void get_current_page_pointer(PagePointerPB* page_pointer); diff --git a/be/src/olap/rowset/segment_v2/page_builder.h b/be/src/olap/rowset/segment_v2/page_builder.h index aad5d421fc1f2d..38b60413c31be2 100644 --- a/be/src/olap/rowset/segment_v2/page_builder.h +++ b/be/src/olap/rowset/segment_v2/page_builder.h @@ -22,6 +22,7 @@ #include #include "util/slice.h" +#include "common/status.h" namespace doris { @@ -47,13 +48,13 @@ class PageBuilder { // than requested if the page is full. // // vals size should be decided according to the page build type - virtual int add(const uint8_t* vals, size_t count) = 0; + virtual doris::Status add(const uint8_t* vals, size_t count) = 0; // Get the dictionary page for under dictionary encoding mode column. - virtual bool get_dictionary_page(doris::Slice* dictionary_page); + virtual doris::Status get_dictionary_page(doris::Slice* dictionary_page); // Get the bitmap page for under bitmap indexed column. - virtual bool get_bitmap_page(doris::Slice* bitmap_page); + virtual doris::Status get_bitmap_page(doris::Slice* bitmap_page); // Return a Slice which represents the encoded data of current page. // diff --git a/be/src/olap/rowset/segment_v2/page_decoder.h b/be/src/olap/rowset/segment_v2/page_decoder.h index 70ef087492e219..30dba19467286b 100644 --- a/be/src/olap/rowset/segment_v2/page_decoder.h +++ b/be/src/olap/rowset/segment_v2/page_decoder.h @@ -19,6 +19,7 @@ #define DORIS_BE_SRC_OLAP_ROWSET_SEGMENT_V2_PAGE_DECODER_H #include "runtime/vectorized_row_batch.h" +#include "common/status.h" namespace doris { @@ -31,7 +32,7 @@ class PageDecoder { // Call this to do some preparation for decoder. // eg: parse data page header - virtual bool init() = 0; + virtual doris::Status init() = 0; // Seek the decoder to the given positional index of the page. 
// For example, seek_to_position_in_page(0) seeks to the first @@ -39,7 +40,7 @@ class PageDecoder { // // It is an error to call this with a value larger than Count(). // Doing so has undefined results. - virtual void seek_to_position_in_page(size_t pos) = 0; + virtual doris::Status seek_to_position_in_page(size_t pos) = 0; // Seek the decoder forward by a given number of rows, or to the end // of the page. This is primarily used to skip over data. @@ -59,8 +60,8 @@ class PageDecoder { // // In the case that the values are themselves references // to other memory (eg Slices), the referred-to memory is - // allocated in the dst column vector's arena. - virtual size_t next_batch(const size_t n, doris::ColumnVector *dst) = 0; + // allocated in the mem_pool. + virtual doris::Status next_batch(size_t* n, doris::ColumnVector* dst, MemPool* mem_pool) = 0; // Return the number of elements in this page. virtual size_t count() const = 0; From 690f2274a555886bb1661bf432354cb5a2ddd533 Mon Sep 17 00:00:00 2001 From: huangkangping Date: Wed, 5 Jun 2019 15:58:56 +0800 Subject: [PATCH 5/5] add type's namespace --- be/src/olap/rowset/segment_v2/column_reader.h | 2 +- be/src/olap/rowset/segment_v2/page_decoder.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/be/src/olap/rowset/segment_v2/column_reader.h b/be/src/olap/rowset/segment_v2/column_reader.h index fa49654884571e..0d8fd722541ec0 100644 --- a/be/src/olap/rowset/segment_v2/column_reader.h +++ b/be/src/olap/rowset/segment_v2/column_reader.h @@ -48,7 +48,7 @@ class ColumnReader { // In the case that the values are themselves references // to other memory (eg Slices), the referred-to memory is // allocated in the dst column vector's arena. 
- virtual doris::Status next_batch(size_t* n, doris::ColumnVector* dst, MemPool* mem_pool) = 0; + virtual doris::Status next_batch(size_t* n, doris::ColumnVector* dst, doris::MemPool* mem_pool) = 0; // Get current oridinal size_t get_current_oridinal(); diff --git a/be/src/olap/rowset/segment_v2/page_decoder.h b/be/src/olap/rowset/segment_v2/page_decoder.h index 30dba19467286b..216a01566c78c0 100644 --- a/be/src/olap/rowset/segment_v2/page_decoder.h +++ b/be/src/olap/rowset/segment_v2/page_decoder.h @@ -61,7 +61,7 @@ class PageDecoder { // In the case that the values are themselves references // to other memory (eg Slices), the referred-to memory is // allocated in the mem_pool. - virtual doris::Status next_batch(size_t* n, doris::ColumnVector* dst, MemPool* mem_pool) = 0; + virtual doris::Status next_batch(size_t* n, doris::ColumnVector* dst, doris::MemPool* mem_pool) = 0; // Return the number of elements in this page. virtual size_t count() const = 0;