Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
68 changes: 68 additions & 0 deletions be/src/olap/rowset/segment_v2/column_reader.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.

#ifndef DORIS_BE_SRC_OLAP_ROWSET_SEGMENT_V2_COLUMN_READER_H
#define DORIS_BE_SRC_OLAP_ROWSET_SEGMENT_V2_COLUMN_READER_H

#include "runtime/vectorized_row_batch.h"
#include "common/status.h"

namespace doris {

namespace segment_v2 {

// ColumnReader declares the read-side interface of a single column.
// The class is abstract: next_batch() is pure virtual and has to be supplied
// by a concrete reader.
class ColumnReader {
public:
    ColumnReader() { }

    // Virtual so that a concrete reader can be destroyed safely through a
    // ColumnReader pointer.
    virtual ~ColumnReader() { }

    doris::Status init();

    // Seek to the first entry in the column.
    doris::Status seek_to_first();

    // Seek to the given ordinal entry in the column.
    // Entry 0 is the first entry written to the column.
    // NOTE(review): the previous comment said this "returns false" when the
    // seek point is past the end of the file; the return type is Status, so
    // presumably a non-OK status is returned in that case -- confirm against
    // the implementation.
    //
    // This class has no base class, so the declaration must not be marked
    // 'override'.
    doris::Status seek_to_ordinal(rowid_t ord_idx);

    // Fetch the next vector of values from the page into 'dst'.
    // The output vector must have space for up to n cells.
    //
    // NOTE(review): 'n' is a pointer; presumably it carries the requested
    // count on input and the number of entries actually read on output --
    // confirm against the implementations.
    //
    // In the case that the values are themselves references
    // to other memory (eg Slices), the referred-to memory is
    // allocated in the dst column vector's arena.
    virtual doris::Status next_batch(size_t* n, doris::ColumnVector* dst, doris::MemPool* mem_pool) = 0;

    // Get the current ordinal.
    // NOTE(review): "oridinal" in the method name looks like a typo of
    // "ordinal"; the name is kept unchanged to preserve the interface.
    size_t get_current_oridinal();

    // Call this function every time before next_batch.
    // This function will preload pages from disk into memory if necessary.
    doris::Status prepare_batch(size_t n);

    // Release the resources related to next_batch.
    doris::Status finish_batch();
};

} // namespace segment_v2

} // namespace doris

#endif // DORIS_BE_SRC_OLAP_ROWSET_SEGMENT_V2_COLUMN_READER_H
75 changes: 75 additions & 0 deletions be/src/olap/rowset/segment_v2/column_writer.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.

#ifndef DORIS_BE_SRC_OLAP_ROWSET_SEGMENT_V2_COLUMN_WRITER_H
#define DORIS_BE_SRC_OLAP_ROWSET_SEGMENT_V2_COLUMN_WRITER_H

#include <vector>

#include "gen_cpp/doris.pb.h"
#include "util/slice.h"
#include "common/status.h"

namespace doris {

namespace segment_v2 {

// ColumnWriter is used to write data of a column
class ColumnWriter {
public:
explicit ColumnWriter(BuilderOptions builder_options, ColumnSchemaPB* column_schema)
: _builder_options(builder_options),
_column_schema(column_schema) { }

doris::Status init();

// close the writer
doris::Status finish();

// Caller will loop all the ColumnWriter and call the following get page api
// to get page data and get the page pointer
doris::Status get_data_pages(std::vector<doris::Slice*>* data_buffers);

// Get the dictionary page for under dictionary encoding mode column.
doris::Status get_dictionary_page(doris::Slice* dictionary_page);

// Get the bloom filter pages for under bloom filter indexed column.
doris::Status get_bloom_filter_pages(std::vector<doris::Slice*>* bf_pages);

// Get the bitmap page for under bitmap indexed column.
doris::Status get_bitmap_page(doris::Slice* bitmap_page);

// Get the statistic page for under statistic column.
doris::Status get_statistic_page(doris::Slice* statistic_page);

doris::Status write_batch(doris::RowBlock* block);

size_t written_size() const;

size_t written_value_count() const;

private:
BuilderOptions _builder_options;
ColumnSchemaPB* _column_schema;
};

} // namespace segment_v2

} // namespace doris


#endif // DORIS_BE_SRC_OLAP_ROWSET_SEGMENT_V2_COLUMN_WRITER_H
31 changes: 31 additions & 0 deletions be/src/olap/rowset/segment_v2/common.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.

#ifndef DORIS_BE_SRC_OLAP_ROWSET_SEGMENT_V2_COMMON_H
#define DORIS_BE_SRC_OLAP_ROWSET_SEGMENT_V2_COMMON_H

#include <stdint.h>

namespace doris {

namespace segment_v2 {

// Unsigned 32-bit row identifier used by the segment_v2 interfaces.
// uint32_t comes from <stdint.h>.
using rowid_t = uint32_t;

} // namespace segment_v2

} // namespace doris

#endif // DORIS_BE_SRC_OLAP_ROWSET_SEGMENT_V2_COMMON_H
47 changes: 47 additions & 0 deletions be/src/olap/rowset/segment_v2/options.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.

#ifndef DORIS_BE_SRC_OLAP_ROWSET_SEGMENT_V2_OPTIONS_H
#define DORIS_BE_SRC_OLAP_ROWSET_SEGMENT_V2_OPTIONS_H

#include "gen_cpp/segment_v2.pb.h"

namespace doris {

namespace segment_v2 {

// Plain bundle of settings; ColumnWriter receives one in its constructor.
// NOTE(review): the per-field notes below are inferred from the field names
// only -- confirm them against the code that consumes these options.
// None of the fields has a default value; callers must set every field.
struct BuilderOptions {
    // Presumably the target size of a data page -- TODO confirm the unit.
    size_t data_page_size;

    // Presumably the target size of a dictionary page -- TODO confirm the unit.
    size_t dict_page_size;

    // Presumably whether a position (ordinal) index is written -- TODO confirm.
    bool write_posidx;

    // Encoding to use, as declared in segment_v2.proto.
    EncodingTypePB encoding;

    // Compression to use, as declared in segment_v2.proto.
    CompressionTypePB compression_type;

    // Presumably whether the column may contain NULL values -- TODO confirm.
    bool is_nullable;

    // Presumably whether a dictionary page is produced -- TODO confirm.
    bool has_dictionary;
};

} // namespace segment_v2

} // namespace doris

#endif // DORIS_BE_SRC_OLAP_ROWSET_SEGMENT_V2_OPTIONS_H
81 changes: 81 additions & 0 deletions be/src/olap/rowset/segment_v2/ordinal_index.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,81 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.

#include <memory>

#include "util/slice.h"
#include "gen_cpp/segment_v2.pb.h"
#include "common/status.h"

#ifndef DORIS_BE_SRC_OLAP_ROWSET_SEGMENT_V2_ORDINAL_INDEX_H
#define DORIS_BE_SRC_OLAP_ROWSET_SEGMENT_V2_ORDINAL_INDEX_H

namespace doris {

namespace segment_v2 {

// Forward declaration: OrdinalIndexReader::get_short_key_index() names
// OrdinalIndex, which is defined further down in this header.
class OrdinalIndex;

// OrdinalIndexReader parses serialized ordinal index data and gives access
// to its entries.
class OrdinalIndexReader {
public:
    // Parse the index from 'data'.
    doris::Status init(const Slice& data);

    // Return the number of entries in the index.
    size_t count();

    // Compare the row id of the entry at position idx_in_block with row_id.
    // NOTE(review): the sign convention of the result is not visible in this
    // header; presumably negative / zero / positive -- confirm against the
    // implementation.
    int compare_key(int idx_in_block, const rowid_t row_id);

    // Get an OrdinalIndex from the reader.
    // NOTE(review): the method name mentions "short key" although it returns
    // an OrdinalIndex; the name is kept unchanged to preserve the interface.
    std::unique_ptr<OrdinalIndex> get_short_key_index();
};

// OrdinalIndexWriter collects (rowid, page pointer) entries and hands back
// the resulting index data.
class OrdinalIndexWriter {
public:
    doris::Status init();

    // Append one entry that maps 'rowid' to 'page_pointer'.
    doris::Status add_entry(rowid_t rowid, const PagePointerPB& page_pointer);

    // Return the index data as a Slice.
    doris::Slice finish();
};

// OrdinalIndex is a seekable cursor created from an OrdinalIndexReader.
class OrdinalIndex {
public:
    // explicit: a reader pointer must not convert silently into an index.
    explicit OrdinalIndex(OrdinalIndexReader* reader);

    // Seek to the first entry whose rowid is equal to or greater than row_id.
    // If an equal entry is found, *matched is set to true, otherwise to false.
    doris::Status seek_at_or_after(const rowid_t row_id, bool* matched);

    // Seek to the first entry whose rowid is equal to or less than row_id.
    // If an equal entry is found, *matched is set to true, otherwise to false.
    doris::Status seek_at_or_before(const rowid_t row_id, bool* matched);

    // Store into *page_pointer the page pointer of the entry the index is
    // currently positioned at.
    void get_current_page_pointer(PagePointerPB* page_pointer);

private:
    // Defaults guarantee a well-defined state even if the constructor does
    // not assign these members.
    bool _seeked = false;
    size_t _cur_idx = 0;
};

} // namespace segment_v2

} // namespace doris

#endif // DORIS_BE_SRC_OLAP_ROWSET_SEGMENT_V2_ORDINAL_INDEX_H
80 changes: 80 additions & 0 deletions be/src/olap/rowset/segment_v2/page_builder.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.

#ifndef DORIS_BE_SRC_OLAP_ROWSET_SEGMENT_V2_PAGE_BUILDER_H
#define DORIS_BE_SRC_OLAP_ROWSET_SEGMENT_V2_PAGE_BUILDER_H

#include <stdint.h>
#include <vector>

#include "util/slice.h"
#include "common/status.h"

namespace doris {

namespace segment_v2 {

// PageBuilder is used to build a page.
// A page is a data management unit. Page kinds include:
// 1. Data Page: stores encoded and compressed data
// 2. BloomFilter Page: stores the bloom filter of the data
// 3. Ordinal Index Page: stores the ordinal index of the data
// 4. Short Key Index Page: stores the short key index of the data
// 5. Bitmap Index Page: stores the bitmap index of the data
class PageBuilder {
public:
    // Declared explicitly: DISALLOW_COPY_AND_ASSIGN below declares a copy
    // constructor, which suppresses the implicit default constructor. Without
    // this declaration no derived builder could be constructed.
    PageBuilder() { }

    virtual ~PageBuilder() { }

    // Used by the column writer to determine whether the current page is full.
    // The column writer uses the result to decide whether to flush the
    // current page.
    virtual bool is_page_full() = 0;

    // Add a sequence of values to the page.
    //
    // The size of 'vals' is decided by the page build type.
    //
    // NOTE(review): the previous comment said the number of values actually
    // added is returned and may be less than requested when the page is full.
    // As declared, the method returns a Status and takes 'count' by value, so
    // that number cannot be reported through this signature -- confirm the
    // intended contract.
    virtual doris::Status add(const uint8_t* vals, size_t count) = 0;

    // Get the dictionary page of a column that uses dictionary encoding.
    virtual doris::Status get_dictionary_page(doris::Slice* dictionary_page);

    // Get the bitmap page of a column that has a bitmap index.
    virtual doris::Status get_bitmap_page(doris::Slice* bitmap_page);

    // Return a Slice which represents the encoded data of the current page.
    //
    // This Slice points to internal data of this builder.
    virtual Slice finish(rowid_t page_first_rowid) = 0;

    // Reset the internal state of the page builder.
    //
    // Any data previously returned by finish may be invalidated by this call.
    virtual void reset() = 0;

    // Return the number of entries that have been added to the page.
    virtual size_t count() const = 0;

private:
    DISALLOW_COPY_AND_ASSIGN(PageBuilder);
};

} // namespace segment_v2

} // namespace doris

#endif // DORIS_BE_SRC_OLAP_ROWSET_SEGMENT_V2_PAGE_BUILDER_H
Loading