Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
65 changes: 65 additions & 0 deletions be/src/olap/rowset/dfile/column_reader.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.

#ifndef DORIS_BE_SRC_OLAP_ROWSET_DFILE_COLUMN_READER_H
#define DORIS_BE_SRC_OLAP_ROWSET_DFILE_COLUMN_READER_H

#include "runtime/vectorized_row_batch.h"

namespace doris {

namespace dfile {

// Abstract reader for the values of a single column.
//
// Concrete readers implement next_vector(); the remaining operations are
// declared here and defined elsewhere.
//
// NOTE(review): rowid_t is declared in dfile/common.h, which this header does
// not include directly -- confirm it is reachable through another include.
class ColumnReader {
public:
    ColumnReader() { }

    // The class is used polymorphically (next_vector() is pure virtual), so
    // the destructor is virtual to make deletion through a ColumnReader*
    // well defined.
    virtual ~ColumnReader() { }

    bool init();

    // Seek to the first entry in the column.
    bool seek_to_first();

    // Seek to the given ordinal entry in the column.
    // Entry 0 is the first entry written to the column.
    // If the provided seek point is past the end of the file,
    // then returns false.
    //
    // Not marked 'override': ColumnReader has no base class, so there is
    // nothing to override and the specifier would be ill-formed.
    bool seek_to_ordinal(rowid_t ord_idx);

    // Fetch the next vector of values from the page into 'dst'.
    // The output vector must have space for up to n cells.
    //
    // Returns the number of entries fetched.
    //
    // In the case that the values are themselves references
    // to other memory (eg Slices), the referred-to memory is
    // allocated in the dst column vector's arena.
    virtual size_t next_vector(const size_t n, ColumnVector *dst) = 0;

    // NOTE(review): "oridinal" looks like a typo for "ordinal"; the name is
    // kept unchanged so that existing callers keep compiling. Presumably
    // returns the ordinal the reader is currently positioned at -- confirm
    // against the implementation.
    size_t get_current_oridinal();

    // Called each time a batch is about to be read.
    bool prepare_batch(size_t n);

    // Release the resources associated with reading the batch.
    bool finish_batch();
};

} // namespace dfile

} // namespace doris

#endif // DORIS_BE_SRC_OLAP_ROWSET_DFILE_COLUMN_READER_H
70 changes: 70 additions & 0 deletions be/src/olap/rowset/dfile/column_writer.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.

#ifndef DORIS_BE_SRC_OLAP_ROWSET_DFILE_COLUMN_WRITER_H
#define DORIS_BE_SRC_OLAP_ROWSET_DFILE_COLUMN_WRITER_H

#include <vector>

#include "gen_cpp/doris.pb.h"
#include "util/slice.h"

namespace doris {

namespace dfile {

class ColumnWriter {
public:
explicit ColumnWriter(BuilderOptions builder_options, ColumnSchemaPB* column_schema)
: _builder_options(builder_options),
_column_schema(column_schema) { }

bool init();

// close the writer
bool finish();

// 循环各个ColumnWriter, 通过以下几个接口,来构造对应的page pointer
// 之所以需要分这么多接口,是为了最终获取绝对位置,来构造page pointer
bool get_data_pages(std::vector<Slice*>* data_buffers);

// Get the dictionary page for under dictionary encoding mode column.
virtual bool get_dictionary_page(Slice* dictionary_page);

// Get the bloom filter page for under bloom filter indexed column.
virtual bool get_bloom_filter_pages(std::vector<Slice*>* bf_page);

// Get the bitmap page for under bitmap indexed column.
virtual bool get_bitmap_page(Slice* bitmap_page);

bool write_batch(RowBlock* block);

size_t written_size() const;

int written_value_count() const;

private:
BuilderOptions _builder_options;
ColumnSchemaPB* _column_schema;
};

} // namespace dfile

} // namespace doris


#endif // DORIS_BE_SRC_OLAP_ROWSET_DFILE_COLUMN_WRITER_H
31 changes: 31 additions & 0 deletions be/src/olap/rowset/dfile/common.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.

#ifndef DORIS_BE_SRC_OLAP_ROWSET_DFILE_COMMON_H
#define DORIS_BE_SRC_OLAP_ROWSET_DFILE_COMMON_H

namespace doris {

namespace dfile {

// Row identifier type used by the dfile readers and page builders/decoders
// (e.g. seek_to_ordinal(), finish(), get_first_rowid()): an unsigned 32-bit
// integer.
using rowid_t = uint32_t;

} // namespace dfile

} // namespace doris

#endif // DORIS_BE_SRC_OLAP_ROWSET_DFILE_COMMON_H
47 changes: 47 additions & 0 deletions be/src/olap/rowset/dfile/options.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.

#ifndef DORIS_BE_SRC_OLAP_ROWSET_DFILE_OPTIONS_H
#define DORIS_BE_SRC_OLAP_ROWSET_DFILE_OPTIONS_H

#include "gen_cpp/doris.pb.h"

namespace doris {

namespace dfile {

// Plain aggregate of the options handed to a ColumnWriter at construction.
//
// No member has a default initializer, so a default-initialized
// BuilderOptions holds indeterminate values until every field is assigned.
struct BuilderOptions {
// Presumably the target size of a data page -- TODO confirm the unit (bytes?).
size_t data_page_size;

// Presumably the target size of a dictionary page -- TODO confirm the unit.
size_t dict_page_size;

// NOTE(review): looks like "write positional index" -- confirm with the writer.
bool write_posidx;

// Encoding selected for the column (protobuf enum EncodingTypePB).
EncodingTypePB encoding;

// Compression selected for the column (protobuf enum CompressionTypePB).
CompressionTypePB compression_type;

// Presumably whether the column may contain NULL values -- confirm.
bool is_nullable;

// Presumably whether the column carries a dictionary page -- confirm.
bool has_dictionary;
};

} // namespace dfile

} // namespace doris

#endif // DORIS_BE_SRC_OLAP_ROWSET_DFILE_OPTIONS_H
77 changes: 77 additions & 0 deletions be/src/olap/rowset/dfile/page_builder.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,77 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.

#ifndef DORIS_BE_SRC_OLAP_ROWSET_DFILE_PAGE_BUILDER_H
#define DORIS_BE_SRC_OLAP_ROWSET_DFILE_PAGE_BUILDER_H

#include <stdint.h>
#include <vector>

#include "util/slice.h"

namespace doris {

namespace dfile {

// Abstract interface for building one page of column data.
//
// A column writer feeds values through add(), polls is_page_full() to decide
// when to flush, and obtains the encoded bytes through finish().
//
// NOTE(review): rowid_t (dfile/common.h) and DISALLOW_COPY_AND_ASSIGN are not
// included by this header directly -- confirm they are reachable through
// another include.
class PageBuilder {
public:
    // Declared explicitly: DISALLOW_COPY_AND_ASSIGN below declares a copy
    // constructor, which suppresses the implicitly-declared default
    // constructor. Without this, no subclass could be constructed.
    PageBuilder() { }

    virtual ~PageBuilder() { }

    // Used by column writer to determine whether the current page is full.
    // Column writer depends on the result to decide whether to flush current page.
    virtual bool is_page_full() = 0;

    // Get the dictionary page for a column written in dictionary encoding mode.
    virtual bool get_dictionary_page(Slice* dictionary_page);

    // Get the bloom filter page for a bloom filter indexed column.
    virtual bool get_bloom_filter_page(std::vector<Slice*>* bf_page);

    // Get the bitmap page for a bitmap indexed column.
    virtual bool get_bitmap_page(Slice* bitmap_page);

    // Add a sequence of values to the page.
    // Returns the number of values actually added, which may be less
    // than requested if the page is full.
    //
    // The size of 'vals' is decided by the page build type.
    virtual int add(const uint8_t* vals, size_t count) = 0;

    // Return a Slice which represents the encoded data of the current page.
    //
    // This Slice points to internal data of this builder.
    //
    // NOTE(review): the original comment also mentions a page pointer whose
    // offset is relative to the current column and is revised by the column
    // writer; this signature only returns the Slice -- confirm where the
    // page pointer is produced.
    virtual Slice finish(rowid_t first_page_rowid) = 0;

    // Reset the internal state of the page builder.
    //
    // Any data previously returned by finish may be invalidated by this call.
    virtual void reset() = 0;

    // Return the number of entries that have been added to the page.
    virtual size_t count() const = 0;

private:
    DISALLOW_COPY_AND_ASSIGN(PageBuilder);
};

} // namespace dfile

} // namespace doris

#endif // DORIS_BE_SRC_OLAP_ROWSET_DFILE_PAGE_BUILDER_H
82 changes: 82 additions & 0 deletions be/src/olap/rowset/dfile/page_decoder.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,82 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.

#ifndef DORIS_BE_SRC_OLAP_ROWSET_DFILE_PAGE_DECODER_H
#define DORIS_BE_SRC_OLAP_ROWSET_DFILE_PAGE_DECODER_H

#include "runtime/vectorized_row_batch.h"

namespace doris {

namespace dfile {

class PageDecoder {
public:
virtual ~PageDecoder() { }

// Call this to do some preparation for decoder.
// eg: parse data block header
virtual bool init() = 0;

// Seek the decoder to the given positional index of the page.
// For example, seek_to_position_in_block(0) seeks to the first
// stored entry.
//
// It is an error to call this with a value larger than Count().
// Doing so has undefined results.
virtual void seek_to_position_in_block(size_t pos) = 0;

// Seek the decoder forward by a given number of rows, or to the end
// of the page. This is primarily used to skip over data.
//
// Return the step skipped.
virtual size_t seek_forward(size_t n) {
size_t step = std::min(n, count() - current_index());
DCHECK_GE(step, 0);
seek_to_position_in_block(current_index() + step);
return step;
}

// Fetch the next vector of values from the page into 'dst'.
// The output vector must have space for up to n cells.
//
// return the size of entries.
//
// In the case that the values are themselves references
// to other memory (eg Slices), the referred-to memory is
// allocated in the dst column vector's arena.
virtual size_t next_vector(const size_t n, ColumnVector *dst) = 0;

// Return the number of elements in this page.
virtual size_t count() const = 0;

// Return the position within the page of the currently seeked
// entry (ie the entry that will next be returned by next_vector())
virtual size_t current_index() const = 0;

// Return the first rowid stored in this page.
virtual rowid_t get_first_rowid() const = 0;

private:
DISALLOW_COPY_AND_ASSIGN(PageDecoder);
};

} // namespace dfile

} // namespace doris

#endif // DORIS_BE_SRC_OLAP_ROWSET_DFILE_PAGE_DECODER_H
Loading