Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions be/src/common/config.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1298,6 +1298,9 @@ DEFINE_mBool(skip_loading_stale_rowset_meta, "false");

DEFINE_Bool(enable_file_logger, "true");

// The minimum row group size when exporting Parquet files. default 128MB
DEFINE_Int64(min_row_group_size, "134217728");

// clang-format off
#ifdef BE_TEST
// test s3
Expand Down
3 changes: 3 additions & 0 deletions be/src/common/config.h
Original file line number Diff line number Diff line change
Expand Up @@ -1381,6 +1381,9 @@ DECLARE_mBool(skip_loading_stale_rowset_meta);
// Only works when starting BE with --console.
DECLARE_Bool(enable_file_logger);

// The minimum row group size when exporting Parquet files.
DECLARE_Int64(min_row_group_size);

#ifdef BE_TEST
// test s3
DECLARE_String(test_s3_resource);
Expand Down
6 changes: 3 additions & 3 deletions be/src/vec/runtime/vparquet_transformer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@
#include <ostream>
#include <string>

#include "common/config.h"
#include "common/status.h"
#include "gutil/endian.h"
#include "io/fs/file_writer.h"
Expand Down Expand Up @@ -70,8 +71,6 @@

namespace doris::vectorized {

const uint64_t min_row_group_size = 128 * 1024 * 1024; // 128MB

ParquetOutputStream::ParquetOutputStream(doris::io::FileWriter* file_writer)
: _file_writer(file_writer), _cur_pos(0), _written_len(0) {
set_mode(arrow::io::FileMode::WRITE);
Expand Down Expand Up @@ -247,6 +246,7 @@ Status VParquetTransformer::_parse_properties() {
}
builder.created_by(
fmt::format("{}({})", doris::get_short_version(), parquet::DEFAULT_CREATED_BY));
builder.max_row_group_length(std::numeric_limits<int64_t>::max());
_parquet_writer_properties = builder.build();
_arrow_properties = parquet::ArrowWriterProperties::Builder()
.enable_deprecated_int96_timestamps()
Expand Down Expand Up @@ -296,7 +296,7 @@ Status VParquetTransformer::write(const Block& block) {

RETURN_DORIS_STATUS_IF_ERROR(_writer->WriteRecordBatch(*result));
_write_size += block.bytes();
if (_write_size >= min_row_group_size) {
if (_write_size >= doris::config::min_row_group_size) {
RETURN_DORIS_STATUS_IF_ERROR(_writer->NewBufferedRowGroup());
_write_size = 0;
}
Expand Down