From 4a4eecdb3ac34c2b9b1b15e3e825f3c8dab68ce3 Mon Sep 17 00:00:00 2001 From: Qi Chen Date: Tue, 14 May 2024 10:11:15 +0800 Subject: [PATCH] [Fix](hive-writer) Fix hive partition update file size and remove redundant column names. (#34651) --- be/src/vec/sink/writer/vhive_partition_writer.cpp | 7 ++++--- be/src/vec/sink/writer/vhive_partition_writer.h | 1 - 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/be/src/vec/sink/writer/vhive_partition_writer.cpp b/be/src/vec/sink/writer/vhive_partition_writer.cpp index e9e816219b6283..f88a695b3bf4be 100644 --- a/be/src/vec/sink/writer/vhive_partition_writer.cpp +++ b/be/src/vec/sink/writer/vhive_partition_writer.cpp @@ -66,7 +66,9 @@ Status VHivePartitionWriter::open(RuntimeState* state, RuntimeProfile* profile) std::vector<std::string> column_names; column_names.reserve(_columns.size()); for (int i = 0; i < _columns.size(); i++) { - column_names.emplace_back(_columns[i].name); + if (_non_write_columns_indices.find(i) == _non_write_columns_indices.end()) { + column_names.emplace_back(_columns[i].name); + } } switch (_file_format_type) { @@ -157,7 +159,6 @@ Status VHivePartitionWriter::write(vectorized::Block& block, vectorized::IColumn RETURN_IF_ERROR(_projection_and_filter_block(block, filter, &output_block)); RETURN_IF_ERROR(_file_format_transformer->write(output_block)); _row_count += output_block.rows(); - _input_size_in_bytes += output_block.bytes(); return Status::OK(); } @@ -200,7 +201,7 @@ THivePartitionUpdate VHivePartitionWriter::_build_partition_update() { hive_partition_update.__set_location(location); hive_partition_update.__set_file_names({_get_target_file_name()}); hive_partition_update.__set_row_count(_row_count); - hive_partition_update.__set_file_size(_input_size_in_bytes); + hive_partition_update.__set_file_size(_file_format_transformer->written_len()); if (_write_info.file_type == TFileType::FILE_S3) { doris::io::S3FileWriter* s3_mpu_file_writer = diff --git 
a/be/src/vec/sink/writer/vhive_partition_writer.h b/be/src/vec/sink/writer/vhive_partition_writer.h index e4fc2ebc24bd4e..912ac8b1e496b2 100644 --- a/be/src/vec/sink/writer/vhive_partition_writer.h +++ b/be/src/vec/sink/writer/vhive_partition_writer.h @@ -88,7 +88,6 @@ class VHivePartitionWriter { TUpdateMode::type _update_mode; size_t _row_count = 0; - size_t _input_size_in_bytes = 0; const VExprContextSPtrs& _vec_output_expr_ctxs; const VExprContextSPtrs& _write_output_expr_ctxs;