From e1c9ac4d0eed226c83ba856d06a975a27a6de6b2 Mon Sep 17 00:00:00 2001 From: pengxianyu Date: Fri, 6 May 2022 19:58:38 +0800 Subject: [PATCH 1/4] return error tablet list when close_wait return error --- be/src/olap/delta_writer.cpp | 9 ++++++++- be/src/olap/delta_writer.h | 3 ++- be/src/runtime/load_channel.h | 3 ++- be/src/runtime/tablets_channel.cpp | 7 ++++--- be/src/runtime/tablets_channel.h | 5 +++-- 5 files changed, 19 insertions(+), 8 deletions(-) diff --git a/be/src/olap/delta_writer.cpp b/be/src/olap/delta_writer.cpp index efd9d06212a60e..5d579cc42ce369 100644 --- a/be/src/olap/delta_writer.cpp +++ b/be/src/olap/delta_writer.cpp @@ -311,6 +311,7 @@ Status DeltaWriter::close() { } Status DeltaWriter::close_wait(google::protobuf::RepeatedPtrField* tablet_vec, + google::protobuf::RepeatedPtrField* tablet_errors, bool is_broken) { std::lock_guard l(_lock); DCHECK(_is_init) @@ -321,7 +322,13 @@ Status DeltaWriter::close_wait(google::protobuf::RepeatedPtrField* } // return error if previous flush failed - RETURN_NOT_OK(_flush_token->wait()); + Status s = Status::OK(); + if (!(s = _flush_token->wait()).ok()) { + PTabletError* tablet_error = tablet_errors->Add(); + tablet_error->set_tablet_id(_tablet->tablet_id()); + tablet_error->set_msg(s.get_error_msg()); + return s; + } // use rowset meta manager to save meta _cur_rowset = _rowset_writer->build(); diff --git a/be/src/olap/delta_writer.h b/be/src/olap/delta_writer.h index e095fff13216c8..1a4c81f8f15c13 100644 --- a/be/src/olap/delta_writer.h +++ b/be/src/olap/delta_writer.h @@ -67,7 +67,8 @@ class DeltaWriter { Status close(); // wait for all memtables to be flushed. // mem_consumption() should be 0 after this function returns. - Status close_wait(google::protobuf::RepeatedPtrField* tablet_vec, bool is_broken); + Status close_wait(google::protobuf::RepeatedPtrField* tablet_vec, + google::protobuf::RepeatedPtrField* tablet_errors, bool is_broken); // abandon current memtable and wait for all pending-flushing memtables to be destructed. // mem_consumption() should be 0 after this function returns. diff --git a/be/src/runtime/load_channel.h b/be/src/runtime/load_channel.h index fa88ec20e214df..1d0c3f04e1becc 100644 --- a/be/src/runtime/load_channel.h +++ b/be/src/runtime/load_channel.h @@ -81,7 +81,8 @@ class LoadChannel { bool finished = false; auto index_id = request.index_id(); RETURN_IF_ERROR(channel->close(request.sender_id(), request.backend_id(), &finished, - request.partition_ids(), response->mutable_tablet_vec())); + request.partition_ids(), response->mutable_tablet_vec(), + response->mutable_tablet_errors())); if (finished) { std::lock_guard l(_lock); _tablets_channels.erase(index_id); diff --git a/be/src/runtime/tablets_channel.cpp b/be/src/runtime/tablets_channel.cpp index d74b03342af8cd..04d9bcf8716cee 100644 --- a/be/src/runtime/tablets_channel.cpp +++ b/be/src/runtime/tablets_channel.cpp @@ -80,7 +80,8 @@ Status TabletsChannel::open(const PTabletWriterOpenRequest& request) { Status TabletsChannel::close(int sender_id, int64_t backend_id, bool* finished, const google::protobuf::RepeatedField& partition_ids, - google::protobuf::RepeatedPtrField* tablet_vec) { + google::protobuf::RepeatedPtrField* tablet_vec, + google::protobuf::RepeatedPtrField* tablet_errors) { std::lock_guard l(_lock); if (_state == kFinished) { return _close_status; @@ -128,7 +129,7 @@ Status TabletsChannel::close(int sender_id, int64_t backend_id, bool* finished, for (auto writer : need_wait_writers) { // close may return failed, but no need to handle it here. // tablet_vec will only contains success tablet, and then let FE judge it. - writer->close_wait(tablet_vec, (_broken_tablets.find(writer->tablet_id()) != + writer->close_wait(tablet_vec, tablet_errors, (_broken_tablets.find(writer->tablet_id()) != _broken_tablets.end())); } } @@ -259,4 +260,4 @@ std::ostream& operator<<(std::ostream& os, const TabletsChannelKey& key) { return os; } -} // namespace doris \ No newline at end of file +} // namespace doris diff --git a/be/src/runtime/tablets_channel.h b/be/src/runtime/tablets_channel.h index 725fe440832b57..eb39956cba68a1 100644 --- a/be/src/runtime/tablets_channel.h +++ b/be/src/runtime/tablets_channel.h @@ -77,7 +77,8 @@ class TabletsChannel { // no-op when this channel has been closed or cancelled Status close(int sender_id, int64_t backend_id, bool* finished, const google::protobuf::RepeatedField& partition_ids, - google::protobuf::RepeatedPtrField* tablet_vec); + google::protobuf::RepeatedPtrField* tablet_vec, + google::protobuf::RepeatedPtrField* tablet_error); // no-op when this channel has been closed or cancelled Status cancel(); @@ -235,4 +236,4 @@ Status TabletsChannel::add_batch(const TabletWriterAddRequest& request, } return Status::OK(); } -} // namespace doris \ No newline at end of file +} // namespace doris From c9d0354c9c122c609b58e2eba761cb981b2e6ee0 Mon Sep 17 00:00:00 2001 From: pengxianyu Date: Sat, 7 May 2022 10:11:49 +0800 Subject: [PATCH 2/4] return error tablet list when close_wait return error --- be/src/olap/delta_writer.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/be/src/olap/delta_writer.cpp b/be/src/olap/delta_writer.cpp index 5d579cc42ce369..5a05e9ade3a57c 100644 --- a/be/src/olap/delta_writer.cpp +++ b/be/src/olap/delta_writer.cpp @@ -322,8 +322,8 @@ Status DeltaWriter::close_wait(google::protobuf::RepeatedPtrField* } // return error if previous flush failed - Status s = Status::OK(); - if (!(s = _flush_token->wait()).ok()) { + Status s = _flush_token->wait(); + if (!s.ok()) { PTabletError* tablet_error = tablet_errors->Add(); tablet_error->set_tablet_id(_tablet->tablet_id()); tablet_error->set_msg(s.get_error_msg()); From 447835f91e0d775012815968e2ac629939f79871 Mon Sep 17 00:00:00 2001 From: pengxianyu Date: Sat, 7 May 2022 19:34:54 +0800 Subject: [PATCH 3/4] return error tablet list when close_wait return error --- be/src/olap/delta_writer.h | 3 ++- be/src/runtime/tablets_channel.cpp | 4 +++- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/be/src/olap/delta_writer.h b/be/src/olap/delta_writer.h index 1a4c81f8f15c13..091d8ef213adf2 100644 --- a/be/src/olap/delta_writer.h +++ b/be/src/olap/delta_writer.h @@ -68,7 +68,8 @@ class DeltaWriter { // wait for all memtables to be flushed. // mem_consumption() should be 0 after this function returns. Status close_wait(google::protobuf::RepeatedPtrField* tablet_vec, - google::protobuf::RepeatedPtrField* tablet_errors, bool is_broken); + google::protobuf::RepeatedPtrField* tablet_errors, + bool is_broken); // abandon current memtable and wait for all pending-flushing memtables to be destructed. // mem_consumption() should be 0 after this function returns. diff --git a/be/src/runtime/tablets_channel.cpp b/be/src/runtime/tablets_channel.cpp index 04d9bcf8716cee..b40b03f503b95e 100644 --- a/be/src/runtime/tablets_channel.cpp +++ b/be/src/runtime/tablets_channel.cpp @@ -129,7 +129,9 @@ Status TabletsChannel::close(int sender_id, int64_t backend_id, bool* finished, for (auto writer : need_wait_writers) { // close may return failed, but no need to handle it here. // tablet_vec will only contains success tablet, and then let FE judge it. - writer->close_wait(tablet_vec, tablet_errors, (_broken_tablets.find(writer->tablet_id()) != + writer->close_wait( + tablet_vec, tablet_errors, + (_broken_tablets.find(writer->tablet_id()) != _broken_tablets.end())); } } From f18a178fb4063d37837491c5d2a65eff33e79deb Mon Sep 17 00:00:00 2001 From: pengxianyu Date: Sat, 7 May 2022 19:53:26 +0800 Subject: [PATCH 4/4] return error tablet list when close_wait return error --- be/src/runtime/tablets_channel.cpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/be/src/runtime/tablets_channel.cpp b/be/src/runtime/tablets_channel.cpp index b40b03f503b95e..3f807f32433fe2 100644 --- a/be/src/runtime/tablets_channel.cpp +++ b/be/src/runtime/tablets_channel.cpp @@ -131,8 +131,7 @@ Status TabletsChannel::close(int sender_id, int64_t backend_id, bool* finished, // tablet_vec will only contains success tablet, and then let FE judge it. writer->close_wait( tablet_vec, tablet_errors, - (_broken_tablets.find(writer->tablet_id()) != - _broken_tablets.end())); + (_broken_tablets.find(writer->tablet_id()) != _broken_tablets.end())); } } return Status::OK();