From 90dbcf58aadc495881d5a2d9d8ddf6bcbf3070f9 Mon Sep 17 00:00:00 2001 From: Kaijie Chen Date: Mon, 10 Feb 2025 21:09:04 +0800 Subject: [PATCH] [improve](move-memtable) improve error log and message for "not enough streams" (#47470) Improve the error log and error message for the "not enough streams" failure so that the cause is easier to understand. --- be/src/vec/sink/writer/vtablet_writer_v2.cpp | 21 +++++++++++++++++-- .../test_multi_replica_fault_injection.groovy | 2 +- 2 files changed, 20 insertions(+), 3 deletions(-) diff --git a/be/src/vec/sink/writer/vtablet_writer_v2.cpp b/be/src/vec/sink/writer/vtablet_writer_v2.cpp index 38ae77d5cddd8c..b08b5fdf3280da 100644 --- a/be/src/vec/sink/writer/vtablet_writer_v2.cpp +++ b/be/src/vec/sink/writer/vtablet_writer_v2.cpp @@ -378,6 +378,7 @@ void VTabletWriterV2::_generate_rows_for_tablet(std::vector& r Status VTabletWriterV2::_select_streams(int64_t tablet_id, int64_t partition_id, int64_t index_id, std::vector>& streams) { + std::vector failed_node_ids; const auto* location = _location->find_tablet(tablet_id); DBUG_EXECUTE_IF("VTabletWriterV2._select_streams.location_null", { location = nullptr; }); if (location == nullptr) { @@ -397,6 +398,9 @@ Status VTabletWriterV2::_select_streams(int64_t tablet_id, int64_t partition_id, << ", stream_ok=" << (stream == nullptr ? 
"no" : "yes"); }); if (stream == nullptr) { + LOG(WARNING) << "skip writing tablet " << tablet_id << " to backend " << node_id + << ": stream is not open"; + failed_node_ids.push_back(node_id); continue; } streams.emplace_back(std::move(stream)); @@ -407,8 +411,21 @@ Status VTabletWriterV2::_select_streams(int64_t tablet_id, int64_t partition_id, << ", num_nodes=" << location->node_ids.size(); }); if (streams.size() <= location->node_ids.size() / 2) { - return Status::InternalError("not enough streams {}/{}", streams.size(), - location->node_ids.size()); + std::ostringstream success_msg; + std::ostringstream failed_msg; + for (auto& s : streams) { + success_msg << ", " << s->dst_id(); + } + for (auto id : failed_node_ids) { + failed_msg << ", " << id; + } + LOG(INFO) << "failed to write enough replicas " << streams.size() << "/" + << location->node_ids.size() << " for tablet " << tablet_id + << " due to connection errors; success nodes" << success_msg.str() + << "; failed nodes" << failed_msg.str() << "."; + return Status::InternalError( + "failed to write enough replicas {}/{} for tablet {} due to connection errors", + streams.size(), location->node_ids.size(), tablet_id); } Status st; for (auto& stream : streams) { diff --git a/regression-test/suites/fault_injection_p0/test_multi_replica_fault_injection.groovy b/regression-test/suites/fault_injection_p0/test_multi_replica_fault_injection.groovy index 4c01513c612d45..072d5bf1c93884 100644 --- a/regression-test/suites/fault_injection_p0/test_multi_replica_fault_injection.groovy +++ b/regression-test/suites/fault_injection_p0/test_multi_replica_fault_injection.groovy @@ -102,7 +102,7 @@ suite("test_multi_replica_fault_injection", "nonConcurrent") { // test one backend open failure load_with_injection("VTabletWriterV2._open_streams.skip_one_backend", "success", true) // test two backend open failure - load_with_injection("VTabletWriterV2._open_streams.skip_two_backends", "not enough streams 1/3", false, "succ replica 
num 1 < load required replica num 2") + load_with_injection("VTabletWriterV2._open_streams.skip_two_backends", "failed to write enough replicas 1/3 for tablet", false, "succ replica num 1 < load required replica num 2") sql """ set enable_memtable_on_sink_node=false """ } }