From 6f29a53ca479db1f8a801c24326d831ebaaff3e3 Mon Sep 17 00:00:00 2001 From: BiteTheDDDDt Date: Thu, 9 Oct 2025 15:23:29 +0800 Subject: [PATCH 1/2] add reopen thrift connection on RuntimeQueryStatisticsMgr::report_runtime_query_statistics --- .../runtime/runtime_query_statistics_mgr.cpp | 29 ++++++++++++------- 1 file changed, 18 insertions(+), 11 deletions(-) diff --git a/be/src/runtime/runtime_query_statistics_mgr.cpp b/be/src/runtime/runtime_query_statistics_mgr.cpp index f8f6361e09fbf0..a6d70baffbb1c8 100644 --- a/be/src/runtime/runtime_query_statistics_mgr.cpp +++ b/be/src/runtime/runtime_query_statistics_mgr.cpp @@ -397,6 +397,13 @@ void RuntimeQueryStatisticsMgr::report_runtime_query_statistics() { continue; } + auto reopen_coord = [&coord]() -> Status { + std::this_thread::sleep_for( + std::chrono::milliseconds(config::thrift_client_retry_interval_ms * 2)); + // just reopen to disable this connection + return coord.reopen(config::thrift_rpc_timeout_ms); + }; + // 2.2 send report TReportWorkloadRuntimeStatusParams report_runtime_params; report_runtime_params.__set_backend_id(be_id); @@ -413,17 +420,12 @@ void RuntimeQueryStatisticsMgr::report_runtime_query_statistics() { coord->reportExecStatus(res, params); rpc_result[addr] = true; } catch (apache::thrift::transport::TTransportException& e) { + rpc_status = reopen_coord(); #ifndef ADDRESS_SANITIZER LOG_WARNING( "[report_query_statistics] report to fe {} failed, reason:{}, try reopen.", add_str, e.what()); - rpc_status = coord.reopen(config::thrift_rpc_timeout_ms); - if (!rpc_status.ok()) { - LOG_WARNING( - "[report_query_statistics]reopen thrift client failed when report " - "workload runtime statistics to {}, reason: {}", - add_str, rpc_status.to_string()); - } else { + if (rpc_status.ok()) { coord->reportExecStatus(res, params); rpc_result[addr] = true; } @@ -436,21 +438,26 @@ void RuntimeQueryStatisticsMgr::report_runtime_query_statistics() { "[report_query_statistics]fe {} throw exception when report statistics, " "reason:{}, you can see fe log for details.", add_str, e.what()); + rpc_status = reopen_coord(); } catch (apache::thrift::TException& e) { LOG_WARNING( "[report_query_statistics]report workload runtime statistics to {} failed, " "reason: {}", add_str, e.what()); - std::this_thread::sleep_for( - std::chrono::milliseconds(config::thrift_client_retry_interval_ms * 2)); - // just reopen to disable this connection - static_cast(coord.reopen(config::thrift_rpc_timeout_ms)); + rpc_status = reopen_coord(); } catch (std::exception& e) { LOG_WARNING( "[report_query_statistics]unknown exception when report workload runtime " "statistics to {}, reason:{}. ", add_str, e.what()); } + + if (!rpc_status.ok()) { + LOG_WARNING( + "[report_query_statistics]reopen thrift client failed when report " + "workload runtime statistics to {}, reason: {}", + add_str, rpc_status.to_string()); + } } // 3 when query is finished and (last rpc is send success), remove finished query statistics From 7a1ca9122709fc74a893344508d632cde5d55e75 Mon Sep 17 00:00:00 2001 From: BiteTheDDDDt Date: Thu, 9 Oct 2025 15:29:23 +0800 Subject: [PATCH 2/2] update --- be/src/runtime/runtime_query_statistics_mgr.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/be/src/runtime/runtime_query_statistics_mgr.cpp b/be/src/runtime/runtime_query_statistics_mgr.cpp index a6d70baffbb1c8..e1f97b8cdc0f0f 100644 --- a/be/src/runtime/runtime_query_statistics_mgr.cpp +++ b/be/src/runtime/runtime_query_statistics_mgr.cpp @@ -425,13 +425,13 @@ void RuntimeQueryStatisticsMgr::report_runtime_query_statistics() { LOG_WARNING( "[report_query_statistics] report to fe {} failed, reason:{}, try reopen.", add_str, e.what()); +#else + std::cerr << "thrift error, reason=" << e.what(); +#endif if (rpc_status.ok()) { coord->reportExecStatus(res, params); rpc_result[addr] = true; } -#else - std::cerr << "thrift error, reason=" << e.what(); -#endif } } catch (apache::thrift::TApplicationException& e) { LOG_WARNING(