From da0a3572d8b5316c3dcaa33d2b79b2f53dcc54be Mon Sep 17 00:00:00 2001 From: zhiqiang-hhhh Date: Thu, 29 Aug 2024 14:38:29 +0800 Subject: [PATCH 1/4] LOG --- be/src/runtime/fragment_mgr.cpp | 26 ++++++++++++++++--- .../doris/service/FrontendServiceImpl.java | 6 +++++ 2 files changed, 28 insertions(+), 4 deletions(-) diff --git a/be/src/runtime/fragment_mgr.cpp b/be/src/runtime/fragment_mgr.cpp index 57f0e51a2b728f..bb3f6b9830982d 100644 --- a/be/src/runtime/fragment_mgr.cpp +++ b/be/src/runtime/fragment_mgr.cpp @@ -1321,10 +1321,13 @@ void FragmentMgr::cancel_worker() { if (q_ctx->enable_pipeline_x_exec()) { queries_pipeline_task_leak.push_back(q_ctx->query_id()); LOG_INFO( - "Query {}, type {} is not found on any frontends, maybe it " - "is leaked.", - print_id(q_ctx->query_id()), - toString(q_ctx->get_query_source())); + "Query {}(fe process {} type {}) is not found on any " + "frontends, maybe it " + "is leaked.(arrival {}, last check {})", + print_id(q_ctx->query_id()), q_ctx->get_fe_process_uuid(), + toString(q_ctx->get_query_source()), + q_ctx->get_query_arrival_timestamp().tv_sec, + check_invalid_query_last_timestamp.tv_sec); continue; } } @@ -1389,6 +1392,21 @@ void FragmentMgr::cancel_worker() { << print_id(id); } + if (!queries_pipeline_task_leak.empty()) { + // Print running_queries_on_all_fes in one line + std::string ss; + for (const auto& fe : running_queries_on_all_fes) { + ss += fmt::format("{}", fe.first); + ss += ": "; + for (const auto& qid : fe.second) { + ss += print_id(qid); + ss += " "; + } + ss += "\n"; + } + LOG_INFO("Running queries on all fes: {}", ss); + } + for (const auto& qid : queries_pipeline_task_leak) { // Cancel the query, and maybe try to report debug info to fe so that we can // collect debug info by sql or http api instead of search log. diff --git a/fe/fe-core/src/main/java/org/apache/doris/service/FrontendServiceImpl.java b/fe/fe-core/src/main/java/org/apache/doris/service/FrontendServiceImpl.java index 9a2a083b36d6d8..bc91b9b4d3c6aa 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/service/FrontendServiceImpl.java +++ b/fe/fe-core/src/main/java/org/apache/doris/service/FrontendServiceImpl.java @@ -4040,6 +4040,12 @@ public TFetchRunningQueriesResult fetchRunningQueries(TFetchRunningQueriesReques runningQueries.add(coordinator.getQueryId()); } + StringBuilder sb = new StringBuilder(); + for (TUniqueId queryId : runningQueries) { + sb.append(queryId.toString()).append(","); + } + LOG.info("fetchRunningQueries: {}", sb.toString()); + result.setStatus(new TStatus(TStatusCode.OK)); result.setRunningQueries(runningQueries); return result; From a354d0478cb956618be0cb1cb9853164ce7d9fa7 Mon Sep 17 00:00:00 2001 From: zhiqiang-hhhh Date: Fri, 8 Nov 2024 17:38:09 +0800 Subject: [PATCH 2/4] NED --- .../src/main/java/org/apache/doris/qe/ConnectContext.java | 2 +- fe/fe-core/src/main/java/org/apache/doris/qe/Coordinator.java | 4 ---- 2 files changed, 1 insertion(+), 5 deletions(-) diff --git a/fe/fe-core/src/main/java/org/apache/doris/qe/ConnectContext.java b/fe/fe-core/src/main/java/org/apache/doris/qe/ConnectContext.java index d5bf9b9ef2dc1f..890506443523b0 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/qe/ConnectContext.java +++ b/fe/fe-core/src/main/java/org/apache/doris/qe/ConnectContext.java @@ -962,7 +962,7 @@ private void killByTimeout(boolean killConnection) { + " connection type: {}, connectionId: {}", getRemoteHostPortString(), killConnection, getConnectType(), connectionId); - executorRef.cancel(Types.PPlanFragmentCancelReason.TIMEOUT); + executorRef.cancel("Query timeout"); } } diff --git a/fe/fe-core/src/main/java/org/apache/doris/qe/Coordinator.java b/fe/fe-core/src/main/java/org/apache/doris/qe/Coordinator.java index fee7d0442bf425..7fe5cb7ef19f52 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/qe/Coordinator.java +++ b/fe/fe-core/src/main/java/org/apache/doris/qe/Coordinator.java @@ -3503,10 +3503,6 @@ public void onSuccess(InternalService.PCancelPlanFragmentResult result) { fragmentId, status.toString()); } } - LOG.warn("Failed to cancel query {} instance initiated={} done={} backend: {}," - + "fragment id={}, reason: {}", - DebugUtil.printId(queryId), initiated, done, backend.getId(), - fragmentId, "without status"); } public void onFailure(Throwable t) { From 4388ce2c688d4795cd9a887bb318d513b7d22e94 Mon Sep 17 00:00:00 2001 From: zhiqiang-hhhh Date: Fri, 8 Nov 2024 17:41:22 +0800 Subject: [PATCH 3/4] Revert "LOG" This reverts commit da0a3572d8b5316c3dcaa33d2b79b2f53dcc54be. --- be/src/runtime/fragment_mgr.cpp | 26 +++---------------- .../doris/service/FrontendServiceImpl.java | 6 ----- 2 files changed, 4 insertions(+), 28 deletions(-) diff --git a/be/src/runtime/fragment_mgr.cpp b/be/src/runtime/fragment_mgr.cpp index 6c60e60c3788e5..cd8eabb06424c3 100644 --- a/be/src/runtime/fragment_mgr.cpp +++ b/be/src/runtime/fragment_mgr.cpp @@ -1344,13 +1344,10 @@ void FragmentMgr::cancel_worker() { if (q_ctx->enable_pipeline_x_exec()) { queries_pipeline_task_leak.push_back(q_ctx->query_id()); LOG_INFO( - "Query {}(fe process {} type {}) is not found on any " - "frontends, maybe it " - "is leaked.(arrival {}, last check {})", - print_id(q_ctx->query_id()), q_ctx->get_fe_process_uuid(), - toString(q_ctx->get_query_source()), - q_ctx->get_query_arrival_timestamp().tv_sec, - check_invalid_query_last_timestamp.tv_sec); + "Query {}, type {} is not found on any frontends, maybe it " + "is leaked.", + print_id(q_ctx->query_id()), + toString(q_ctx->get_query_source())); continue; } } @@ -1415,21 +1412,6 @@ void FragmentMgr::cancel_worker() { << print_id(id); } - if (!queries_pipeline_task_leak.empty()) { - // Print running_queries_on_all_fes in one line - std::string ss; - for (const auto& fe : running_queries_on_all_fes) { - ss += fmt::format("{}", fe.first); - ss += ": "; - for (const auto& qid : fe.second) { - ss += print_id(qid); - ss += " "; - } - ss += "\n"; - } - LOG_INFO("Running queries on all fes: {}", ss); - } - for (const auto& qid : queries_pipeline_task_leak) { // Cancel the query, and maybe try to report debug info to fe so that we can // collect debug info by sql or http api instead of search log. diff --git a/fe/fe-core/src/main/java/org/apache/doris/service/FrontendServiceImpl.java b/fe/fe-core/src/main/java/org/apache/doris/service/FrontendServiceImpl.java index bc3c38df108ce6..d37e54deba8965 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/service/FrontendServiceImpl.java +++ b/fe/fe-core/src/main/java/org/apache/doris/service/FrontendServiceImpl.java @@ -4039,12 +4039,6 @@ public TFetchRunningQueriesResult fetchRunningQueries(TFetchRunningQueriesReques runningQueries.add(coordinator.getQueryId()); } - StringBuilder sb = new StringBuilder(); - for (TUniqueId queryId : runningQueries) { - sb.append(queryId.toString()).append(","); - } - LOG.info("fetchRunningQueries: {}", sb.toString()); - result.setStatus(new TStatus(TStatusCode.OK)); result.setRunningQueries(runningQueries); return result; From 45c09c147fd17b9258272886e10423f7f12015c3 Mon Sep 17 00:00:00 2001 From: zhiqiang-hhhh Date: Tue, 12 Nov 2024 15:35:55 +0800 Subject: [PATCH 4/4] FIX COMPILE --- fe/fe-core/src/main/java/org/apache/doris/qe/ConnectContext.java | 1 - 1 file changed, 1 deletion(-) diff --git a/fe/fe-core/src/main/java/org/apache/doris/qe/ConnectContext.java b/fe/fe-core/src/main/java/org/apache/doris/qe/ConnectContext.java index 890506443523b0..51316630afff4b 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/qe/ConnectContext.java +++ b/fe/fe-core/src/main/java/org/apache/doris/qe/ConnectContext.java @@ -54,7 +54,6 @@ import org.apache.doris.plsql.Exec; import org.apache.doris.plsql.executor.PlSqlOperation; import org.apache.doris.plugin.AuditEvent.AuditEventBuilder; -import org.apache.doris.proto.Types; import org.apache.doris.resource.Tag; import org.apache.doris.service.arrowflight.results.FlightSqlChannel; import org.apache.doris.statistics.ColumnStatistic;