From d29e99881db6ef2a502fe86a7814cdfe0c232451 Mon Sep 17 00:00:00 2001 From: zclllhhjj Date: Mon, 28 Oct 2024 10:19:09 +0800 Subject: [PATCH 1/2] [chore](log) Reduce some BE execution log (#42383) 1. timeout does not print stack 2. when the runtime filter's relative query is finished before, it does not print the log. --- be/src/common/status.h | 2 +- be/src/runtime/fragment_mgr.cpp | 3 +-- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/be/src/common/status.h b/be/src/common/status.h index e95b93431679a2..e6c4b6871168e7 100644 --- a/be/src/common/status.h +++ b/be/src/common/status.h @@ -478,7 +478,7 @@ class [[nodiscard]] Status { ERROR_CTOR_NOSTACK(Cancelled, CANCELLED) ERROR_CTOR(MemoryLimitExceeded, MEM_LIMIT_EXCEEDED) ERROR_CTOR(RpcError, THRIFT_RPC_ERROR) - ERROR_CTOR(TimedOut, TIMEOUT) + ERROR_CTOR_NOSTACK(TimedOut, TIMEOUT) ERROR_CTOR_NOSTACK(TooManyTasks, TOO_MANY_TASKS) ERROR_CTOR(Uninitialized, UNINITIALIZED) ERROR_CTOR(Aborted, ABORTED) diff --git a/be/src/runtime/fragment_mgr.cpp b/be/src/runtime/fragment_mgr.cpp index e683b84e2b4e4e..3a757fb65485bc 100644 --- a/be/src/runtime/fragment_mgr.cpp +++ b/be/src/runtime/fragment_mgr.cpp @@ -645,6 +645,7 @@ void FragmentMgr::remove_pipeline_context( .count(); g_fragment_executing_count << -1; g_fragment_last_active_time.set_value(now); + // this log will show when a query is really finished in BEs LOG_INFO("Removing query {} fragment {}", print_id(query_id), f_context->get_fragment_id()); _pipeline_map.erase({query_id, f_context->get_fragment_id()}); } @@ -1185,8 +1186,6 @@ Status FragmentMgr::apply_filterv2(const PPublishFilterRequestV2* request, auto iter = _pipeline_map.find( {UniqueId(request->query_id()).to_thrift(), fragment_id}); if (iter == _pipeline_map.end()) { - LOG(WARNING) << "No pipeline fragment is found: Query-ID = " - << request->query_id() << " fragment_id = " << fragment_id; continue; } pip_context = iter->second; From a35b1eaee60d232b5ffe91ec285aaa127f1ba045 Mon Sep 17 00:00:00 2001 From: zclllhhjj Date: Tue, 5 Nov 2024 14:18:25 +0800 Subject: [PATCH 2/2] [Enhancement](log) Make query relative BE log more reasonable (#42900) Issue Number: close #xxx before we have many info logs which may not useful when a query runs. now we reduced them. keep one info log for each stage below: 1. query arrive 2. FE tells to run fragment 3. query finished and no others --- be/src/pipeline/exec/result_sink_operator.cpp | 11 +++++----- be/src/pipeline/pipeline_fragment_context.cpp | 5 ----- be/src/runtime/fragment_mgr.cpp | 21 +++++++------------ be/src/runtime/query_context.cpp | 5 ++--- 4 files changed, 16 insertions(+), 26 deletions(-) diff --git a/be/src/pipeline/exec/result_sink_operator.cpp b/be/src/pipeline/exec/result_sink_operator.cpp index ccd74c6d572e5c..b8faa4f76f7a30 100644 --- a/be/src/pipeline/exec/result_sink_operator.cpp +++ b/be/src/pipeline/exec/result_sink_operator.cpp @@ -17,11 +17,11 @@ #include "result_sink_operator.h" +#include + #include -#include #include "common/config.h" -#include "common/object_pool.h" #include "exec/rowid_fetcher.h" #include "pipeline/exec/operator.h" #include "runtime/buffer_control_block.h" @@ -190,9 +190,10 @@ Status ResultSinkLocalState::close(RuntimeState* state, Status exec_status) { final_status = st; } - LOG_INFO("Query {} result sink closed with status {} and has written {} rows", - print_id(state->query_id()), final_status.to_string_no_stack(), - _writer->get_written_rows()); + VLOG_NOTICE << fmt::format( + "Query {} result sink closed with status {} and has written {} rows", + print_id(state->query_id()), final_status.to_string_no_stack(), + _writer->get_written_rows()); } // close sender, this is normal path end diff --git a/be/src/pipeline/pipeline_fragment_context.cpp b/be/src/pipeline/pipeline_fragment_context.cpp index 4e05a39d77cd62..75623b8f3e9e61 100644 --- a/be/src/pipeline/pipeline_fragment_context.cpp +++ b/be/src/pipeline/pipeline_fragment_context.cpp @@ -252,11 +252,6 @@ Status PipelineFragmentContext::prepare(const doris::TPipelineFragmentParams& re auto* fragment_context = this; - LOG_INFO("PipelineFragmentContext::prepare") - .tag("query_id", print_id(_query_id)) - .tag("fragment_id", _fragment_id) - .tag("pthread_id", (uintptr_t)pthread_self()); - if (request.query_options.__isset.is_report_success) { fragment_context->set_is_report_success(request.query_options.is_report_success); } diff --git a/be/src/runtime/fragment_mgr.cpp b/be/src/runtime/fragment_mgr.cpp index 3a757fb65485bc..8a40fd96fc3297 100644 --- a/be/src/runtime/fragment_mgr.cpp +++ b/be/src/runtime/fragment_mgr.cpp @@ -615,17 +615,19 @@ Status FragmentMgr::exec_plan_fragment(const TPipelineFragmentParams& params, } } +// Stage 2. prepare finished. then get FE instruction to execute Status FragmentMgr::start_query_execution(const PExecPlanFragmentStartRequest* request) { + TUniqueId query_id; + query_id.__set_hi(request->query_id().hi()); + query_id.__set_lo(request->query_id().lo()); std::shared_ptr q_ctx = nullptr; { std::lock_guard lock(_lock); - TUniqueId query_id; - query_id.__set_hi(request->query_id().hi()); - query_id.__set_lo(request->query_id().lo()); q_ctx = _get_or_erase_query_ctx(query_id); } if (q_ctx) { q_ctx->set_ready_to_execute(Status::OK()); + LOG_INFO("Query {} start execution", print_id(query_id)); } else { return Status::InternalError( "Failed to get query fragments context. Query may be " @@ -700,6 +702,7 @@ Status FragmentMgr::_get_query_ctx(const Params& params, TUniqueId query_id, boo return Status::OK(); } + // First time a fragment of a query arrived. print logs. LOG(INFO) << "query_id: " << print_id(query_id) << ", coord_addr: " << params.coord << ", total fragment num on current host: " << params.fragment_num_on_host << ", fe process uuid: " << params.query_options.fe_process_uuid @@ -728,7 +731,6 @@ Status FragmentMgr::_get_query_ctx(const Params& params, TUniqueId query_id, boo } _set_scan_concurrency(params, query_ctx.get()); - const bool is_pipeline = std::is_same_v; if (params.__isset.workload_groups && !params.workload_groups.empty()) { uint64_t tg_id = params.workload_groups[0].id; @@ -739,21 +741,14 @@ Status FragmentMgr::_get_query_ctx(const Params& params, TUniqueId query_id, boo RETURN_IF_ERROR(query_ctx->set_workload_group(workload_group_ptr)); _exec_env->runtime_query_statistics_mgr()->set_workload_group_id(print_id(query_id), tg_id); - - LOG(INFO) << "Query/load id: " << print_id(query_ctx->query_id()) - << ", use workload group: " << workload_group_ptr->debug_string() - << ", is pipeline: " << ((int)is_pipeline); } else { - LOG(INFO) << "Query/load id: " << print_id(query_ctx->query_id()) - << " carried group info but can not find group in be"; + LOG(WARNING) << "Query/load id: " << print_id(query_ctx->query_id()) + << "can't find its workload group " << tg_id; } } // There is some logic in query ctx's dctor, we could not check if exists and delete the // temp query ctx now. For example, the query id maybe removed from workload group's queryset. _query_ctx_map.insert(std::make_pair(query_ctx->query_id(), query_ctx)); - LOG(INFO) << "Register query/load memory tracker, query/load id: " - << print_id(query_ctx->query_id()) - << " limit: " << PrettyPrinter::print(query_ctx->mem_limit(), TUnit::BYTES); } return Status::OK(); } diff --git a/be/src/runtime/query_context.cpp b/be/src/runtime/query_context.cpp index 354de1426e9ee7..ece4c76a17a563 100644 --- a/be/src/runtime/query_context.cpp +++ b/be/src/runtime/query_context.cpp @@ -26,11 +26,9 @@ #include #include #include -#include #include #include "common/logging.h" -#include "olap/olap_common.h" #include "pipeline/dependency.h" #include "pipeline/pipeline_fragment_context.h" #include "runtime/exec_env.h" @@ -198,7 +196,8 @@ QueryContext::~QueryContext() { _exec_env->spill_stream_mgr()->async_cleanup_query(_query_id); DorisMetrics::instance()->query_ctx_cnt->increment(-1); - LOG_INFO("Query {} deconstructed, {}", print_id(this->_query_id), mem_tracker_msg); + // the only one msg shows query's end. any other msg should append to it if need. + LOG_INFO("Query {} deconstructed, mem_tracker: {}", print_id(this->_query_id), mem_tracker_msg); } void QueryContext::set_ready_to_execute(Status reason) {