diff --git a/be/src/agent/utils.cpp b/be/src/agent/utils.cpp index c5e48a9f454f20..0a419f613e4c5e 100644 --- a/be/src/agent/utils.cpp +++ b/be/src/agent/utils.cpp @@ -227,12 +227,15 @@ std::string AgentUtils::print_agent_status(AgentStatus status) { } } -bool AgentUtils::exec_cmd(const string& command, string* errmsg) { +bool AgentUtils::exec_cmd(const string& command, string* errmsg, bool redirect_stderr) { // The exit status of the command. uint32_t rc = 0; // Redirect stderr to stdout to get error message. - string cmd = command + " 2>&1"; + string cmd = command; + if (redirect_stderr) { + cmd += " 2>&1"; + } // Execute command. FILE *fp = popen(cmd.c_str(), "r"); diff --git a/be/src/agent/utils.h b/be/src/agent/utils.h index b3cb6cfb9ff015..c732e0304ab242 100644 --- a/be/src/agent/utils.h +++ b/be/src/agent/utils.h @@ -83,7 +83,7 @@ class AgentUtils { virtual std::string print_agent_status(AgentStatus status); // Execute shell cmd - virtual bool exec_cmd(const std::string& command, std::string* errmsg); + virtual bool exec_cmd(const std::string& command, std::string* errmsg, bool redirect_stderr = true); // Write a map to file by json format virtual bool write_json_to_file( diff --git a/be/src/http/action/pprof_actions.cpp b/be/src/http/action/pprof_actions.cpp index 5bd987ee7553c9..6e2281c30e583a 100644 --- a/be/src/http/action/pprof_actions.cpp +++ b/be/src/http/action/pprof_actions.cpp @@ -26,7 +26,9 @@ #include #include +#include "agent/utils.h" #include "common/config.h" +#include "gutil/strings/substitute.h" #include "http/ev_http_server.h" #include "http/http_channel.h" #include "http/http_handler.h" @@ -36,6 +38,7 @@ #include "runtime/exec_env.h" #include "util/bfd_parser.h" #include "util/file_utils.h" +#include "util/pprof_utils.h" namespace doris { @@ -83,7 +86,18 @@ void HeapAction::handle(HttpRequest* req) { std::string str = profile; delete profile; - HttpChannel::send_reply(req, str); + const std::string& readable_str = req->param("readable"); + 
if (!readable_str.empty()) { + std::stringstream readable_res; + Status st = PprofUtils::get_readable_profile(str, false, &readable_res); + if (!st.ok()) { + HttpChannel::send_reply(req, st.to_string()); + } else { + HttpChannel::send_reply(req, readable_res.str()); + } + } else { + HttpChannel::send_reply(req, str); + } #endif } @@ -130,26 +144,50 @@ void ProfileAction::handle(HttpRequest* req) { seconds = std::atoi(seconds_str.c_str()); } - std::ostringstream tmp_prof_file_name; - // Build a temporary file name that is hopefully unique. - tmp_prof_file_name << config::pprof_profile_dir << "/doris_profile." << getpid() << "." - << rand(); - ProfilerStart(tmp_prof_file_name.str().c_str()); - sleep(seconds); - ProfilerStop(); - std::ifstream prof_file(tmp_prof_file_name.str().c_str(), std::ios::in); - std::stringstream ss; - if (!prof_file.is_open()) { - ss << "Unable to open cpu profile: " << tmp_prof_file_name.str(); - std::string str = ss.str(); - HttpChannel::send_reply(req, str); - return; - } - ss << prof_file.rdbuf(); - prof_file.close(); - std::string str = ss.str(); + const std::string& type_str = req->param("type"); + if (type_str != "flamegraph") { + // use pprof the sample the CPU + std::ostringstream tmp_prof_file_name; + tmp_prof_file_name << config::pprof_profile_dir << "/doris_profile." << getpid() << "." << rand(); + ProfilerStart(tmp_prof_file_name.str().c_str()); + sleep(seconds); + ProfilerStop(); + + if (type_str != "text") { + // return raw content via http response directly + std::ifstream prof_file(tmp_prof_file_name.str().c_str(), std::ios::in); + std::stringstream ss; + if (!prof_file.is_open()) { + ss << "Unable to open cpu profile: " << tmp_prof_file_name.str(); + std::string str = ss.str(); + HttpChannel::send_reply(req, str); + return; + } + ss << prof_file.rdbuf(); + prof_file.close(); + std::string str = ss.str(); + HttpChannel::send_reply(req, str); + } - HttpChannel::send_reply(req, str); + // text type. 
we will return readable content via http response + std::stringstream readable_res; + Status st = PprofUtils::get_readable_profile(tmp_prof_file_name.str(), true, &readable_res); + if (!st.ok()) { + HttpChannel::send_reply(req, st.to_string()); + } else { + HttpChannel::send_reply(req, readable_res.str()); + } + } else { + // generate flamegraph + std::string svg_file_content; + std::string flamegraph_install_dir = std::string(std::getenv("DORIS_HOME")) + "/tools/FlameGraph/"; + Status st = PprofUtils::generate_flamegraph(30, flamegraph_install_dir, false, &svg_file_content); + if (!st.ok()) { + HttpChannel::send_reply(req, st.to_string()); + } else { + HttpChannel::send_reply(req, svg_file_content); + } + } #endif } @@ -179,7 +217,6 @@ void CmdlineAction::handle(HttpRequest* req) { FILE* fp = fopen("/proc/self/cmdline", "r"); if (fp == nullptr) { std::string str = "Unable to open file: /proc/self/cmdline"; - HttpChannel::send_reply(req, str); return; } diff --git a/be/src/http/default_path_handlers.cpp b/be/src/http/default_path_handlers.cpp index 0a43bd4b863f9f..933e6fbc651112 100644 --- a/be/src/http/default_path_handlers.cpp +++ b/be/src/http/default_path_handlers.cpp @@ -17,12 +17,14 @@ #include "http/default_path_handlers.h" +#include #include #include #include #include +#include "agent/utils.h" #include "common/configbase.h" #include "gutil/strings/numbers.h" #include "gutil/strings/substitute.h" @@ -154,6 +156,148 @@ void mem_tracker_handler(const WebPageHandler::ArgumentMap& args, std::stringstr (*output) << "\n"; } +void heap_handler(const WebPageHandler::ArgumentMap& args, std::stringstream* output) { + (*output) << "

Heap Profile

" << std::endl; + +#if defined(ADDRESS_SANITIZER) || defined(LEAK_SANITIZER) || defined(THREAD_SANITIZER) + (*output) << "
" << std::endl;
+    (*output) << "Heap profiling is not available with address sanitizer builds." << std::endl;
+    (*output) << "
" << std::endl; + return; + +#else + (*output) << "
" << std::endl;
+    // Usage notes for the heap profile page, emitted as one chained insertion.
+    (*output) << "Heap profiling will use pprof tool to sample and get heap profile. It will take 30 seconds" << std::endl
+              << "(Only one thread can obtain profile at the same time)" << std::endl
+              << std::endl
+              << "If you want to get the Heap profile, you need to install gperftools-2.0 on the host machine," << std::endl
+              << "and make sure there is a 'pprof' executable file in the system PATH or 'be/tools/bin/' directory." << std::endl
+              << "Doris will obtain Profile in the following ways:" << std::endl
+              << std::endl
+              << "    curl http://localhost:" << config::webserver_port << "/pprof/heap?seconds=30 > perf.data" << std::endl
+              << "    pprof --text be/lib/palo_be perf.data" << std::endl
+              << std::endl;
+    (*output) << "
" << std::endl; + (*output) << "
" << std::endl; + (*output) << "
" << std::endl; + (*output) << "
" << std::endl; + (*output) << "
" << std::endl; + (*output) << "
" << std::endl; + + (*output) << "" << std::endl; + + return; +#endif +} + +void cpu_handler(const WebPageHandler::ArgumentMap& args, std::stringstream* output) { + (*output) << "

CPU Profile

" << std::endl; + +#if defined(ADDRESS_SANITIZER) || defined(LEAK_SANITIZER) || defined(THREAD_SANITIZER) + (*output) << "
" << std::endl;
+    (*output) << "CPU profiling is not available with address sanitizer builds." << std::endl;
+    (*output) << "
" << std::endl; + return; + +#else + + (*output) << "
" << std::endl;
+    // Usage notes for the CPU profile page: how the text profile (pprof) and the
+    // flame graph (perf + FlameGraph scripts) are produced.
+    (*output) << "CPU profiling will use perf tool to sample and get CPU profile. It will take 30 seconds" << std::endl;
+    (*output) << "(Only one thread can obtain profile at the same time)" << std::endl;
+    (*output) << std::endl;
+    (*output) << "If you want to get the CPU profile in text form, you need to install gperftools-2.0 on the host machine," << std::endl;
+    (*output) << "and make sure there is a 'pprof' executable file in the system PATH or 'be/tools/bin/' directory." << std::endl;
+    (*output) << "Doris will obtain Profile in the following ways:" << std::endl;
+    (*output) << std::endl;
+    (*output) << "    curl http://localhost:" << config::webserver_port << "/pprof/profile?seconds=30 > perf.data" << std::endl;
+    (*output) << "    pprof --text be/lib/palo_be perf.data" << std::endl;
+    (*output) << std::endl;
+    (*output) << "If you want to get the flame graph, you must first make sure that there is a 'perf' command on the host machine." << std::endl;
+    (*output) << "And you need to download the FlameGraph and place it under 'be/tools/FlameGraph'." << std::endl;
+    (*output) << "Finally, check if the following files exist" << std::endl;
+    (*output) << std::endl;
+    (*output) << "    be/tools/FlameGraph/stackcollapse-perf.pl" << std::endl;
+    (*output) << "    be/tools/FlameGraph/flamegraph.pl" << std::endl;
+    (*output) << std::endl;
+    (*output) << "Doris will obtain the flame graph in the following ways:" << std::endl;
+    (*output) << std::endl;
+    // The separator before the sampled command is "--" (a lone "-" is not a
+    // separator); this matches the command built in PprofUtils::generate_flamegraph.
+    (*output) << "    perf record -m 2 -g -p be_pid -o perf.data -- sleep 30" << std::endl;
+    (*output) << "    perf script -i perf.data | stackcollapse-perf.pl | flamegraph.pl > flamegraph.svg" << std::endl;
+    (*output) << std::endl;
+    (*output) << "
" << std::endl; + (*output) << "
" << std::endl; + (*output) << "
" << std::endl; + (*output) << "
" << std::endl; + (*output) << "
" << std::endl; + (*output) << "
" << std::endl; + (*output) << "
" << std::endl; + (*output) << "
" << std::endl; + + // for text profile + (*output) << "" << std::endl; + + return; +#endif +} + void add_default_path_handlers(WebPageHandler* web_page_handler, const std::shared_ptr& process_mem_tracker) { // TODO(yingchun): logs_handler is not implemented yet, so not show it on navigate bar @@ -162,7 +306,9 @@ void add_default_path_handlers(WebPageHandler* web_page_handler, web_page_handler->register_page( "/memz", "Memory", boost::bind(&mem_usage_handler, process_mem_tracker, _1, _2), true /* is_on_nav_bar */); - web_page_handler->register_page("/mem_tracker", "MemTracker", mem_tracker_handler,true /* is_on_nav_bar */); + web_page_handler->register_page("/mem_tracker", "MemTracker", mem_tracker_handler, true /* is_on_nav_bar */); + web_page_handler->register_page("/heap", "Heap Profile", heap_handler, true /* is_on_nav_bar */); + web_page_handler->register_page("/cpu", "CPU Profile", cpu_handler, true /* is_on_nav_bar */); register_thread_display_page(web_page_handler); web_page_handler->register_template_page("/tablets_page", "Tablets", boost::bind(&display_tablets_callback, _1, _2), true /* is_on_nav_bar */); } diff --git a/be/src/http/web_page_handler.cpp b/be/src/http/web_page_handler.cpp index 9881b9943aaefc..91b507b4fe8e99 100644 --- a/be/src/http/web_page_handler.cpp +++ b/be/src/http/web_page_handler.cpp @@ -127,7 +127,7 @@ static const std::string kMainTemplate = R"( - + diff --git a/be/src/util/CMakeLists.txt b/be/src/util/CMakeLists.txt index 4c44ddbc7b13cf..adb21a66980f4f 100644 --- a/be/src/util/CMakeLists.txt +++ b/be/src/util/CMakeLists.txt @@ -101,6 +101,7 @@ set(UTIL_FILES mustache/mustache.cc brpc_stub_cache.cpp zlib.cpp + pprof_utils.cpp ) if (WITH_MYSQL) diff --git a/be/src/util/pprof_utils.cpp b/be/src/util/pprof_utils.cpp new file mode 100644 index 00000000000000..fff2e46ce637b3 --- /dev/null +++ b/be/src/util/pprof_utils.cpp @@ -0,0 +1,172 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license 
agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "util/pprof_utils.h"
+
+#include <unistd.h>
+
+#include <cstdio>
+#include <cstdlib>
+#include <fstream>
+#include <sstream>
+
+#include "agent/utils.h"
+#include "common/config.h"
+#include "util/file_utils.h"
+#include "gutil/strings/substitute.h"
+
+namespace doris {
+
+// Find a usable "pprof" executable and return it via "cmd".
+// "$DORIS_HOME/tools/bin/pprof" is tried first (only when DORIS_HOME is set),
+// then plain "pprof", which is resolved by the shell. A candidate is accepted
+// when "<candidate> --version" runs successfully.
+// Returns NotSupported when neither candidate works.
+Status PprofUtils::get_pprof_cmd(std::string* cmd) {
+    AgentUtils util;
+    std::string msg;
+
+    // std::getenv() returns nullptr for an unset variable and constructing a
+    // std::string from nullptr is undefined behavior, so check before using it.
+    const char* doris_home = std::getenv("DORIS_HOME");
+    if (doris_home != nullptr) {
+        std::string bundled_pprof = std::string(doris_home) + "/tools/bin/pprof";
+        if (util.exec_cmd(bundled_pprof + " --version", &msg)) {
+            *cmd = bundled_pprof;
+            return Status::OK();
+        }
+    }
+
+    // not found in BE tools dir, look for it in the system PATH
+    std::string system_pprof = "pprof";
+    if (!util.exec_cmd(system_pprof + " --version", &msg)) {
+        return Status::NotSupported("pprof: command not found in system PATH or be/tools/bin/. Install gperftools first.");
+    }
+    *cmd = system_pprof;
+    return Status::OK();
+}
+
+// Check that "perf --version" can be executed and return "perf" via "cmd".
+// Returns NotSupported when the command fails.
+Status PprofUtils::get_perf_cmd(std::string* cmd) {
+    AgentUtils util;
+    // check if perf cmd exist
+    std::string perf_cmd = "perf";
+    std::string msg;
+    bool rc = util.exec_cmd(perf_cmd + " --version", &msg);
+    if (!rc) {
+        return Status::NotSupported("perf: command not found in system PATH");
+    }
+    *cmd = perf_cmd;
+    return Status::OK();
+}
+
+// Read the first whitespace-delimited token of '/proc/self/cmdline' into "cmd".
+// Assigning the char buffer to the string keeps only the text before the first
+// NUL byte (presumably the executable path of this process; see proc(5)).
+// Returns InternalError when the file cannot be opened or nothing can be read.
+Status PprofUtils::get_self_cmdline(std::string* cmd) {
+    // get cmdline
+    FILE* fp = fopen("/proc/self/cmdline", "r");
+    if (fp == nullptr) {
+        return Status::InternalError("Unable to open file: /proc/self/cmdline");
+    }
+    // Limit the conversion to the buffer size (one byte is kept for the
+    // terminator) so that a long command line cannot overflow the stack buffer.
+    char buf[1024] = {0};
+    int matched = fscanf(fp, "%1023s", buf);
+    fclose(fp);
+    if (matched != 1) {
+        return Status::InternalError("Unable to read file: /proc/self/cmdline");
+    }
+    *cmd = buf;
+    return Status::OK();
+}
+
+// Convert a raw profile into text by running "pprof --text <self binary> <profile file>".
+// If is_file is true, file_or_content is the path of the profile file.
+// If is_file is false, file_or_content is the raw profile content, which is first
+// written to a temporary file under config::pprof_profile_dir.
+// The profile file (the given one or the temporary one) is removed once pprof has run.
+// The text produced by pprof is appended to "output".
+Status PprofUtils::get_readable_profile(const std::string& file_or_content, bool is_file, std::stringstream* output) {
+    // get pprof cmd
+    std::string pprof_cmd;
+    RETURN_IF_ERROR(PprofUtils::get_pprof_cmd(&pprof_cmd));
+
+    // get self cmdline
+    std::string self_cmdline;
+    RETURN_IF_ERROR(PprofUtils::get_self_cmdline(&self_cmdline));
+
+    // save file if necessary
+    std::string final_file;
+    if (!is_file) {
+        std::stringstream tmp_file;
+        tmp_file << config::pprof_profile_dir << "/pprof_profile." << getpid() << "." << rand();
+        final_file = tmp_file.str();
+        std::ofstream outfile(final_file.c_str());
+        if (!outfile.is_open()) {
+            return Status::InternalError("Unable to create tmp profile file: " + final_file);
+        }
+        outfile << file_or_content;
+        outfile.close();
+    } else {
+        final_file = file_or_content;
+    }
+
+    // parse raw with "pprof --text cmdline raw_file"
+    std::string cmd_output;
+    std::string final_cmd = pprof_cmd + strings::Substitute(" --text $0 $1", self_cmdline, final_file);
+    AgentUtils util;
+    // stderr is not redirected so that only the profile text ends up in cmd_output
+    bool rc = util.exec_cmd(final_cmd, &cmd_output, false);
+
+    // delete raw file. This must be final_file: when is_file is false,
+    // file_or_content holds the profile content, not a path.
+    FileUtils::remove(final_file);
+
+    if (!rc) {
+        return Status::InternalError("Failed to execute command: " + cmd_output);
+    }
+
+    (*output) << "Profile(Sample 30 seconds)" << std::endl;
+    (*output) << cmd_output << std::endl;
+    return Status::OK();
+}
+
+// Sample this process with "perf record" for sample_seconds seconds and turn the
+// result into a flame graph with stackcollapse-perf.pl and flamegraph.pl, both of
+// which must exist under flame_graph_tool_dir.
+// If return_file is true, the svg is written to a file under
+// config::pprof_profile_dir and its path is returned via svg_file_or_content.
+// If return_file is false, the svg content itself is returned via svg_file_or_content.
+// The intermediate perf data file is removed in every case.
+Status PprofUtils::generate_flamegraph(int32_t sample_seconds, const std::string& flame_graph_tool_dir, bool return_file, std::string* svg_file_or_content) {
+    // get perf cmd
+    std::string perf_cmd;
+    RETURN_IF_ERROR(PprofUtils::get_perf_cmd(&perf_cmd));
+
+    // check if FlameGraph has been installed
+    // check stackcollapse-perf.pl and flamegraph.pl exist
+    std::string stackcollapse_perf_pl = flame_graph_tool_dir + "/stackcollapse-perf.pl";
+    std::string flamegraph_pl = flame_graph_tool_dir + "/flamegraph.pl";
+    if (!FileUtils::check_exist(stackcollapse_perf_pl) || !FileUtils::check_exist(flamegraph_pl)) {
+        return Status::InternalError("Missing stackcollapse-perf.pl or flamegraph.pl in FlameGraph");
+    }
+
+    // tmp output profile file
+    std::stringstream tmp_file;
+    tmp_file << config::pprof_profile_dir << "/cpu_perf." << getpid() << "." << rand();
+    const std::string perf_data_file = tmp_file.str();
+
+    // sample
+    std::stringstream cmd;
+    cmd << perf_cmd << " record -m 2 -g -p " << getpid() << " -o " << perf_data_file << " -- sleep " << sample_seconds;
+
+    AgentUtils util;
+    std::string cmd_output;
+    bool rc = util.exec_cmd(cmd.str(), &cmd_output);
+    if (!rc) {
+        FileUtils::remove(perf_data_file);
+        return Status::InternalError("Failed to execute perf command: " + cmd_output);
+    }
+
+    // generate flamegraph
+    std::stringstream gen_cmd;
+    gen_cmd << perf_cmd << " script -i " << perf_data_file << " | " << stackcollapse_perf_pl << " | " << flamegraph_pl;
+    std::string graph_file;
+    if (return_file) {
+        std::stringstream graph_file_name;
+        graph_file_name << config::pprof_profile_dir << "/flamegraph." << getpid() << "." << rand() << ".svg";
+        graph_file = graph_file_name.str();
+        gen_cmd << " > " << graph_file;
+    }
+
+    // When the svg goes to a file, stderr is captured for the error message.
+    // When the svg is returned inline, stderr is kept out of the captured output.
+    std::string res_content;
+    rc = util.exec_cmd(gen_cmd.str(), &res_content, return_file);
+
+    // the raw perf data is not needed any more, whether or not the svg was generated
+    FileUtils::remove(perf_data_file);
+
+    if (!rc) {
+        if (return_file) {
+            FileUtils::remove(graph_file);
+        }
+        return Status::InternalError("Failed to execute perf script command: " + res_content);
+    }
+    *svg_file_or_content = return_file ? graph_file : res_content;
+    return Status::OK();
+}
+
+} // end namespace
+
diff --git a/be/src/util/pprof_utils.h b/be/src/util/pprof_utils.h
new file mode 100644
index 00000000000000..338df6d52f6f8c
--- /dev/null
+++ b/be/src/util/pprof_utils.h
@@ -0,0 +1,52 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. 
The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include
+#include
+
+#include "common/status.h"
+
+namespace doris {
+
+/// Static helpers that run the external "pprof" and "perf" tools to turn
+/// profiles of the BE process into readable text or a flame graph.
+class PprofUtils {
+public:
+
+    /// Check that the "perf" command can be executed and return it via "cmd".
+    static Status get_perf_cmd(std::string* cmd);
+
+    /// Get the current BE process cmdline from '/proc/self/cmdline'.
+    static Status get_self_cmdline(std::string* cmd);
+
+    /// Check and get the "pprof" command, returned via "cmd": either the one
+    /// under "$DORIS_HOME/tools/bin/" or plain "pprof" from the system PATH.
+    static Status get_pprof_cmd(std::string* cmd);
+
+    /// Get a readable profile by running `pprof --text palo_be perf.data`.
+    /// If is_file is true, file_or_content is the path of the profile file;
+    /// that file is removed after pprof has run.
+    /// If is_file is false, file_or_content is the profile file content.
+    /// The readable content is returned via "output".
+    static Status get_readable_profile(const std::string& file_or_content, bool is_file, std::stringstream* output);
+
+    /// Generate a flame graph of the CPU profile of the BE process, sampled
+    /// with "perf" for sample_seconds seconds.
+    /// flame_graph_tool_dir is the dir where FlameGraph is installed.
+    /// On success, if return_file is true the path of the generated svg file is
+    /// returned in "svg_file_or_content"; otherwise the svg content is returned.
+    static Status generate_flamegraph(int32_t sample_seconds, const std::string& flame_graph_tool_dir, bool return_file, std::string* svg_file_or_content);
+};
+
+} // end namespace
+
diff --git a/conf/be.conf b/conf/be.conf
index 5d9de5b790c694..a14436317f054e 100644
--- a/conf/be.conf
+++ b/conf/be.conf
@@ -15,6 +15,8 @@
 # specific language governing permissions and limitations
 # under the License.
 
+PPROF_TMPDIR="$DORIS_HOME/log/"
+
 # INFO, WARNING, ERROR, FATAL
 sys_log_level = INFO