From e60cf73840e54401c9f4af25e46ec393a11b6b62 Mon Sep 17 00:00:00 2001 From: taiyang-li <654010905@qq.com> Date: Wed, 8 Jan 2025 12:48:53 +0800 Subject: [PATCH 1/3] add dump actions dag tools --- .../local-engine/Common/ArrayJoinHelper.cpp | 9 +-- cpp-ch/local-engine/Common/DebugUtils.cpp | 69 +++++++++++++++++++ cpp-ch/local-engine/Common/DebugUtils.h | 2 + .../Parser/AggregateFunctionParser.cpp | 1 + 4 files changed, 77 insertions(+), 4 deletions(-) diff --git a/cpp-ch/local-engine/Common/ArrayJoinHelper.cpp b/cpp-ch/local-engine/Common/ArrayJoinHelper.cpp index acefad0aea2a..938774e1400e 100644 --- a/cpp-ch/local-engine/Common/ArrayJoinHelper.cpp +++ b/cpp-ch/local-engine/Common/ArrayJoinHelper.cpp @@ -26,6 +26,7 @@ #include #include #include +#include #include namespace DB @@ -110,11 +111,11 @@ addArrayJoinStep(DB::ContextPtr context, DB::QueryPlan & plan, const DB::Actions { /// If generator in generate rel is explode/posexplode, transform arrayJoin function to ARRAY JOIN STEP to apply max_block_size /// which avoids OOM when several lateral view explode/posexplode is used in spark sqls - LOG_DEBUG(logger, "original actions_dag:{}", actions_dag.dumpDAG()); + LOG_DEBUG(logger, "original actions_dag:\n{}", debug::dumpActionsDAG(actions_dag)); auto splitted_actions_dags = splitActionsDAGInGenerate(actions_dag); - LOG_DEBUG(logger, "actions_dag before arrayJoin:{}", splitted_actions_dags.before_array_join.dumpDAG()); - LOG_DEBUG(logger, "actions_dag during arrayJoin:{}", splitted_actions_dags.array_join.dumpDAG()); - LOG_DEBUG(logger, "actions_dag after arrayJoin:{}", splitted_actions_dags.after_array_join.dumpDAG()); + LOG_DEBUG(logger, "actions_dag before arrayJoin:\n{}", debug::dumpActionsDAG(splitted_actions_dags.before_array_join)); + LOG_DEBUG(logger, "actions_dag during arrayJoin:\n{}", debug::dumpActionsDAG(splitted_actions_dags.array_join)); + LOG_DEBUG(logger, "actions_dag after arrayJoin:\n{}", debug::dumpActionsDAG(splitted_actions_dags.after_array_join)); auto ignore_actions_dag = [](const DB::ActionsDAG & actions_dag_) -> bool { diff --git a/cpp-ch/local-engine/Common/DebugUtils.cpp b/cpp-ch/local-engine/Common/DebugUtils.cpp index 5bad9dfc3c1d..018b34454ef8 100644 --- a/cpp-ch/local-engine/Common/DebugUtils.cpp +++ b/cpp-ch/local-engine/Common/DebugUtils.cpp @@ -31,6 +31,8 @@ #include #include #include +#include "Functions/IFunction.h" +#include namespace pb_util = google::protobuf::util; @@ -396,4 +398,71 @@ std::string showString(const DB::Block & block, size_t numRows, size_t truncate, [](const DB::ColumnWithTypeAndName & col) { return std::make_pair(col.name, col.column); }); return Utils::showString(name_and_columns, numRows, truncate, vertical); } + +std::string dumpActionsDAG(const DB::ActionsDAG & dag) +{ + std::stringstream ss; + ss << "digraph ActionsDAG {\n"; + ss << " rankdir=BT;\n"; // Add this line to invert the vertical direction + + std::unordered_map node_to_id; + size_t id = 0; + for (const auto & node : dag.getNodes()) + node_to_id[&node] = id++; + + std::unordered_set output_nodes(dag.getOutputs().begin(), dag.getOutputs().end()); + + for (const auto & node : dag.getNodes()) + { + ss << " n" << node_to_id[&node] << " [label=\""; + + ss << "id:" << node_to_id[&node] << "\\l"; + switch (node.type) + { + case DB::ActionsDAG::ActionType::COLUMN: + ss << "column:" + << (node.column && DB::isColumnConst(*node.column) + ? toString(assert_cast(*node.column).getField()) + : "null") + << "\\l"; + break; + case DB::ActionsDAG::ActionType::ALIAS: + ss << "alias" << "\\l"; + break; + case DB::ActionsDAG::ActionType::FUNCTION: + ss << "function: " << (node.function_base ? node.function_base->getName() : "null"); + if (node.is_function_compiled) + ss << " [compiled]"; + ss << "\\l"; + break; + case DB::ActionsDAG::ActionType::ARRAY_JOIN: + ss << "array join" << "\\l"; + break; + case DB::ActionsDAG::ActionType::INPUT: + ss << "input" << "\\l"; + break; + } + + ss << "result type: " << (node.result_type ? node.result_type->getName() : "null") << "\\l"; + + ss << "children:"; + for (const auto * child : node.children) + ss << " " << node_to_id[child]; + ss << "\\l"; + + ss << "\""; + if (output_nodes.contains(&node)) + ss << ", shape=doublecircle"; + + ss << "];\n"; + } + + for (const auto & node : dag.getNodes()) + for (const auto * child : node.children) + ss << " n" << node_to_id[child] << " -> n" << node_to_id[&node] << ";\n"; + + ss << "}\n"; + return ss.str(); +} + } \ No newline at end of file diff --git a/cpp-ch/local-engine/Common/DebugUtils.h b/cpp-ch/local-engine/Common/DebugUtils.h index 850da408fb22..dd29f93d8d31 100644 --- a/cpp-ch/local-engine/Common/DebugUtils.h +++ b/cpp-ch/local-engine/Common/DebugUtils.h @@ -25,6 +25,7 @@ class Message; namespace DB { class QueryPlan; +class ActionsDAG; } namespace debug { @@ -41,5 +42,6 @@ inline std::string verticalShowString(const DB::Block & block, size_t numRows = { return showString(block, numRows, truncate, true); } +std::string dumpActionsDAG(const DB::ActionsDAG & dag); } diff --git a/cpp-ch/local-engine/Parser/AggregateFunctionParser.cpp b/cpp-ch/local-engine/Parser/AggregateFunctionParser.cpp index e2387f071300..40737f470e1c 100644 --- a/cpp-ch/local-engine/Parser/AggregateFunctionParser.cpp +++ b/cpp-ch/local-engine/Parser/AggregateFunctionParser.cpp @@ -25,6 +25,7 @@ #include #include #include +#include namespace DB { From ee41e360acc7908f2056b31d19297c2985d42cc9 Mon Sep 17 00:00:00 2001 From: taiyang-li <654010905@qq.com> Date: Wed, 8 Jan 2025 12:51:37 +0800 Subject: [PATCH 2/3] modify log level --- cpp-ch/local-engine/Common/ArrayJoinHelper.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/cpp-ch/local-engine/Common/ArrayJoinHelper.cpp b/cpp-ch/local-engine/Common/ArrayJoinHelper.cpp index 938774e1400e..aa88b42db1c6 100644 --- a/cpp-ch/local-engine/Common/ArrayJoinHelper.cpp +++ b/cpp-ch/local-engine/Common/ArrayJoinHelper.cpp @@ -111,11 +111,11 @@ addArrayJoinStep(DB::ContextPtr context, DB::QueryPlan & plan, const DB::Actions { /// If generator in generate rel is explode/posexplode, transform arrayJoin function to ARRAY JOIN STEP to apply max_block_size /// which avoids OOM when several lateral view explode/posexplode is used in spark sqls - LOG_DEBUG(logger, "original actions_dag:\n{}", debug::dumpActionsDAG(actions_dag)); + LOG_TEST(logger, "original actions_dag:\n{}", debug::dumpActionsDAG(actions_dag)); auto splitted_actions_dags = splitActionsDAGInGenerate(actions_dag); - LOG_DEBUG(logger, "actions_dag before arrayJoin:\n{}", debug::dumpActionsDAG(splitted_actions_dags.before_array_join)); - LOG_DEBUG(logger, "actions_dag during arrayJoin:\n{}", debug::dumpActionsDAG(splitted_actions_dags.array_join)); - LOG_DEBUG(logger, "actions_dag after arrayJoin:\n{}", debug::dumpActionsDAG(splitted_actions_dags.after_array_join)); + LOG_TEST(logger, "actions_dag before arrayJoin:\n{}", debug::dumpActionsDAG(splitted_actions_dags.before_array_join)); + LOG_TEST(logger, "actions_dag during arrayJoin:\n{}", debug::dumpActionsDAG(splitted_actions_dags.array_join)); + LOG_TEST(logger, "actions_dag after arrayJoin:\n{}", debug::dumpActionsDAG(splitted_actions_dags.after_array_join)); auto ignore_actions_dag = [](const DB::ActionsDAG & actions_dag_) -> bool { From 464c0b8e762d182a89d7119b047ac057c596265f Mon Sep 17 00:00:00 2001 From: taiyang-li <654010905@qq.com> Date: Wed, 8 Jan 2025 12:59:13 +0800 Subject: [PATCH 3/3] update --- cpp-ch/local-engine/Common/DebugUtils.cpp | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/cpp-ch/local-engine/Common/DebugUtils.cpp b/cpp-ch/local-engine/Common/DebugUtils.cpp index 018b34454ef8..2cb38ed59a13 100644 --- a/cpp-ch/local-engine/Common/DebugUtils.cpp +++ b/cpp-ch/local-engine/Common/DebugUtils.cpp @@ -403,7 +403,10 @@ std::string dumpActionsDAG(const DB::ActionsDAG & dag) { std::stringstream ss; ss << "digraph ActionsDAG {\n"; - ss << " rankdir=BT;\n"; // Add this line to invert the vertical direction + ss << " rankdir=BT;\n"; // Invert the vertical direction + ss << " nodesep=0.1;\n"; // Reduce space between nodes + ss << " ranksep=0.1;\n"; // Reduce space between ranks + ss << " margin=0.1;\n"; // Reduce graph margin std::unordered_map node_to_id; size_t id = 0;