From 79940013889039e2be34ae99648cca1eec88a4bf Mon Sep 17 00:00:00 2001 From: Weixian Zhou Date: Mon, 17 Dec 2018 13:41:08 -0800 Subject: [PATCH 1/3] minor refactor profiler. --- .../onnxruntime/core/common/logging/logging.h | 4 +- onnxruntime/core/common/profiler.cc | 10 ++-- onnxruntime/core/common/profiler.h | 7 ++- .../core/framework/parallel_executor.cc | 45 +++++++++++------- .../core/framework/sequential_executor.cc | 46 ++++++++++++------- 5 files changed, 72 insertions(+), 40 deletions(-) diff --git a/include/onnxruntime/core/common/logging/logging.h b/include/onnxruntime/core/common/logging/logging.h index 3c808b9adbeb2..3d8e667d470c3 100644 --- a/include/onnxruntime/core/common/logging/logging.h +++ b/include/onnxruntime/core/common/logging/logging.h @@ -75,7 +75,7 @@ struct EventRecord { std::string event_name, long long time_stamp, long long duration, - std::unordered_map&& event_args) : cat(category), + std::initializer_list>&& event_args) : cat(category), pid(process_id), tid(thread_id), name(std::move(event_name)), @@ -88,7 +88,7 @@ struct EventRecord { std::string name; long long ts; long long dur; - std::unordered_map args; + std::initializer_list< std::pair> args; }; } // namespace profiling namespace logging { diff --git a/onnxruntime/core/common/profiler.cc b/onnxruntime/core/common/profiler.cc index 5c4384c5a3fde..40abdf22f9982 100644 --- a/onnxruntime/core/common/profiler.cc +++ b/onnxruntime/core/common/profiler.cc @@ -33,15 +33,14 @@ void Profiler::StartProfiling(const std::string& file_name) { void Profiler::EndTimeAndRecordEvent(EventCategory category, const std::string& event_name, TimePoint& start_time, - const std::initializer_list>& event_args, + const std::string& op_name, bool /*sync_gpu*/) { if (!enabled_ && !profile_with_logger_) return; long long dur = TimeDiffMicroSeconds(start_time); long long ts = TimeDiffMicroSeconds(profiling_start_time_, start_time); - EventRecord event(category, logging::GetProcessId(), - logging::GetThreadId(), event_name, ts, dur, { event_args.begin(), event_args.end() }); + EventRecord event(category, logging::GetProcessId(), logging::GetThreadId(), event_name, ts, dur, { { "op_name", op_name } }); if (profile_with_logger_) { custom_logger_->SendProfileEvent(event); } else { @@ -99,6 +98,11 @@ std::string Profiler::EndProfiling() { return profile_stream_file_; } +bool Profiler::IsEnabled() const +{ + return enabled_; +} + // // Conditionally sync the GPU if the syncGPU flag is set. // diff --git a/onnxruntime/core/common/profiler.h b/onnxruntime/core/common/profiler.h index 36c7655973f6a..b1cbfdd9f672f 100644 --- a/onnxruntime/core/common/profiler.h +++ b/onnxruntime/core/common/profiler.h @@ -47,7 +47,7 @@ class Profiler { void EndTimeAndRecordEvent(EventCategory category, const std::string& event_name, TimePoint& start_time, - const std::initializer_list>& event_args = {}, + const std::string& op_name = {}, bool sync_gpu = false); /* @@ -56,6 +56,11 @@ class Profiler { */ std::string EndProfiling(); + /* + Return true if profiler is enabled, false otherwise. + */ + bool IsEnabled() const; + private: ORT_DISALLOW_COPY_ASSIGNMENT_AND_MOVE(Profiler); diff --git a/onnxruntime/core/framework/parallel_executor.cc b/onnxruntime/core/framework/parallel_executor.cc index 98b8495a4d704..4765463871d32 100644 --- a/onnxruntime/core/framework/parallel_executor.cc +++ b/onnxruntime/core/framework/parallel_executor.cc @@ -31,6 +31,7 @@ Status ParallelExecutor::Execute(const SessionState& session_state, const std::vector& output_names, std::vector& fetches, const logging::Logger& logger) { + auto& profiler = session_state.Profiler(); auto tp = session_state.Profiler().StartTime(); root_frame_ = std::make_unique(feeds, output_names, fetches, session_state); @@ -72,7 +73,10 @@ Status ParallelExecutor::Execute(const SessionState& session_state, } } - session_state.Profiler().EndTimeAndRecordEvent(profiling::SESSION_EVENT, "ParallelExecutor::Execute", tp); + if (profiler.IsEnabled()) { + profiler.EndTimeAndRecordEvent(profiling::SESSION_EVENT, "ParallelExecutor::Execute", tp); + } + return Status::OK(); } @@ -115,8 +119,8 @@ void ParallelExecutor::RunNodeAsyncInternal(size_t p_node_index, OpKernelContextInternal op_kernel_context(*root_frame_, *p_op_kernel, logger, p_op_kernel->Node().ImplicitInputDefs(), terminate_flag_); - - auto sync_time_begin = session_state.Profiler().StartTime(); + auto& profiler = session_state.Profiler(); + auto sync_time_begin = profiler.StartTime(); // sync before compute int queue_id = p_op_kernel->KernelDef().ExecQueueId(); @@ -144,15 +148,17 @@ void ParallelExecutor::RunNodeAsyncInternal(size_t p_node_index, const std::string& node_name = p_op_kernel->Node().Name(); const std::string& op_name = p_op_kernel->KernelDef().OpName(); - session_state.Profiler().EndTimeAndRecordEvent(profiling::NODE_EVENT, - node_name + "_fence_before", - sync_time_begin, - {{"op_name", op_name}}); + if (profiler.IsEnabled()) { + profiler.EndTimeAndRecordEvent(profiling::NODE_EVENT, + node_name + "_fence_before", + sync_time_begin, + op_name); + } // call compute on the kernel VLOGS(logger, 1) << "Computing kernel: " << p_op_kernel->Node().Name(); - auto kernel_begin_time = session_state.Profiler().StartTime(); + auto kernel_begin_time = profiler.StartTime(); // Execute the kernel. auto status = p_op_kernel->Compute(&op_kernel_context); @@ -160,12 +166,14 @@ void ParallelExecutor::RunNodeAsyncInternal(size_t p_node_index, ORT_THROW("Compute failed for node: ", graph_viewer->GetNode(node_index)->Name()); } - session_state.Profiler().EndTimeAndRecordEvent(profiling::NODE_EVENT, - node_name + "_kernel_time", - kernel_begin_time, - {{"op_name", op_name}}); + if (profiler.IsEnabled()) { + profiler.EndTimeAndRecordEvent(profiling::NODE_EVENT, + node_name + "_kernel_time", + kernel_begin_time, + op_name); + } - sync_time_begin = session_state.Profiler().StartTime(); + sync_time_begin = profiler.StartTime(); // sync after compute for outputs for (int input_index = 0; input_index < op_kernel_context.InputCount(); ++input_index) { Fence_t fence = op_kernel_context.InputFence(input_index); @@ -187,10 +195,13 @@ void ParallelExecutor::RunNodeAsyncInternal(size_t p_node_index, fence->AfterUsedAsOutput(queue_id); } } - session_state.Profiler().EndTimeAndRecordEvent(profiling::NODE_EVENT, - node_name + "_fence_after", - sync_time_begin, - {{"op_name", op_name}}); + + if (profiler.IsEnabled()) { + profiler.EndTimeAndRecordEvent(profiling::NODE_EVENT, + node_name + "_fence_after", + sync_time_begin, + op_name); + } //std::cout << "Run async node finish: " << p_node_index << std::endl; diff --git a/onnxruntime/core/framework/sequential_executor.cc b/onnxruntime/core/framework/sequential_executor.cc index a5d704a0b0d69..a0379a0afc008 100644 --- a/onnxruntime/core/framework/sequential_executor.cc +++ b/onnxruntime/core/framework/sequential_executor.cc @@ -31,7 +31,8 @@ Status SequentialExecutor::Execute(const SessionState& session_state, const std::vector& output_names, std::vector& fetches, const logging::Logger& logger) { - auto tp = session_state.Profiler().StartTime(); + auto& profiler = session_state.Profiler(); + auto tp = profiler.StartTime(); ExecutionFrame frame{feeds, output_names, fetches, session_state}; @@ -65,7 +66,7 @@ Status SequentialExecutor::Execute(const SessionState& session_state, terminate_flag_); // TODO: log kernel outputs? - auto sync_time_begin = session_state.Profiler().StartTime(); + auto sync_time_begin = profiler.StartTime(); // sync before compute int queue_id = p_op_kernel->KernelDef().ExecQueueId(); for (int input_index = 0; input_index < op_kernel_context.InputCount(); ++input_index) { @@ -89,22 +90,27 @@ Status SequentialExecutor::Execute(const SessionState& session_state, } } - session_state.Profiler().EndTimeAndRecordEvent(profiling::NODE_EVENT, - node_name + "_fence_before", - sync_time_begin, - {{"op_name", op_name}}); + if (profiler.IsEnabled()) { + profiler.EndTimeAndRecordEvent(profiling::NODE_EVENT, + node_name + "_fence_before", + sync_time_begin, + op_name); + } // call compute on the kernel VLOGS(logger, 1) << "Computing kernel: " << p_op_kernel->Node().Name(); - auto kernel_begin_time = session_state.Profiler().StartTime(); + auto kernel_begin_time = profiler.StartTime(); ORT_RETURN_IF_ERROR(p_op_kernel->Compute(&op_kernel_context)); - session_state.Profiler().EndTimeAndRecordEvent(profiling::NODE_EVENT, - node_name + "_kernel_time", - kernel_begin_time, - {{"op_name", op_name}}); - sync_time_begin = session_state.Profiler().StartTime(); + if (profiler.IsEnabled()) { + profiler.EndTimeAndRecordEvent(profiling::NODE_EVENT, + node_name + "_kernel_time", + kernel_begin_time, + op_name); + } + + sync_time_begin = profiler.StartTime(); // sync after compute for outputs for (int input_index = 0; input_index < op_kernel_context.InputCount(); ++input_index) { Fence_t fence = op_kernel_context.InputFence(input_index); @@ -126,10 +132,13 @@ Status SequentialExecutor::Execute(const SessionState& session_state, fence->AfterUsedAsOutput(queue_id); } } - session_state.Profiler().EndTimeAndRecordEvent(profiling::NODE_EVENT, - node_name + "_fence_after", - sync_time_begin, - {{"op_name", op_name}}); + + if (profiler.IsEnabled()) { + profiler.EndTimeAndRecordEvent(profiling::NODE_EVENT, + node_name + "_fence_after", + sync_time_begin, + op_name); + } // free ml-values corresponding to this node VLOGS(logger, 1) << "Releasing node ML values after computing kernel: " << p_op_kernel->Node().Name(); @@ -158,7 +167,10 @@ Status SequentialExecutor::Execute(const SessionState& session_state, } } - session_state.Profiler().EndTimeAndRecordEvent(profiling::SESSION_EVENT, "SequentialExecutor::Execute", tp); + if (profiler.IsEnabled()) { + profiler.EndTimeAndRecordEvent(profiling::SESSION_EVENT, "SequentialExecutor::Execute", tp); + } + return Status::OK(); } From e76b54facb785ab61107de2a21b3e7044f40d8fe Mon Sep 17 00:00:00 2001 From: Weixian Zhou Date: Tue, 18 Dec 2018 12:50:30 -0800 Subject: [PATCH 2/3] ad op_name back. --- onnxruntime/core/common/profiler.cc | 4 ++-- onnxruntime/core/common/profiler.h | 2 +- onnxruntime/core/framework/parallel_executor.cc | 6 +++--- onnxruntime/core/framework/sequential_executor.cc | 6 +++--- 4 files changed, 9 insertions(+), 9 deletions(-) diff --git a/onnxruntime/core/common/profiler.cc b/onnxruntime/core/common/profiler.cc index 40abdf22f9982..599e7fc651fe2 100644 --- a/onnxruntime/core/common/profiler.cc +++ b/onnxruntime/core/common/profiler.cc @@ -33,14 +33,14 @@ void Profiler::StartProfiling(const std::string& file_name) { void Profiler::EndTimeAndRecordEvent(EventCategory category, const std::string& event_name, TimePoint& start_time, - const std::string& op_name, + const std::pair& op_name, bool /*sync_gpu*/) { if (!enabled_ && !profile_with_logger_) return; long long dur = TimeDiffMicroSeconds(start_time); long long ts = TimeDiffMicroSeconds(profiling_start_time_, start_time); - EventRecord event(category, logging::GetProcessId(), logging::GetThreadId(), event_name, ts, dur, { { "op_name", op_name } }); + EventRecord event(category, logging::GetProcessId(), logging::GetThreadId(), event_name, ts, dur, { op_name }); if (profile_with_logger_) { custom_logger_->SendProfileEvent(event); } else { diff --git a/onnxruntime/core/common/profiler.h b/onnxruntime/core/common/profiler.h index b1cbfdd9f672f..f72b87279f12f 100644 --- a/onnxruntime/core/common/profiler.h +++ b/onnxruntime/core/common/profiler.h @@ -47,7 +47,7 @@ class Profiler { void EndTimeAndRecordEvent(EventCategory category, const std::string& event_name, TimePoint& start_time, - const std::string& op_name = {}, + const std::pair& op_name = {}, bool sync_gpu = false); /* diff --git a/onnxruntime/core/framework/parallel_executor.cc b/onnxruntime/core/framework/parallel_executor.cc index 4765463871d32..fe53607aea9d9 100644 --- a/onnxruntime/core/framework/parallel_executor.cc +++ b/onnxruntime/core/framework/parallel_executor.cc @@ -152,7 +152,7 @@ void ParallelExecutor::RunNodeAsyncInternal(size_t p_node_index, profiler.EndTimeAndRecordEvent(profiling::NODE_EVENT, node_name + "_fence_before", sync_time_begin, - op_name); + { "op_name", op_name }); } // call compute on the kernel @@ -170,7 +170,7 @@ void ParallelExecutor::RunNodeAsyncInternal(size_t p_node_index, profiler.EndTimeAndRecordEvent(profiling::NODE_EVENT, node_name + "_kernel_time", kernel_begin_time, - op_name); + { "op_name", op_name }); } sync_time_begin = profiler.StartTime(); @@ -200,7 +200,7 @@ void ParallelExecutor::RunNodeAsyncInternal(size_t p_node_index, profiler.EndTimeAndRecordEvent(profiling::NODE_EVENT, node_name + "_fence_after", sync_time_begin, - op_name); + { "op_name", op_name }); } //std::cout << "Run async node finish: " << p_node_index << std::endl; diff --git a/onnxruntime/core/framework/sequential_executor.cc b/onnxruntime/core/framework/sequential_executor.cc index a0379a0afc008..7e54a5da9d5bd 100644 --- a/onnxruntime/core/framework/sequential_executor.cc +++ b/onnxruntime/core/framework/sequential_executor.cc @@ -94,7 +94,7 @@ Status SequentialExecutor::Execute(const SessionState& session_state, profiler.EndTimeAndRecordEvent(profiling::NODE_EVENT, node_name + "_fence_before", sync_time_begin, - op_name); + { "op_name", op_name }); } // call compute on the kernel @@ -107,7 +107,7 @@ Status SequentialExecutor::Execute(const SessionState& session_state, profiler.EndTimeAndRecordEvent(profiling::NODE_EVENT, node_name + "_kernel_time", kernel_begin_time, - op_name); + { "op_name", op_name }); } sync_time_begin = profiler.StartTime(); @@ -137,7 +137,7 @@ Status SequentialExecutor::Execute(const SessionState& session_state, profiler.EndTimeAndRecordEvent(profiling::NODE_EVENT, node_name + "_fence_after", sync_time_begin, - op_name); + { "op_name", op_name }); } // free ml-values corresponding to this node From 6f03da1499394303c3998c87336f68f449b5b21d Mon Sep 17 00:00:00 2001 From: Weixian Zhou Date: Tue, 18 Dec 2018 19:40:33 -0800 Subject: [PATCH 3/3] rename arg name to tags. --- onnxruntime/core/common/profiler.cc | 4 ++-- onnxruntime/core/common/profiler.h | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/onnxruntime/core/common/profiler.cc b/onnxruntime/core/common/profiler.cc index 599e7fc651fe2..d52f251179e93 100644 --- a/onnxruntime/core/common/profiler.cc +++ b/onnxruntime/core/common/profiler.cc @@ -33,14 +33,14 @@ void Profiler::StartProfiling(const std::string& file_name) { void Profiler::EndTimeAndRecordEvent(EventCategory category, const std::string& event_name, TimePoint& start_time, - const std::pair& op_name, + const std::pair& tags, bool /*sync_gpu*/) { if (!enabled_ && !profile_with_logger_) return; long long dur = TimeDiffMicroSeconds(start_time); long long ts = TimeDiffMicroSeconds(profiling_start_time_, start_time); - EventRecord event(category, logging::GetProcessId(), logging::GetThreadId(), event_name, ts, dur, { op_name }); + EventRecord event(category, logging::GetProcessId(), logging::GetThreadId(), event_name, ts, dur, { tags }); if (profile_with_logger_) { custom_logger_->SendProfileEvent(event); } else { diff --git a/onnxruntime/core/common/profiler.h b/onnxruntime/core/common/profiler.h index f72b87279f12f..ebccd0e3ee65b 100644 --- a/onnxruntime/core/common/profiler.h +++ b/onnxruntime/core/common/profiler.h @@ -47,7 +47,7 @@ class Profiler { void EndTimeAndRecordEvent(EventCategory category, const std::string& event_name, TimePoint& start_time, - const std::pair& op_name = {}, + const std::pair& tags = {}, bool sync_gpu = false); /*