diff --git a/include/onnxruntime/core/common/logging/logging.h b/include/onnxruntime/core/common/logging/logging.h index 3c808b9adbeb2..3d8e667d470c3 100644 --- a/include/onnxruntime/core/common/logging/logging.h +++ b/include/onnxruntime/core/common/logging/logging.h @@ -75,7 +75,7 @@ struct EventRecord { std::string event_name, long long time_stamp, long long duration, - std::unordered_map&& event_args) : cat(category), + std::initializer_list>&& event_args) : cat(category), pid(process_id), tid(thread_id), name(std::move(event_name)), @@ -88,7 +88,7 @@ struct EventRecord { std::string name; long long ts; long long dur; - std::unordered_map args; + std::initializer_list< std::pair> args; }; } // namespace profiling namespace logging { diff --git a/onnxruntime/core/common/profiler.cc b/onnxruntime/core/common/profiler.cc index 5c4384c5a3fde..d52f251179e93 100644 --- a/onnxruntime/core/common/profiler.cc +++ b/onnxruntime/core/common/profiler.cc @@ -33,15 +33,14 @@ void Profiler::StartProfiling(const std::string& file_name) { void Profiler::EndTimeAndRecordEvent(EventCategory category, const std::string& event_name, TimePoint& start_time, - const std::initializer_list>& event_args, + const std::pair& tags, bool /*sync_gpu*/) { if (!enabled_ && !profile_with_logger_) return; long long dur = TimeDiffMicroSeconds(start_time); long long ts = TimeDiffMicroSeconds(profiling_start_time_, start_time); - EventRecord event(category, logging::GetProcessId(), - logging::GetThreadId(), event_name, ts, dur, { event_args.begin(), event_args.end() }); + EventRecord event(category, logging::GetProcessId(), logging::GetThreadId(), event_name, ts, dur, { tags }); if (profile_with_logger_) { custom_logger_->SendProfileEvent(event); } else { @@ -99,6 +98,11 @@ std::string Profiler::EndProfiling() { return profile_stream_file_; } +bool Profiler::IsEnabled() const +{ + return enabled_; +} + // // Conditionally sync the GPU if the syncGPU flag is set. // diff --git a/onnxruntime/core/common/profiler.h b/onnxruntime/core/common/profiler.h index 36c7655973f6a..ebccd0e3ee65b 100644 --- a/onnxruntime/core/common/profiler.h +++ b/onnxruntime/core/common/profiler.h @@ -47,7 +47,7 @@ class Profiler { void EndTimeAndRecordEvent(EventCategory category, const std::string& event_name, TimePoint& start_time, - const std::initializer_list>& event_args = {}, + const std::pair& tags = {}, bool sync_gpu = false); /* @@ -56,6 +56,11 @@ class Profiler { */ std::string EndProfiling(); + /* + Return true if profiler is enabled, false otherwise. + */ + bool IsEnabled() const; + private: ORT_DISALLOW_COPY_ASSIGNMENT_AND_MOVE(Profiler); diff --git a/onnxruntime/core/framework/parallel_executor.cc b/onnxruntime/core/framework/parallel_executor.cc index 98b8495a4d704..fe53607aea9d9 100644 --- a/onnxruntime/core/framework/parallel_executor.cc +++ b/onnxruntime/core/framework/parallel_executor.cc @@ -31,6 +31,7 @@ Status ParallelExecutor::Execute(const SessionState& session_state, const std::vector& output_names, std::vector& fetches, const logging::Logger& logger) { + auto& profiler = session_state.Profiler(); auto tp = session_state.Profiler().StartTime(); root_frame_ = std::make_unique(feeds, output_names, fetches, session_state); @@ -72,7 +73,10 @@ Status ParallelExecutor::Execute(const SessionState& session_state, } } - session_state.Profiler().EndTimeAndRecordEvent(profiling::SESSION_EVENT, "ParallelExecutor::Execute", tp); + if (profiler.IsEnabled()) { + profiler.EndTimeAndRecordEvent(profiling::SESSION_EVENT, "ParallelExecutor::Execute", tp); + } + return Status::OK(); } @@ -115,8 +119,8 @@ void ParallelExecutor::RunNodeAsyncInternal(size_t p_node_index, OpKernelContextInternal op_kernel_context(*root_frame_, *p_op_kernel, logger, p_op_kernel->Node().ImplicitInputDefs(), terminate_flag_); - - auto sync_time_begin = session_state.Profiler().StartTime(); + auto& profiler = session_state.Profiler(); + auto sync_time_begin = profiler.StartTime(); // sync before compute int queue_id = p_op_kernel->KernelDef().ExecQueueId(); @@ -144,15 +148,17 @@ void ParallelExecutor::RunNodeAsyncInternal(size_t p_node_index, const std::string& node_name = p_op_kernel->Node().Name(); const std::string& op_name = p_op_kernel->KernelDef().OpName(); - session_state.Profiler().EndTimeAndRecordEvent(profiling::NODE_EVENT, - node_name + "_fence_before", - sync_time_begin, - {{"op_name", op_name}}); + if (profiler.IsEnabled()) { + profiler.EndTimeAndRecordEvent(profiling::NODE_EVENT, + node_name + "_fence_before", + sync_time_begin, + { "op_name", op_name }); + } // call compute on the kernel VLOGS(logger, 1) << "Computing kernel: " << p_op_kernel->Node().Name(); - auto kernel_begin_time = session_state.Profiler().StartTime(); + auto kernel_begin_time = profiler.StartTime(); // Execute the kernel. auto status = p_op_kernel->Compute(&op_kernel_context); @@ -160,12 +166,14 @@ void ParallelExecutor::RunNodeAsyncInternal(size_t p_node_index, ORT_THROW("Compute failed for node: ", graph_viewer->GetNode(node_index)->Name()); } - session_state.Profiler().EndTimeAndRecordEvent(profiling::NODE_EVENT, - node_name + "_kernel_time", - kernel_begin_time, - {{"op_name", op_name}}); + if (profiler.IsEnabled()) { + profiler.EndTimeAndRecordEvent(profiling::NODE_EVENT, + node_name + "_kernel_time", + kernel_begin_time, + { "op_name", op_name }); + } - sync_time_begin = session_state.Profiler().StartTime(); + sync_time_begin = profiler.StartTime(); // sync after compute for outputs for (int input_index = 0; input_index < op_kernel_context.InputCount(); ++input_index) { Fence_t fence = op_kernel_context.InputFence(input_index); @@ -187,10 +195,13 @@ void ParallelExecutor::RunNodeAsyncInternal(size_t p_node_index, fence->AfterUsedAsOutput(queue_id); } } - session_state.Profiler().EndTimeAndRecordEvent(profiling::NODE_EVENT, - node_name + "_fence_after", - sync_time_begin, - {{"op_name", op_name}}); + + if (profiler.IsEnabled()) { + profiler.EndTimeAndRecordEvent(profiling::NODE_EVENT, + node_name + "_fence_after", + sync_time_begin, + { "op_name", op_name }); + } //std::cout << "Run async node finish: " << p_node_index << std::endl; diff --git a/onnxruntime/core/framework/sequential_executor.cc b/onnxruntime/core/framework/sequential_executor.cc index a5d704a0b0d69..7e54a5da9d5bd 100644 --- a/onnxruntime/core/framework/sequential_executor.cc +++ b/onnxruntime/core/framework/sequential_executor.cc @@ -31,7 +31,8 @@ Status SequentialExecutor::Execute(const SessionState& session_state, const std::vector& output_names, std::vector& fetches, const logging::Logger& logger) { - auto tp = session_state.Profiler().StartTime(); + auto& profiler = session_state.Profiler(); + auto tp = profiler.StartTime(); ExecutionFrame frame{feeds, output_names, fetches, session_state}; @@ -65,7 +66,7 @@ Status SequentialExecutor::Execute(const SessionState& session_state, terminate_flag_); // TODO: log kernel outputs? - auto sync_time_begin = session_state.Profiler().StartTime(); + auto sync_time_begin = profiler.StartTime(); // sync before compute int queue_id = p_op_kernel->KernelDef().ExecQueueId(); for (int input_index = 0; input_index < op_kernel_context.InputCount(); ++input_index) { @@ -89,22 +90,27 @@ Status SequentialExecutor::Execute(const SessionState& session_state, } } - session_state.Profiler().EndTimeAndRecordEvent(profiling::NODE_EVENT, - node_name + "_fence_before", - sync_time_begin, - {{"op_name", op_name}}); + if (profiler.IsEnabled()) { + profiler.EndTimeAndRecordEvent(profiling::NODE_EVENT, + node_name + "_fence_before", + sync_time_begin, + { "op_name", op_name }); + } // call compute on the kernel VLOGS(logger, 1) << "Computing kernel: " << p_op_kernel->Node().Name(); - auto kernel_begin_time = session_state.Profiler().StartTime(); + auto kernel_begin_time = profiler.StartTime(); ORT_RETURN_IF_ERROR(p_op_kernel->Compute(&op_kernel_context)); - session_state.Profiler().EndTimeAndRecordEvent(profiling::NODE_EVENT, - node_name + "_kernel_time", - kernel_begin_time, - {{"op_name", op_name}}); - sync_time_begin = session_state.Profiler().StartTime(); + if (profiler.IsEnabled()) { + profiler.EndTimeAndRecordEvent(profiling::NODE_EVENT, + node_name + "_kernel_time", + kernel_begin_time, + { "op_name", op_name }); + } + + sync_time_begin = profiler.StartTime(); // sync after compute for outputs for (int input_index = 0; input_index < op_kernel_context.InputCount(); ++input_index) { Fence_t fence = op_kernel_context.InputFence(input_index); @@ -126,10 +132,13 @@ Status SequentialExecutor::Execute(const SessionState& session_state, fence->AfterUsedAsOutput(queue_id); } } - session_state.Profiler().EndTimeAndRecordEvent(profiling::NODE_EVENT, - node_name + "_fence_after", - sync_time_begin, - {{"op_name", op_name}}); + + if (profiler.IsEnabled()) { + profiler.EndTimeAndRecordEvent(profiling::NODE_EVENT, + node_name + "_fence_after", + sync_time_begin, + { "op_name", op_name }); + } // free ml-values corresponding to this node VLOGS(logger, 1) << "Releasing node ML values after computing kernel: " << p_op_kernel->Node().Name(); @@ -158,7 +167,10 @@ Status SequentialExecutor::Execute(const SessionState& session_state, } } - session_state.Profiler().EndTimeAndRecordEvent(profiling::SESSION_EVENT, "SequentialExecutor::Execute", tp); + if (profiler.IsEnabled()) { + profiler.EndTimeAndRecordEvent(profiling::SESSION_EVENT, "SequentialExecutor::Execute", tp); + } + return Status::OK(); }