Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions include/onnxruntime/core/common/logging/logging.h
Original file line number Diff line number Diff line change
Expand Up @@ -75,7 +75,7 @@ struct EventRecord {
std::string event_name,
long long time_stamp,
long long duration,
std::unordered_map<std::string, std::string>&& event_args) : cat(category),
std::initializer_list<std::pair<std::string, std::string>>&& event_args) : cat(category),
pid(process_id),
tid(thread_id),
name(std::move(event_name)),
Expand All @@ -88,7 +88,7 @@ struct EventRecord {
std::string name;
long long ts;
long long dur;
std::unordered_map<std::string, std::string> args;
std::initializer_list< std::pair<std::string, std::string>> args;
};
} // namespace profiling
namespace logging {
Expand Down
10 changes: 7 additions & 3 deletions onnxruntime/core/common/profiler.cc
Original file line number Diff line number Diff line change
Expand Up @@ -33,15 +33,14 @@ void Profiler::StartProfiling(const std::string& file_name) {
void Profiler::EndTimeAndRecordEvent(EventCategory category,
const std::string& event_name,
TimePoint& start_time,
const std::initializer_list<std::pair<std::string, std::string>>& event_args,
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Won't this take away the ability to record multiple key-val pairs in a profile? Isn't invoking this function conditionally (that is only when profiling is enabled) enough?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I dont see why one EndTimeAndRecordEvent should take multiple events. At least existing code has no such cases. But if you insist, I can add it back.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thinking more about it, yeah recording just one event is fine. However we should've a name for the thing that you're recording. So in this case I don't know if the string that you're recording is an op_name or something else. Would be good to add event_name and event_value parameters to the function.

Copy link
Contributor Author

@zwxxx zwxxx Dec 18, 2018

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

ok it makes sense. added std::pair<std::string, std::string> back.

const std::pair<std::string, std::string>& tags,
bool /*sync_gpu*/) {
if (!enabled_ && !profile_with_logger_)
return;
long long dur = TimeDiffMicroSeconds(start_time);
long long ts = TimeDiffMicroSeconds(profiling_start_time_, start_time);

EventRecord event(category, logging::GetProcessId(),
logging::GetThreadId(), event_name, ts, dur, { event_args.begin(), event_args.end() });
EventRecord event(category, logging::GetProcessId(), logging::GetThreadId(), event_name, ts, dur, { tags });
if (profile_with_logger_) {
custom_logger_->SendProfileEvent(event);
} else {
Expand Down Expand Up @@ -99,6 +98,11 @@ std::string Profiler::EndProfiling() {
return profile_stream_file_;
}

bool Profiler::IsEnabled() const
{
return enabled_;
}

//
// Conditionally sync the GPU if the syncGPU flag is set.
//
Expand Down
7 changes: 6 additions & 1 deletion onnxruntime/core/common/profiler.h
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ class Profiler {
void EndTimeAndRecordEvent(EventCategory category,
const std::string& event_name,
TimePoint& start_time,
const std::initializer_list<std::pair<std::string, std::string>>& event_args = {},
const std::pair<std::string, std::string>& tags = {},
bool sync_gpu = false);

/*
Expand All @@ -56,6 +56,11 @@ class Profiler {
*/
std::string EndProfiling();

/*
Return true if profiler is enabled, false otherwise.
*/
bool IsEnabled() const;

private:
ORT_DISALLOW_COPY_ASSIGNMENT_AND_MOVE(Profiler);

Expand Down
45 changes: 28 additions & 17 deletions onnxruntime/core/framework/parallel_executor.cc
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ Status ParallelExecutor::Execute(const SessionState& session_state,
const std::vector<std::string>& output_names,
std::vector<MLValue>& fetches,
const logging::Logger& logger) {
auto& profiler = session_state.Profiler();
auto tp = session_state.Profiler().StartTime();

root_frame_ = std::make_unique<ExecutionFrame>(feeds, output_names, fetches, session_state);
Expand Down Expand Up @@ -72,7 +73,10 @@ Status ParallelExecutor::Execute(const SessionState& session_state,
}
}

session_state.Profiler().EndTimeAndRecordEvent(profiling::SESSION_EVENT, "ParallelExecutor::Execute", tp);
if (profiler.IsEnabled()) {
profiler.EndTimeAndRecordEvent(profiling::SESSION_EVENT, "ParallelExecutor::Execute", tp);
}

return Status::OK();
}

Expand Down Expand Up @@ -115,8 +119,8 @@ void ParallelExecutor::RunNodeAsyncInternal(size_t p_node_index,
OpKernelContextInternal op_kernel_context(*root_frame_, *p_op_kernel, logger,
p_op_kernel->Node().ImplicitInputDefs(),
terminate_flag_);

auto sync_time_begin = session_state.Profiler().StartTime();
auto& profiler = session_state.Profiler();
auto sync_time_begin = profiler.StartTime();
// sync before compute
int queue_id = p_op_kernel->KernelDef().ExecQueueId();

Expand Down Expand Up @@ -144,28 +148,32 @@ void ParallelExecutor::RunNodeAsyncInternal(size_t p_node_index,
const std::string& node_name = p_op_kernel->Node().Name();
const std::string& op_name = p_op_kernel->KernelDef().OpName();

session_state.Profiler().EndTimeAndRecordEvent(profiling::NODE_EVENT,
node_name + "_fence_before",
sync_time_begin,
{{"op_name", op_name}});
if (profiler.IsEnabled()) {
profiler.EndTimeAndRecordEvent(profiling::NODE_EVENT,
node_name + "_fence_before",
sync_time_begin,
{ "op_name", op_name });
}

// call compute on the kernel
VLOGS(logger, 1) << "Computing kernel: " << p_op_kernel->Node().Name();

auto kernel_begin_time = session_state.Profiler().StartTime();
auto kernel_begin_time = profiler.StartTime();

// Execute the kernel.
auto status = p_op_kernel->Compute(&op_kernel_context);
if (!status.IsOK()) {
ORT_THROW("Compute failed for node: ", graph_viewer->GetNode(node_index)->Name());
}

session_state.Profiler().EndTimeAndRecordEvent(profiling::NODE_EVENT,
node_name + "_kernel_time",
kernel_begin_time,
{{"op_name", op_name}});
if (profiler.IsEnabled()) {
profiler.EndTimeAndRecordEvent(profiling::NODE_EVENT,
node_name + "_kernel_time",
kernel_begin_time,
{ "op_name", op_name });
}

sync_time_begin = session_state.Profiler().StartTime();
sync_time_begin = profiler.StartTime();
// sync after compute for outputs
for (int input_index = 0; input_index < op_kernel_context.InputCount(); ++input_index) {
Fence_t fence = op_kernel_context.InputFence(input_index);
Expand All @@ -187,10 +195,13 @@ void ParallelExecutor::RunNodeAsyncInternal(size_t p_node_index,
fence->AfterUsedAsOutput(queue_id);
}
}
session_state.Profiler().EndTimeAndRecordEvent(profiling::NODE_EVENT,
node_name + "_fence_after",
sync_time_begin,
{{"op_name", op_name}});

if (profiler.IsEnabled()) {
profiler.EndTimeAndRecordEvent(profiling::NODE_EVENT,
node_name + "_fence_after",
sync_time_begin,
{ "op_name", op_name });
}

//std::cout << "Run async node finish: " << p_node_index << std::endl;

Expand Down
46 changes: 29 additions & 17 deletions onnxruntime/core/framework/sequential_executor.cc
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,8 @@ Status SequentialExecutor::Execute(const SessionState& session_state,
const std::vector<std::string>& output_names,
std::vector<MLValue>& fetches,
const logging::Logger& logger) {
auto tp = session_state.Profiler().StartTime();
auto& profiler = session_state.Profiler();
auto tp = profiler.StartTime();

ExecutionFrame frame{feeds, output_names, fetches, session_state};

Expand Down Expand Up @@ -65,7 +66,7 @@ Status SequentialExecutor::Execute(const SessionState& session_state,
terminate_flag_);
// TODO: log kernel outputs?

auto sync_time_begin = session_state.Profiler().StartTime();
auto sync_time_begin = profiler.StartTime();
// sync before compute
int queue_id = p_op_kernel->KernelDef().ExecQueueId();
for (int input_index = 0; input_index < op_kernel_context.InputCount(); ++input_index) {
Expand All @@ -89,22 +90,27 @@ Status SequentialExecutor::Execute(const SessionState& session_state,
}
}

session_state.Profiler().EndTimeAndRecordEvent(profiling::NODE_EVENT,
node_name + "_fence_before",
sync_time_begin,
{{"op_name", op_name}});
if (profiler.IsEnabled()) {
profiler.EndTimeAndRecordEvent(profiling::NODE_EVENT,
node_name + "_fence_before",
sync_time_begin,
{ "op_name", op_name });
}

// call compute on the kernel
VLOGS(logger, 1) << "Computing kernel: " << p_op_kernel->Node().Name();

auto kernel_begin_time = session_state.Profiler().StartTime();
auto kernel_begin_time = profiler.StartTime();
ORT_RETURN_IF_ERROR(p_op_kernel->Compute(&op_kernel_context));
session_state.Profiler().EndTimeAndRecordEvent(profiling::NODE_EVENT,
node_name + "_kernel_time",
kernel_begin_time,
{{"op_name", op_name}});

sync_time_begin = session_state.Profiler().StartTime();
if (profiler.IsEnabled()) {
profiler.EndTimeAndRecordEvent(profiling::NODE_EVENT,
node_name + "_kernel_time",
kernel_begin_time,
{ "op_name", op_name });
}

sync_time_begin = profiler.StartTime();
// sync after compute for outputs
for (int input_index = 0; input_index < op_kernel_context.InputCount(); ++input_index) {
Fence_t fence = op_kernel_context.InputFence(input_index);
Expand All @@ -126,10 +132,13 @@ Status SequentialExecutor::Execute(const SessionState& session_state,
fence->AfterUsedAsOutput(queue_id);
}
}
session_state.Profiler().EndTimeAndRecordEvent(profiling::NODE_EVENT,
node_name + "_fence_after",
sync_time_begin,
{{"op_name", op_name}});

if (profiler.IsEnabled()) {
profiler.EndTimeAndRecordEvent(profiling::NODE_EVENT,
node_name + "_fence_after",
sync_time_begin,
{ "op_name", op_name });
}

// free ml-values corresponding to this node
VLOGS(logger, 1) << "Releasing node ML values after computing kernel: " << p_op_kernel->Node().Name();
Expand Down Expand Up @@ -158,7 +167,10 @@ Status SequentialExecutor::Execute(const SessionState& session_state,
}
}

session_state.Profiler().EndTimeAndRecordEvent(profiling::SESSION_EVENT, "SequentialExecutor::Execute", tp);
if (profiler.IsEnabled()) {
profiler.EndTimeAndRecordEvent(profiling::SESSION_EVENT, "SequentialExecutor::Execute", tp);
}

return Status::OK();
}

Expand Down