From 8e580fd1e9a09d87754aa15dfd1968fe8df3d7fd Mon Sep 17 00:00:00 2001 From: Kai Li Date: Thu, 20 Feb 2014 18:29:08 +0800 Subject: [PATCH 1/5] Synchronize GPU before CPU timers start and stop in net_speed_benchmark --- tools/net_speed_benchmark.cpp | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/tools/net_speed_benchmark.cpp b/tools/net_speed_benchmark.cpp index 9976744d35e..83fba1477e3 100644 --- a/tools/net_speed_benchmark.cpp +++ b/tools/net_speed_benchmark.cpp @@ -67,13 +67,22 @@ int main(int argc, char** argv) { vector*> >& bottom_vecs = caffe_net.bottom_vecs(); vector*> >& top_vecs = caffe_net.top_vecs(); LOG(ERROR) << "*** Benchmark begins ***"; + if (Caffe::mode() == Caffe::GPU) { + cudaDeviceSynchronize(); + } clock_t forward_start = clock(); for (int i = 0; i < layers.size(); ++i) { const string& layername = layers[i]->layer_param().name(); + if (Caffe::mode() == Caffe::GPU) { + cudaDeviceSynchronize(); + } clock_t start = clock(); for (int j = 0; j < total_iter; ++j) { layers[i]->Forward(bottom_vecs[i], &top_vecs[i]); } + if (Caffe::mode() == Caffe::GPU) { + cudaDeviceSynchronize(); + } LOG(ERROR) << layername << "\tforward: " << static_cast(clock() - start) / CLOCKS_PER_SEC << " seconds."; @@ -84,10 +93,16 @@ int main(int argc, char** argv) { clock_t backward_start = clock(); for (int i = layers.size() - 1; i >= 0; --i) { const string& layername = layers[i]->layer_param().name(); + if (Caffe::mode() == Caffe::GPU) { + cudaDeviceSynchronize(); + } clock_t start = clock(); for (int j = 0; j < total_iter; ++j) { layers[i]->Backward(top_vecs[i], true, &bottom_vecs[i]); } + if (Caffe::mode() == Caffe::GPU) { + cudaDeviceSynchronize(); + } LOG(ERROR) << layername << "\tbackward: " << static_cast(clock() - start) / CLOCKS_PER_SEC << " seconds."; From 263ceea2e7fa6614c7bd4b700771e11907925fba Mon Sep 17 00:00:00 2001 From: Kai Li Date: Thu, 20 Feb 2014 19:42:57 +0800 Subject: [PATCH 2/5] Add Timer to wrap CPU clock_t and GPU cudaEvent_t based timing --- include/caffe/util/benchmark.hpp | 27 ++++++++++++++++ src/caffe/util/benchmark.cpp | 53 ++++++++++++++++++++++++++++++++ 2 files changed, 80 insertions(+) create mode 100644 include/caffe/util/benchmark.hpp create mode 100644 src/caffe/util/benchmark.cpp diff --git a/include/caffe/util/benchmark.hpp b/include/caffe/util/benchmark.hpp new file mode 100644 index 00000000000..073b33c56b8 --- /dev/null +++ b/include/caffe/util/benchmark.hpp @@ -0,0 +1,27 @@ +// Copyright 2014 kloud@github + +#ifndef CAFFE_UTIL_BENCHMARK_H_ +#define CAFFE_UTIL_BENCHMARK_H_ + +#include + +namespace caffe { + +class Timer { + public: + Timer(); + virtual ~Timer(); + void Start(); + void Stop(); + float ElapsedSeconds(); + + protected: + cudaEvent_t start_gpu_; + cudaEvent_t stop_gpu_; + clock_t start_cpu_; + clock_t stop_cpu_; +}; + +} // namespace caffe + +#endif // CAFFE_UTIL_BENCHMARK_H_ diff --git a/src/caffe/util/benchmark.cpp b/src/caffe/util/benchmark.cpp new file mode 100644 index 00000000000..19a036d9879 --- /dev/null +++ b/src/caffe/util/benchmark.cpp @@ -0,0 +1,53 @@ +// Copyright 2014 kloud@github + +#include +#include + +#include "caffe/common.hpp" +#include "caffe/util/benchmark.hpp" + +namespace caffe { + +Timer::Timer() { + if (Caffe::mode() == Caffe::GPU) { + cudaEventCreate (&start_gpu_); + cudaEventCreate (&stop_gpu_); + } +} + +Timer::~Timer() { + if (Caffe::mode() == Caffe::GPU) { + cudaEventDestroy (start_gpu_); + cudaEventDestroy (stop_gpu_); + } +} + +void Timer::Start() { + if (Caffe::mode() == Caffe::GPU) { + cudaEventRecord(start_gpu_, 0); + } else { + start_cpu_ = clock(); + } +} + +void Timer::Stop() { + if (Caffe::mode() == Caffe::GPU) { + cudaEventRecord(stop_gpu_, 0); + } else { + stop_cpu_ = clock(); + } +} + +float Timer::ElapsedSeconds() { + float elapsed; + if (Caffe::mode() == Caffe::GPU) { + cudaEventSynchronize(stop_gpu_); + cudaEventElapsedTime(&elapsed, start_gpu_, stop_gpu_); + elapsed /= 1000.; + } else { + elapsed = float(stop_cpu_ - start_cpu_) / CLOCKS_PER_SEC; + } + return elapsed; +} + +} // namespace caffe From cd84539806b501c1f764f7b510a2cb3842a1c849 Mon Sep 17 00:00:00 2001 From: Kai Li Date: Thu, 20 Feb 2014 19:44:41 +0800 Subject: [PATCH 3/5] Replace CPU timer with newly added Timer to benchmark net speed --- tools/net_speed_benchmark.cpp | 72 ++++++++++++++--------------------- 1 file changed, 28 insertions(+), 44 deletions(-) diff --git a/tools/net_speed_benchmark.cpp b/tools/net_speed_benchmark.cpp index 83fba1477e3..a0e589e546a 100644 --- a/tools/net_speed_benchmark.cpp +++ b/tools/net_speed_benchmark.cpp @@ -1,31 +1,29 @@ // Copyright 2013 Yangqing Jia -#include -#include -#include +#include +#include +#include -#include "cuda_runtime.h" -#include "fcntl.h" -#include "google/protobuf/text_format.h" +#include +#include #include "caffe/blob.hpp" #include "caffe/common.hpp" #include "caffe/net.hpp" #include "caffe/filler.hpp" #include "caffe/proto/caffe.pb.h" +#include "caffe/util/benchmark.hpp" #include "caffe/util/io.hpp" #include "caffe/solver.hpp" -using boost::shared_ptr; - -using namespace caffe; // NOLINT(build/namespaces) +using namespace caffe; int main(int argc, char** argv) { + int total_iter = 50; if (argc < 2) { - LOG(ERROR) << "net_speed_benchmark net_proto [iterations=50] [CPU/GPU] " - << "[Device_id=0]"; + LOG(ERROR) << "net_speed_benchmark net_proto [iterations=50] [CPU/GPU] [Device_id=0]"; return 0; } @@ -67,52 +65,38 @@ int main(int argc, char** argv) { vector*> >& bottom_vecs = caffe_net.bottom_vecs(); vector*> >& top_vecs = caffe_net.top_vecs(); LOG(ERROR) << "*** Benchmark begins ***"; - if (Caffe::mode() == Caffe::GPU) { - cudaDeviceSynchronize(); - } - clock_t forward_start = clock(); + Timer total_timer; + total_timer.Start(); + Timer forward_timer; + forward_timer.Start(); + Timer timer; for (int i = 0; i < layers.size(); ++i) { const string& layername = layers[i]->layer_param().name(); - if (Caffe::mode() == Caffe::GPU) { - cudaDeviceSynchronize(); - } - clock_t start = clock(); + timer.Start(); for (int j = 0; j < total_iter; ++j) { layers[i]->Forward(bottom_vecs[i], &top_vecs[i]); } - if (Caffe::mode() == Caffe::GPU) { - cudaDeviceSynchronize(); - } - LOG(ERROR) << layername << "\tforward: " - << static_cast(clock() - start) / CLOCKS_PER_SEC - << " seconds."; + timer.Stop(); + LOG(ERROR) << layername << "\tforward: " << timer.ElapsedSeconds() << " seconds."; } - LOG(ERROR) << "Forward pass: " - << static_cast(clock() - forward_start) / CLOCKS_PER_SEC - << " seconds."; - clock_t backward_start = clock(); + forward_timer.Stop(); + LOG(ERROR) << "Forward pass: " << forward_timer.ElapsedSeconds() << " seconds."; + Timer backward_timer; + backward_timer.Start(); for (int i = layers.size() - 1; i >= 0; --i) { const string& layername = layers[i]->layer_param().name(); - if (Caffe::mode() == Caffe::GPU) { - cudaDeviceSynchronize(); - } - clock_t start = clock(); + timer.Start(); for (int j = 0; j < total_iter; ++j) { layers[i]->Backward(top_vecs[i], true, &bottom_vecs[i]); } - if (Caffe::mode() == Caffe::GPU) { - cudaDeviceSynchronize(); - } + timer.Stop(); LOG(ERROR) << layername << "\tbackward: " - << static_cast(clock() - start) / CLOCKS_PER_SEC - << " seconds."; + << timer.ElapsedSeconds() << " seconds."; } - LOG(ERROR) << "Backward pass: " - << static_cast(clock() - backward_start) / CLOCKS_PER_SEC - << " seconds."; - LOG(ERROR) << "Total Time: " - << static_cast(clock() - forward_start) / CLOCKS_PER_SEC - << " seconds."; + backward_timer.Stop(); + LOG(ERROR) << "Backward pass: " << backward_timer.ElapsedSeconds() << " seconds."; + total_timer.Stop(); + LOG(ERROR) << "Total Time: " << total_timer.ElapsedSeconds() << " seconds."; LOG(ERROR) << "*** Benchmark ends ***"; return 0; } From 342a9111c0acae020f1647e0570b729cb293cacf Mon Sep 17 00:00:00 2001 From: Kai Li Date: Wed, 26 Feb 2014 00:00:59 +0800 Subject: [PATCH 4/5] Add state machine, boost::posix_time based cpu timer & tests for Timer --- include/caffe/util/benchmark.hpp | 18 +++- src/caffe/test/test_benchmark.cpp | 169 ++++++++++++++++++++++++++++++ src/caffe/util/benchmark.cpp | 72 +++++++++---- tools/net_speed_benchmark.cpp | 15 +-- 4 files changed, 238 insertions(+), 36 deletions(-) create mode 100644 src/caffe/test/test_benchmark.cpp diff --git a/include/caffe/util/benchmark.hpp b/include/caffe/util/benchmark.hpp index 073b33c56b8..fd6719a6820 100644 --- a/include/caffe/util/benchmark.hpp +++ b/include/caffe/util/benchmark.hpp @@ -3,6 +3,7 @@ #ifndef CAFFE_UTIL_BENCHMARK_H_ #define CAFFE_UTIL_BENCHMARK_H_ +#include #include namespace caffe { @@ -13,13 +14,24 @@ class Timer { virtual ~Timer(); void Start(); void Stop(); - float ElapsedSeconds(); + float MilliSeconds(); + float Seconds(); + + inline bool initted() { return initted_; } + inline bool running() { return running_; } + inline bool has_run_at_least_once() { return has_run_at_least_once_; } protected: + void Init(); + + bool initted_; + bool running_; + bool has_run_at_least_once_; cudaEvent_t start_gpu_; cudaEvent_t stop_gpu_; - clock_t start_cpu_; - clock_t stop_cpu_; + boost::posix_time::ptime start_cpu_; + boost::posix_time::ptime stop_cpu_; + float elapsed_milliseconds_; }; } // namespace caffe diff --git a/src/caffe/test/test_benchmark.cpp b/src/caffe/test/test_benchmark.cpp new file mode 100644 index 00000000000..9866f7d0f20 --- /dev/null +++ b/src/caffe/test/test_benchmark.cpp @@ -0,0 +1,169 @@ +// Copyright 2014 kloud@github + +#include // for usleep +#include +#include + +#include "caffe/common.hpp" +#include "caffe/util/benchmark.hpp" +#include "caffe/test/test_caffe_main.hpp" + +namespace caffe { + +extern cudaDeviceProp CAFFE_TEST_CUDA_PROP; + +class BenchmarkTest : public ::testing::Test {}; + +TEST_F(BenchmarkTest, TestTimerConstructorCPU) { + Caffe::set_mode(Caffe::CPU); + Timer timer; + EXPECT_TRUE(timer.initted()); + EXPECT_FALSE(timer.running()); + EXPECT_FALSE(timer.has_run_at_least_once()); +} + +TEST_F(BenchmarkTest, TestTimerConstructorGPU) { + Caffe::set_mode(Caffe::GPU); + Timer timer; + EXPECT_TRUE(timer.initted()); + EXPECT_FALSE(timer.running()); + EXPECT_FALSE(timer.has_run_at_least_once()); +} + +TEST_F(BenchmarkTest, TestTimerStartCPU) { + Caffe::set_mode(Caffe::CPU); + Timer timer; + timer.Start(); + EXPECT_TRUE(timer.initted()); + EXPECT_TRUE(timer.running()); + EXPECT_TRUE(timer.has_run_at_least_once()); + timer.Start(); + EXPECT_TRUE(timer.initted()); + EXPECT_TRUE(timer.running()); + EXPECT_TRUE(timer.has_run_at_least_once()); + timer.Stop(); + timer.Start(); + EXPECT_TRUE(timer.initted()); + EXPECT_TRUE(timer.running()); + EXPECT_TRUE(timer.has_run_at_least_once()); +} + +TEST_F(BenchmarkTest, TestTimerStartGPU) { + Caffe::set_mode(Caffe::GPU); + Timer timer; + timer.Start(); + EXPECT_TRUE(timer.initted()); + EXPECT_TRUE(timer.running()); + EXPECT_TRUE(timer.has_run_at_least_once()); + timer.Stop(); + timer.Start(); + EXPECT_TRUE(timer.initted()); + EXPECT_TRUE(timer.running()); + EXPECT_TRUE(timer.has_run_at_least_once()); + timer.Start(); + EXPECT_TRUE(timer.initted()); + EXPECT_TRUE(timer.running()); + EXPECT_TRUE(timer.has_run_at_least_once()); +} + +TEST_F(BenchmarkTest, TestTimerStopCPU) { + Caffe::set_mode(Caffe::CPU); + Timer timer; + timer.Stop(); + EXPECT_TRUE(timer.initted()); + EXPECT_FALSE(timer.running()); + EXPECT_FALSE(timer.has_run_at_least_once()); + timer.Start(); + timer.Stop(); + EXPECT_TRUE(timer.initted()); + EXPECT_FALSE(timer.running()); + EXPECT_TRUE(timer.has_run_at_least_once()); + timer.Stop(); + EXPECT_TRUE(timer.initted()); + EXPECT_FALSE(timer.running()); + EXPECT_TRUE(timer.has_run_at_least_once()); +} + +TEST_F(BenchmarkTest, TestTimerStopGPU) { + Caffe::set_mode(Caffe::GPU); + Timer timer; + timer.Stop(); + EXPECT_TRUE(timer.initted()); + EXPECT_FALSE(timer.running()); + EXPECT_FALSE(timer.has_run_at_least_once()); + timer.Start(); + timer.Stop(); + EXPECT_TRUE(timer.initted()); + EXPECT_FALSE(timer.running()); + EXPECT_TRUE(timer.has_run_at_least_once()); + timer.Stop(); + EXPECT_TRUE(timer.initted()); + EXPECT_FALSE(timer.running()); + EXPECT_TRUE(timer.has_run_at_least_once()); +} + +TEST_F(BenchmarkTest, TestTimerMilliSecondsCPU) { + Caffe::set_mode(Caffe::CPU); + Timer timer; + CHECK_EQ(timer.MilliSeconds(), 0); + EXPECT_TRUE(timer.initted()); + EXPECT_FALSE(timer.running()); + EXPECT_FALSE(timer.has_run_at_least_once()); + timer.Start(); + usleep(300 * 1000); + CHECK_GE(timer.MilliSeconds(), 299); + CHECK_LE(timer.MilliSeconds(), 301); + EXPECT_TRUE(timer.initted()); + EXPECT_FALSE(timer.running()); + EXPECT_TRUE(timer.has_run_at_least_once()); +} + +TEST_F(BenchmarkTest, TestTimerMilliSecondsGPU) { + Caffe::set_mode(Caffe::GPU); + Timer timer; + CHECK_EQ(timer.MilliSeconds(), 0); + EXPECT_TRUE(timer.initted()); + EXPECT_FALSE(timer.running()); + EXPECT_FALSE(timer.has_run_at_least_once()); + timer.Start(); + usleep(300 * 1000); + CHECK_GE(timer.MilliSeconds(), 299); + CHECK_LE(timer.MilliSeconds(), 301); + EXPECT_TRUE(timer.initted()); + EXPECT_FALSE(timer.running()); + EXPECT_TRUE(timer.has_run_at_least_once()); +} + +TEST_F(BenchmarkTest, TestTimerSecondsCPU) { + Caffe::set_mode(Caffe::CPU); + Timer timer; + CHECK_EQ(timer.Seconds(), 0); + EXPECT_TRUE(timer.initted()); + EXPECT_FALSE(timer.running()); + EXPECT_FALSE(timer.has_run_at_least_once()); + timer.Start(); + usleep(300 * 1000); + CHECK_GE(timer.Seconds(), 0.299); + CHECK_LE(timer.Seconds(), 0.301); + EXPECT_TRUE(timer.initted()); + EXPECT_FALSE(timer.running()); + EXPECT_TRUE(timer.has_run_at_least_once()); +} + +TEST_F(BenchmarkTest, TestTimerSecondsGPU) { + Caffe::set_mode(Caffe::GPU); + Timer timer; + CHECK_EQ(timer.Seconds(), 0); + EXPECT_TRUE(timer.initted()); + EXPECT_FALSE(timer.running()); + EXPECT_FALSE(timer.has_run_at_least_once()); + timer.Start(); + usleep(300 * 1000); + CHECK_GE(timer.Seconds(), 0.299); + CHECK_LE(timer.Seconds(), 0.301); + EXPECT_TRUE(timer.initted()); + EXPECT_FALSE(timer.running()); + EXPECT_TRUE(timer.has_run_at_least_once()); +} + +} // namespace caffe diff --git a/src/caffe/util/benchmark.cpp b/src/caffe/util/benchmark.cpp index 19a036d9879..b97a3c3c3e5 100644 --- a/src/caffe/util/benchmark.cpp +++ b/src/caffe/util/benchmark.cpp @@ -1,6 +1,6 @@ // Copyright 2014 kloud@github -#include +#include #include #include "caffe/common.hpp" @@ -8,46 +8,72 @@ namespace caffe { -Timer::Timer() { - if (Caffe::mode() == Caffe::GPU) { - cudaEventCreate (&start_gpu_); - cudaEventCreate (&stop_gpu_); - } +Timer::Timer() + : initted_(false), + running_(false), + has_run_at_least_once_(false) { + Init(); } Timer::~Timer() { if (Caffe::mode() == Caffe::GPU) { - cudaEventDestroy (start_gpu_); - cudaEventDestroy (stop_gpu_); + CUDA_CHECK(cudaEventDestroy(start_gpu_)); + CUDA_CHECK(cudaEventDestroy(stop_gpu_)); } } void Timer::Start() { - if (Caffe::mode() == Caffe::GPU) { - cudaEventRecord(start_gpu_, 0); - } else { - start_cpu_ = clock(); + if (!running()) { + if (Caffe::mode() == Caffe::GPU) { + CUDA_CHECK(cudaEventRecord(start_gpu_, 0)); + } else { + start_cpu_ = boost::posix_time::microsec_clock::local_time(); + } + running_ = true; + has_run_at_least_once_ = true; } } void Timer::Stop() { - if (Caffe::mode() == Caffe::GPU) { - cudaEventRecord(stop_gpu_, 0); - } else { - stop_cpu_ = clock(); + if (running()) { + if (Caffe::mode() == Caffe::GPU) { + CUDA_CHECK(cudaEventRecord(stop_gpu_, 0)); + CUDA_CHECK(cudaEventSynchronize(stop_gpu_)); + } else { + stop_cpu_ = boost::posix_time::microsec_clock::local_time(); + } + running_ = false; } } -float Timer::ElapsedSeconds() { - float elapsed; +float Timer::MilliSeconds() { + if (!has_run_at_least_once()) { + LOG(WARNING) << "Timer has never been run before reading time."; + return 0; + } + if (running()) { + Stop(); + } if (Caffe::mode() == Caffe::GPU) { - cudaEventSynchronize(stop_gpu_); - cudaEventElapsedTime(&elapsed, start_gpu_, stop_gpu_); - elapsed /= 1000.; + CUDA_CHECK(cudaEventElapsedTime(&elapsed_milliseconds_, start_gpu_, stop_gpu_)); } else { - elapsed = float(stop_cpu_ - start_cpu_) / CLOCKS_PER_SEC; + elapsed_milliseconds_ = (stop_cpu_ - start_cpu_).total_milliseconds(); + } + return elapsed_milliseconds_; +} + +float Timer::Seconds() { + return MilliSeconds() / 1000.; +} + +void Timer::Init() { + if (!initted()) { + if (Caffe::mode() == Caffe::GPU) { + CUDA_CHECK(cudaEventCreate(&start_gpu_)); + CUDA_CHECK(cudaEventCreate(&stop_gpu_)); + } + initted_ = true; } - return elapsed; } } // namespace caffe diff --git a/tools/net_speed_benchmark.cpp b/tools/net_speed_benchmark.cpp index a0e589e546a..e7bcd4822f7 100644 --- a/tools/net_speed_benchmark.cpp +++ b/tools/net_speed_benchmark.cpp @@ -76,11 +76,9 @@ int main(int argc, char** argv) { for (int j = 0; j < total_iter; ++j) { layers[i]->Forward(bottom_vecs[i], &top_vecs[i]); } - timer.Stop(); - LOG(ERROR) << layername << "\tforward: " << timer.ElapsedSeconds() << " seconds."; + LOG(ERROR) << layername << "\tforward: " << timer.MilliSeconds() << " milli seconds."; } - forward_timer.Stop(); - LOG(ERROR) << "Forward pass: " << forward_timer.ElapsedSeconds() << " seconds."; + LOG(ERROR) << "Forward pass: " << forward_timer.MilliSeconds() << " milli seconds."; Timer backward_timer; backward_timer.Start(); for (int i = layers.size() - 1; i >= 0; --i) { @@ -89,14 +87,11 @@ int main(int argc, char** argv) { for (int j = 0; j < total_iter; ++j) { layers[i]->Backward(top_vecs[i], true, &bottom_vecs[i]); } - timer.Stop(); LOG(ERROR) << layername << "\tbackward: " - << timer.ElapsedSeconds() << " seconds."; + << timer.MilliSeconds() << " milli seconds."; } - backward_timer.Stop(); - LOG(ERROR) << "Backward pass: " << backward_timer.ElapsedSeconds() << " seconds."; - total_timer.Stop(); - LOG(ERROR) << "Total Time: " << total_timer.ElapsedSeconds() << " seconds."; + LOG(ERROR) << "Backward pass: " << backward_timer.MilliSeconds() << " milli seconds."; + LOG(ERROR) << "Total Time: " << total_timer.MilliSeconds() << " milli seconds."; LOG(ERROR) << "*** Benchmark ends ***"; return 0; } From cc509a913bcc3700b1e6ca9c67ae2708259282f1 Mon Sep 17 00:00:00 2001 From: Kai Li Date: Wed, 26 Feb 2014 21:11:53 +0800 Subject: [PATCH 5/5] Fix the cpplint errors for benchmark Timer --- src/caffe/test/test_benchmark.cpp | 2 +- src/caffe/util/benchmark.cpp | 3 ++- tools/net_speed_benchmark.cpp | 21 +++++++++++++-------- 3 files changed, 16 insertions(+), 10 deletions(-) diff --git a/src/caffe/test/test_benchmark.cpp b/src/caffe/test/test_benchmark.cpp index 9866f7d0f20..e8e3a116e3e 100644 --- a/src/caffe/test/test_benchmark.cpp +++ b/src/caffe/test/test_benchmark.cpp @@ -1,6 +1,6 @@ // Copyright 2014 kloud@github -#include // for usleep +#include // for usleep #include #include diff --git a/src/caffe/util/benchmark.cpp b/src/caffe/util/benchmark.cpp index b97a3c3c3e5..21c38ad36fe 100644 --- a/src/caffe/util/benchmark.cpp +++ b/src/caffe/util/benchmark.cpp @@ -55,7 +55,8 @@ float Timer::MilliSeconds() { Stop(); } if (Caffe::mode() == Caffe::GPU) { - CUDA_CHECK(cudaEventElapsedTime(&elapsed_milliseconds_, start_gpu_, stop_gpu_)); + CUDA_CHECK(cudaEventElapsedTime(&elapsed_milliseconds_, start_gpu_, + stop_gpu_)); } else { elapsed_milliseconds_ = (stop_cpu_ - start_cpu_).total_milliseconds(); } diff --git a/tools/net_speed_benchmark.cpp b/tools/net_speed_benchmark.cpp index e7bcd4822f7..96d40a2eb37 100644 --- a/tools/net_speed_benchmark.cpp +++ b/tools/net_speed_benchmark.cpp @@ -6,6 +6,8 @@ #include #include +#include +#include #include "caffe/blob.hpp" #include "caffe/common.hpp" @@ -16,14 +18,13 @@ #include "caffe/util/io.hpp" #include "caffe/solver.hpp" -using namespace caffe; +using namespace caffe; // NOLINT(build/namespaces) int main(int argc, char** argv) { - int total_iter = 50; - if (argc < 2) { - LOG(ERROR) << "net_speed_benchmark net_proto [iterations=50] [CPU/GPU] [Device_id=0]"; + LOG(ERROR) << "net_speed_benchmark net_proto [iterations=50]" + " [CPU/GPU] [Device_id=0]"; return 0; } @@ -76,9 +77,11 @@ int main(int argc, char** argv) { for (int j = 0; j < total_iter; ++j) { layers[i]->Forward(bottom_vecs[i], &top_vecs[i]); } - LOG(ERROR) << layername << "\tforward: " << timer.MilliSeconds() << " milli seconds."; + LOG(ERROR) << layername << "\tforward: " << timer.MilliSeconds() << + " milli seconds."; } - LOG(ERROR) << "Forward pass: " << forward_timer.MilliSeconds() << " milli seconds."; + LOG(ERROR) << "Forward pass: " << forward_timer.MilliSeconds() << + " milli seconds."; Timer backward_timer; backward_timer.Start(); for (int i = layers.size() - 1; i >= 0; --i) { @@ -90,8 +93,10 @@ int main(int argc, char** argv) { LOG(ERROR) << layername << "\tbackward: " << timer.MilliSeconds() << " milli seconds."; } - LOG(ERROR) << "Backward pass: " << backward_timer.MilliSeconds() << " milli seconds."; - LOG(ERROR) << "Total Time: " << total_timer.MilliSeconds() << " milli seconds."; + LOG(ERROR) << "Backward pass: " << backward_timer.MilliSeconds() << + " milli seconds."; + LOG(ERROR) << "Total Time: " << total_timer.MilliSeconds() << + " milli seconds."; LOG(ERROR) << "*** Benchmark ends ***"; return 0; }