diff --git a/docs/user_guide.md b/docs/user_guide.md
index 2ceb13eb59..b2dafe1092 100644
--- a/docs/user_guide.md
+++ b/docs/user_guide.md
@@ -830,6 +830,64 @@ BENCHMARK(BM_test)->Range(8, 8<<10)->UseRealTime();
Without `UseRealTime`, CPU time is used by default.
+### Manual Multithreaded Benchmarks
+
+Google/benchmark uses `std::thread` as multithreading environment per default.
+If you want to use another multithreading environment (e.g. OpenMP), you can
+turn off the automatic creation of threads using the `ManualThreading` function.
+```c++
+static void BM_MultiThreaded(benchmark::State& state) {
+ // Setup code here.
+ for (auto _ : state) {
+#pragma omp parallel num_threads(state.threads)
+ // Run the multithreaded test.
+ }
+ // Teardown code here.
+}
+
+BENCHMARK(BM_MultiThreaded)->ManualThreading()->Threads(1)->Threads(2)->Threads(4);
+```
+The above example creates a parallel region in each iteration.
+This includes the setup and teardown of the parallel region in the time measurement, and it
+adds an implicit barrier at the end of each iteration.
+You can avoid these effects, if you run the whole loop in parallel.
+Then you must not use the `state` object directly, but create a `ThreadState` object in each thread.
+```c++
+static void BM_MultiThreaded(benchmark::State& state) {
+ // Setup code (shared objects) here.
+#pragma omp parallel num_threads(state.threads)
+ {
+ // Thread-local setup code here.
+ for (auto _ : benchmark::ThreadState(state)) {
+ // Run the multithreaded test.
+ }
+ }
+ // Teardown code here.
+}
+
+BENCHMARK(BM_MultiThreaded)->ManualThreading()->Threads(1)->Threads(2)->Threads(4);
+```
+If you use the `ThreadState` object and explicitly specify the number of threads, then you must
+use `ManualThreading` and the number of created `ThreadState` objects must match the number of specified threads.
+However, if you use `ThreadState` without explicitly specifying the number of threads,
+then the number of threads is determined by the number of created `ThreadState` objects.
+Specifying `ManualThreading` is optional in this case.
+```c++
+static void BM_MultiThreaded(benchmark::State& state) {
+ // Setup code (shared objects) here.
+#pragma omp parallel
+ {
+ // Thread-local setup code here.
+ for (auto _ : benchmark::ThreadState(state)) {
+ // Run the multithreaded test.
+ }
+ }
+ // Teardown code here.
+}
+
+BENCHMARK(BM_MultiThreaded); // measures omp_get_max_threads number of threads.
+```
+
## CPU Timers
diff --git a/include/benchmark/benchmark.h b/include/benchmark/benchmark.h
index 23103571bb..2057fd6475 100644
--- a/include/benchmark/benchmark.h
+++ b/include/benchmark/benchmark.h
@@ -930,6 +930,9 @@ class BENCHMARK_EXPORT State {
return max_iterations - total_iterations_ + batch_leftover_;
}
+ BENCHMARK_ALWAYS_INLINE
+ int GetNumThreadStates() const { return num_thread_states_; }
+
BENCHMARK_ALWAYS_INLINE
std::string name() const { return name_; }
@@ -976,12 +979,31 @@ class BENCHMARK_EXPORT State {
const std::string name_;
const int thread_index_;
const int threads_;
+ int num_thread_states_;
- internal::ThreadTimer* const timer_;
internal::ThreadManager* const manager_;
- internal::PerfCountersMeasurement* const perf_counters_measurement_;
friend class internal::BenchmarkInstance;
+ friend class ThreadState;
+
+ protected:
+ void MergeThreadStateToParent(State& parent) const;
+ bool started() const { return started_; }
+
+ internal::ThreadTimer* timer_;
+ internal::PerfCountersMeasurement* perf_counters_measurement_;
+};
+
+// ThreadState can be used in a manually multithreaded benchmark loop.
+class BENCHMARK_EXPORT ThreadState : public State {
+ public:
+ explicit ThreadState(State& s);
+ ~ThreadState();
+
+ private:
+ State* parent_;
+
+ ThreadState(const ThreadState&);
};
inline BENCHMARK_ALWAYS_INLINE bool State::KeepRunning() {
@@ -1274,6 +1296,13 @@ class BENCHMARK_EXPORT Benchmark {
// Equivalent to ThreadRange(NumCPUs(), NumCPUs())
Benchmark* ThreadPerCpu();
+ // Don't create threads. Let the user evaluate state.threads and/or use
+ // ThreadState.
+ Benchmark* ManualThreading() {
+ manual_threading_ = true;
+ return this;
+ }
+
virtual void Run(State& state) = 0;
TimeUnit GetTimeUnit() const;
@@ -1286,6 +1315,7 @@ class BENCHMARK_EXPORT Benchmark {
const char* GetName() const;
int ArgsCnt() const;
const char* GetArgName(int arg) const;
+ bool GetExplicitThreading() const { return !thread_counts_.empty(); }
private:
friend class BenchmarkFamilies;
@@ -1307,6 +1337,7 @@ class BENCHMARK_EXPORT Benchmark {
bool measure_process_cpu_time_;
bool use_real_time_;
bool use_manual_time_;
+ bool manual_threading_;
BigO complexity_;
BigOFunc* complexity_lambda_;
std::vector statistics_;
diff --git a/src/benchmark.cc b/src/benchmark.cc
index 6139e59d05..aefde5b410 100644
--- a/src/benchmark.cc
+++ b/src/benchmark.cc
@@ -172,8 +172,9 @@ State::State(std::string name, IterationCount max_iters,
name_(std::move(name)),
thread_index_(thread_i),
threads_(n_threads),
- timer_(timer),
+ num_thread_states_(0),
manager_(manager),
+ timer_(timer),
perf_counters_measurement_(perf_counters_measurement) {
BM_CHECK(max_iterations != 0) << "At least one iteration must be run";
BM_CHECK_LT(thread_index_, threads_)
@@ -309,6 +310,44 @@ void State::FinishKeepRunning() {
manager_->StartStopBarrier();
}
+void State::MergeThreadStateToParent(State& parent) const {
+ MutexLock l(manager_->GetBenchmarkMutex());
+ internal::MergeResults(*this, timer_, manager_);
+ assert(parent.total_iterations_ == 0 ||
+ parent.total_iterations_ == total_iterations_);
+ assert(parent.batch_leftover_ == 0 ||
+ parent.batch_leftover_ == batch_leftover_);
+ parent.total_iterations_ = total_iterations_;
+ parent.batch_leftover_ = batch_leftover_;
+ parent.started_ = parent.started_ || started_;
+ parent.finished_ = parent.finished_ || finished_;
+ parent.skipped_ =
+ (parent.error_occurred() || error_occurred())
+ ? internal::SkippedWithError
+ : (parent.skipped() || skipped() ? internal::SkippedWithMessage
+ : internal::NotSkipped);
+ parent.num_thread_states_++;
+}
+
+ThreadState::ThreadState(State& s)
+ : State(s.name(), s.max_iterations, s.range_, s.thread_index(), s.threads(),
+ new internal::ThreadTimer(
+ internal::ThreadTimer::CreateFromTimer(*s.timer_)),
+ s.manager_,
+ s.perf_counters_measurement_
+ ? new internal::PerfCountersMeasurement(
+ s.perf_counters_measurement_->names())
+ : 0),
+ parent_(&s) {}
+
+ThreadState::~ThreadState() {
+ BM_CHECK(error_occurred() || iterations() >= max_iterations)
+ << "Benchmark returned before ThreadState::KeepRunning() returned false!";
+ MergeThreadStateToParent(*parent_);
+ delete timer_;
+ delete perf_counters_measurement_;
+}
+
namespace internal {
namespace {
diff --git a/src/benchmark_api_internal.cc b/src/benchmark_api_internal.cc
index 286f986530..801d2a8aa6 100644
--- a/src/benchmark_api_internal.cc
+++ b/src/benchmark_api_internal.cc
@@ -2,7 +2,10 @@
#include
+#include "counter.h"
#include "string_util.h"
+#include "thread_manager.h"
+#include "thread_timer.h"
namespace benchmark {
namespace internal {
@@ -27,7 +30,9 @@ BenchmarkInstance::BenchmarkInstance(Benchmark* benchmark, int family_idx,
min_time_(benchmark_.min_time_),
min_warmup_time_(benchmark_.min_warmup_time_),
iterations_(benchmark_.iterations_),
- threads_(thread_count) {
+ threads_(thread_count),
+ manual_threading_(benchmark_.manual_threading_),
+ explicit_threading_(benchmark_.GetExplicitThreading()) {
name_.function_name = benchmark_.name_;
size_t arg_i = 0;
@@ -114,5 +119,16 @@ void BenchmarkInstance::Teardown() const {
teardown_(st);
}
}
+
+void MergeResults(const State& st, const ThreadTimer* timer,
+ ThreadManager* manager) NO_THREAD_SAFETY_ANALYSIS {
+ ThreadManager::Result& results = manager->results;
+ results.iterations += st.iterations();
+ results.cpu_time_used += timer->cpu_time_used();
+ results.real_time_used += timer->real_time_used();
+ results.manual_time_used += timer->manual_time_used();
+ results.complexity_n += st.complexity_length_n();
+ Increment(&results.counters, st.counters);
+}
} // namespace internal
} // namespace benchmark
diff --git a/src/benchmark_api_internal.h b/src/benchmark_api_internal.h
index 94f516531b..8aa0a2bccb 100644
--- a/src/benchmark_api_internal.h
+++ b/src/benchmark_api_internal.h
@@ -10,6 +10,7 @@
#include "benchmark/benchmark.h"
#include "commandlineflags.h"
+#include "mutex.h"
namespace benchmark {
namespace internal {
@@ -41,6 +42,8 @@ class BenchmarkInstance {
int threads() const { return threads_; }
void Setup() const;
void Teardown() const;
+ bool explicit_threading() const { return explicit_threading_; }
+ bool manual_threading() const { return manual_threading_; }
State Run(IterationCount iters, int thread_id, internal::ThreadTimer* timer,
internal::ThreadManager* manager,
@@ -66,6 +69,9 @@ class BenchmarkInstance {
double min_warmup_time_;
IterationCount iterations_;
int threads_; // Number of concurrent threads to us
+ bool manual_threading_;
+ bool explicit_threading_; // true: Number of threads come from a Threads()
+ // call
typedef void (*callback_function)(const benchmark::State&);
callback_function setup_ = nullptr;
@@ -78,6 +84,10 @@ bool FindBenchmarksInternal(const std::string& re,
bool IsZero(double n);
+// only call while holding benchmark_mutex_:
+void MergeResults(const State& st, const ThreadTimer* timer,
+ ThreadManager* manager) NO_THREAD_SAFETY_ANALYSIS;
+
BENCHMARK_EXPORT
ConsoleReporter::OutputOptions GetOutputOptions(bool force_no_color = false);
diff --git a/src/benchmark_register.cc b/src/benchmark_register.cc
index e447c9a2d3..7c091c4ff9 100644
--- a/src/benchmark_register.cc
+++ b/src/benchmark_register.cc
@@ -217,6 +217,7 @@ Benchmark::Benchmark(const std::string& name)
measure_process_cpu_time_(false),
use_real_time_(false),
use_manual_time_(false),
+ manual_threading_(false),
complexity_(oNone),
complexity_lambda_(nullptr),
setup_(nullptr),
diff --git a/src/benchmark_runner.cc b/src/benchmark_runner.cc
index f5cd3e644b..0a2d389c2b 100644
--- a/src/benchmark_runner.cc
+++ b/src/benchmark_runner.cc
@@ -86,7 +86,7 @@ BenchmarkReporter::Run CreateRunReport(
// This is the total iterations across all threads.
report.iterations = results.iterations;
report.time_unit = b.time_unit();
- report.threads = b.threads();
+ report.threads = results.thread_count;
report.repetition_index = repetition_index;
report.repetitions = repeats;
@@ -130,17 +130,36 @@ void RunInThread(const BenchmarkInstance* b, IterationCount iters,
State st =
b->Run(iters, thread_id, &timer, manager, perf_counters_measurement);
- BM_CHECK(st.skipped() || st.iterations() >= st.max_iterations)
- << "Benchmark returned before State::KeepRunning() returned false!";
+
+ assert(b->explicit_threading() || b->threads() == 1);
+
+ if (st.GetNumThreadStates() > 0) {
+ BM_CHECK((!b->explicit_threading()) || b->manual_threading())
+ << "Benchmark " << b->name().str()
+ << " run with managed threading. It must not create ThreadStates!";
+ BM_CHECK((!b->explicit_threading()) ||
+ st.GetNumThreadStates() == b->threads())
+ << "The number of ThreadStates created by Benchmark " << b->name().str()
+ << " doesn't match the number of threads!";
+ } else {
+ BM_CHECK(st.skipped() || st.iterations() >= st.max_iterations)
+ << "Benchmark returned before State::KeepRunning() returned false!";
+ }
+
{
MutexLock l(manager->GetBenchmarkMutex());
internal::ThreadManager::Result& results = manager->results;
- results.iterations += st.iterations();
- results.cpu_time_used += timer.cpu_time_used();
- results.real_time_used += timer.real_time_used();
- results.manual_time_used += timer.manual_time_used();
- results.complexity_n += st.complexity_length_n();
- internal::Increment(&results.counters, st.counters);
+ if (st.GetNumThreadStates() > 0) {
+ // State values as well as thread state values are summed up for
+ // complexity_n and user counters:
+ results.complexity_n += st.complexity_length_n();
+ internal::Increment(&results.counters, st.counters);
+ results.thread_count =
+ b->explicit_threading() ? b->threads() : st.GetNumThreadStates();
+ } else {
+ internal::MergeResults(st, &timer, manager);
+ results.thread_count = b->threads();
+ }
}
manager->NotifyThreadComplete();
}
@@ -234,7 +253,8 @@ BenchmarkRunner::BenchmarkRunner(
has_explicit_iteration_count(b.iterations() != 0 ||
parsed_benchtime_flag.tag ==
BenchTimeType::ITERS),
- pool(b.threads() - 1),
+ num_managed_threads(b.manual_threading() ? 1 : b.threads()),
+ pool(num_managed_threads - 1),
iters(has_explicit_iteration_count
? ComputeIters(b_, parsed_benchtime_flag)
: 1),
@@ -260,7 +280,7 @@ BenchmarkRunner::IterationResults BenchmarkRunner::DoNIterations() {
BM_VLOG(2) << "Running " << b.name().str() << " for " << iters << "\n";
std::unique_ptr manager;
- manager.reset(new internal::ThreadManager(b.threads()));
+ manager.reset(new internal::ThreadManager(num_managed_threads));
// Run all but one thread in separate threads
for (std::size_t ti = 0; ti < pool.size(); ++ti) {
@@ -287,17 +307,18 @@ BenchmarkRunner::IterationResults BenchmarkRunner::DoNIterations() {
manager.reset();
// Adjust real/manual time stats since they were reported per thread.
- i.results.real_time_used /= b.threads();
- i.results.manual_time_used /= b.threads();
+ i.results.real_time_used /= i.results.thread_count;
+ i.results.manual_time_used /= i.results.thread_count;
// If we were measuring whole-process CPU usage, adjust the CPU time too.
- if (b.measure_process_cpu_time()) i.results.cpu_time_used /= b.threads();
+ if (b.measure_process_cpu_time())
+ i.results.cpu_time_used /= i.results.thread_count;
BM_VLOG(2) << "Ran in " << i.results.cpu_time_used << "/"
<< i.results.real_time_used << "\n";
// By using KeepRunningBatch a benchmark can iterate more times than
// requested, so take the iteration count from i.results.
- i.iters = i.results.iterations / b.threads();
+ i.iters = i.results.iterations / i.results.thread_count;
// Base decisions off of real time if requested by this benchmark.
i.seconds = i.results.cpu_time_used;
diff --git a/src/benchmark_runner.h b/src/benchmark_runner.h
index db2fa04396..32e91b76cc 100644
--- a/src/benchmark_runner.h
+++ b/src/benchmark_runner.h
@@ -93,6 +93,7 @@ class BenchmarkRunner {
bool warmup_done;
const int repeats;
const bool has_explicit_iteration_count;
+ const int num_managed_threads; // must be before pool
int num_repetitions_done = 0;
diff --git a/src/thread_manager.h b/src/thread_manager.h
index 819b3c44db..612e61e081 100644
--- a/src/thread_manager.h
+++ b/src/thread_manager.h
@@ -45,6 +45,7 @@ class ThreadManager {
std::string report_label_;
std::string skip_message_;
internal::Skipped skipped_ = internal::NotSkipped;
+ int thread_count = 0;
UserCounters counters;
};
GUARDED_BY(GetBenchmarkMutex()) Result results;
diff --git a/src/thread_timer.h b/src/thread_timer.h
index eb23f59561..5b226f22cd 100644
--- a/src/thread_timer.h
+++ b/src/thread_timer.h
@@ -18,6 +18,9 @@ class ThreadTimer {
static ThreadTimer CreateProcessCpuTime() {
return ThreadTimer(/*measure_process_cpu_time_=*/true);
}
+ static ThreadTimer CreateFromTimer(const ThreadTimer& timer) {
+ return ThreadTimer(timer.measure_process_cpu_time);
+ }
// Called by each thread
void StartTimer() {
diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt
index ac1a00f582..833bb8bdb4 100644
--- a/test/CMakeLists.txt
+++ b/test/CMakeLists.txt
@@ -167,6 +167,9 @@ add_test(NAME perf_counters_test COMMAND perf_counters_test --benchmark_min_time
compile_output_test(internal_threading_test)
add_test(NAME internal_threading_test COMMAND internal_threading_test --benchmark_min_time=0.01s)
+compile_output_test(manual_threading_test)
+add_test(NAME manual_threading_test COMMAND manual_threading_test --benchmark_min_time=0.01s)
+
compile_output_test(report_aggregates_only_test)
add_test(NAME report_aggregates_only_test COMMAND report_aggregates_only_test --benchmark_min_time=0.01s)
diff --git a/test/manual_threading_test.cc b/test/manual_threading_test.cc
new file mode 100644
index 0000000000..b8c249b9eb
--- /dev/null
+++ b/test/manual_threading_test.cc
@@ -0,0 +1,224 @@
+
+#undef NDEBUG
+
+#include
+#include
+#include
+
+#include "../src/timers.h"
+#include "benchmark/benchmark.h"
+#include "output_test.h"
+
+namespace {
+
+static const std::chrono::duration time_frame(50);
+static const double time_frame_in_sec(
+ std::chrono::duration_cast>>(
+ time_frame)
+ .count());
+
+void MyBusySpinwait() {
+ const auto start = benchmark::ChronoClockNow();
+
+ while (true) {
+ const auto now = benchmark::ChronoClockNow();
+ const auto elapsed = now - start;
+
+ if (std::chrono::duration(elapsed) >=
+ time_frame)
+ return;
+ }
+}
+
+} // namespace
+
+// ========================================================================= //
+// --------------------------- TEST CASES BEGIN ---------------------------- //
+// ========================================================================= //
+
+// ========================================================================= //
+// BM_ManualThreadingInLoop
+// Measurements include the creation and joining of threads.
+
+void BM_ManualThreadingInLoop(benchmark::State& state) {
+ int numWorkerThreads = state.threads() - 1;
+ std::vector pool(numWorkerThreads);
+
+ for (auto _ : state) {
+ for (int i = 0; i < numWorkerThreads; ++i) {
+ pool[i] = std::thread(MyBusySpinwait);
+ }
+ MyBusySpinwait();
+ for (int i = 0; i < numWorkerThreads; ++i) {
+ pool[i].join();
+ }
+ state.SetIterationTime(time_frame_in_sec);
+ }
+ state.counters["invtime"] =
+ benchmark::Counter{1, benchmark::Counter::kIsRate};
+}
+
+BENCHMARK(BM_ManualThreadingInLoop)
+ ->Iterations(1)
+ ->ManualThreading()
+ ->Threads(1);
+BENCHMARK(BM_ManualThreadingInLoop)
+ ->Iterations(1)
+ ->ManualThreading()
+ ->Threads(1)
+ ->UseRealTime();
+BENCHMARK(BM_ManualThreadingInLoop)
+ ->Iterations(1)
+ ->ManualThreading()
+ ->Threads(1)
+ ->UseManualTime();
+BENCHMARK(BM_ManualThreadingInLoop)
+ ->Iterations(1)
+ ->ManualThreading()
+ ->Threads(1)
+ ->MeasureProcessCPUTime();
+BENCHMARK(BM_ManualThreadingInLoop)
+ ->Iterations(1)
+ ->ManualThreading()
+ ->Threads(1)
+ ->MeasureProcessCPUTime()
+ ->UseRealTime();
+BENCHMARK(BM_ManualThreadingInLoop)
+ ->Iterations(1)
+ ->ManualThreading()
+ ->Threads(1)
+ ->MeasureProcessCPUTime()
+ ->UseManualTime();
+
+BENCHMARK(BM_ManualThreadingInLoop)
+ ->Iterations(1)
+ ->ManualThreading()
+ ->Threads(2);
+BENCHMARK(BM_ManualThreadingInLoop)
+ ->Iterations(1)
+ ->ManualThreading()
+ ->Threads(2)
+ ->UseRealTime();
+BENCHMARK(BM_ManualThreadingInLoop)
+ ->Iterations(1)
+ ->ManualThreading()
+ ->Threads(2)
+ ->UseManualTime();
+BENCHMARK(BM_ManualThreadingInLoop)
+ ->Iterations(1)
+ ->ManualThreading()
+ ->Threads(2)
+ ->MeasureProcessCPUTime();
+BENCHMARK(BM_ManualThreadingInLoop)
+ ->Iterations(1)
+ ->ManualThreading()
+ ->Threads(2)
+ ->MeasureProcessCPUTime()
+ ->UseRealTime();
+BENCHMARK(BM_ManualThreadingInLoop)
+ ->Iterations(1)
+ ->ManualThreading()
+ ->Threads(2)
+ ->MeasureProcessCPUTime()
+ ->UseManualTime();
+
+// ========================================================================= //
+// BM_ManualThreadingBeforeLoop
+// Creation of threads is done before the start of the measurement,
+// joining after the finish of the measurement.
+
+void BM_ManualThreadingBeforeLoop(benchmark::State& state) {
+ std::promise thread_starter;
+ auto starter_future = thread_starter.get_future();
+
+ auto threadedLoop = [&]() {
+ starter_future.wait();
+ benchmark::ThreadState ts(state);
+ for (auto _ : ts) {
+ MyBusySpinwait();
+ ts.SetIterationTime(time_frame_in_sec);
+ }
+ };
+
+ std::vector pool(state.threads());
+ for (int i = 0; i < state.threads(); ++i) {
+ pool[i] = std::thread(threadedLoop);
+ }
+ thread_starter.set_value();
+ for (int i = 0; i < state.threads(); ++i) {
+ pool[i].join();
+ }
+
+ state.counters["invtime"] =
+ benchmark::Counter{1, benchmark::Counter::kIsRate};
+}
+
+BENCHMARK(BM_ManualThreadingBeforeLoop)
+ ->Iterations(1)
+ ->ManualThreading()
+ ->Threads(1);
+BENCHMARK(BM_ManualThreadingBeforeLoop)
+ ->Iterations(1)
+ ->ManualThreading()
+ ->Threads(1)
+ ->UseRealTime();
+BENCHMARK(BM_ManualThreadingBeforeLoop)
+ ->Iterations(1)
+ ->ManualThreading()
+ ->Threads(1)
+ ->UseManualTime();
+BENCHMARK(BM_ManualThreadingBeforeLoop)
+ ->Iterations(1)
+ ->ManualThreading()
+ ->Threads(1)
+ ->MeasureProcessCPUTime();
+BENCHMARK(BM_ManualThreadingBeforeLoop)
+ ->Iterations(1)
+ ->ManualThreading()
+ ->Threads(1)
+ ->MeasureProcessCPUTime()
+ ->UseRealTime();
+BENCHMARK(BM_ManualThreadingBeforeLoop)
+ ->Iterations(1)
+ ->ManualThreading()
+ ->Threads(1)
+ ->MeasureProcessCPUTime()
+ ->UseManualTime();
+
+BENCHMARK(BM_ManualThreadingBeforeLoop)
+ ->Iterations(1)
+ ->ManualThreading()
+ ->Threads(2);
+BENCHMARK(BM_ManualThreadingBeforeLoop)
+ ->Iterations(1)
+ ->ManualThreading()
+ ->Threads(2)
+ ->UseRealTime();
+BENCHMARK(BM_ManualThreadingBeforeLoop)
+ ->Iterations(1)
+ ->ManualThreading()
+ ->Threads(2)
+ ->UseManualTime();
+BENCHMARK(BM_ManualThreadingBeforeLoop)
+ ->Iterations(1)
+ ->ManualThreading()
+ ->Threads(2)
+ ->MeasureProcessCPUTime();
+BENCHMARK(BM_ManualThreadingBeforeLoop)
+ ->Iterations(1)
+ ->ManualThreading()
+ ->Threads(2)
+ ->MeasureProcessCPUTime()
+ ->UseRealTime();
+BENCHMARK(BM_ManualThreadingBeforeLoop)
+ ->Iterations(1)
+ ->ManualThreading()
+ ->Threads(2)
+ ->MeasureProcessCPUTime()
+ ->UseManualTime();
+
+// ========================================================================= //
+// ---------------------------- TEST CASES END ----------------------------- //
+// ========================================================================= //
+
+int main(int argc, char* argv[]) { RunOutputTests(argc, argv); }