Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions AUTHORS
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,7 @@ Sayan Bhattacharjee <aero.sayan@gmail.com>
Shuo Chen <chenshuo@chenshuo.com>
Steinar H. Gunderson <sgunderson@bigfoot.com>
Stripe, Inc.
Yandex LLC
Yixuan Qiu <yixuanq@gmail.com>
Yusuke Suzuki <utatane.tea@gmail.com>
Zbigniew Skowron <zbychs@gmail.com>
1 change: 1 addition & 0 deletions CONTRIBUTORS
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,7 @@ Jussi Knuuttila <jussi.knuuttila@gmail.com>
Kai Wolf <kai.wolf@gmail.com>
Kaito Udagawa <umireon@gmail.com>
Kishan Kumar <kumar.kishan@outlook.com>
Konstantin Khlebnikov <khlebnikov@yandex-team.ru>
Lei Xu <eddyxu@gmail.com>
Matt Clarkson <mattyclarkson@gmail.com>
Maxim Vafin <maxvafin@gmail.com>
Expand Down
5 changes: 4 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -910,9 +910,12 @@ Without `UseRealTime`, CPU time is used by default.
### CPU Timers

By default, the CPU timer only measures the time spent by the main thread.
For multithreaded benchmarks timer measures average among working threads.

If the benchmark itself uses threads internally, this measurement may not
be what you are looking for. Instead, there is a way to measure the total
CPU usage of the process, by all the threads.
CPU usage of the process, by all the threads. Resulting total CPU time
also will be divided among working threads.
Comment on lines +913 to +918
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I wonder if this is too vague. Maybe this should mention that it is specifically about https://github.com/google/benchmark#multithreaded-benchmarks, not the threads inside of the workload itself.


```c++
void callee(int i);
Expand Down
4 changes: 4 additions & 0 deletions include/benchmark/benchmark.h
Original file line number Diff line number Diff line change
Expand Up @@ -1381,11 +1381,15 @@ class BenchmarkReporter {
bool error_occurred;
std::string error_message;

// Total iterations across all threads.
IterationCount iterations;

int64_t threads;
int64_t repetition_index;
int64_t repetitions;
TimeUnit time_unit;

// Total time across all threads.
double real_accumulated_time;
double cpu_accumulated_time;

Expand Down
10 changes: 6 additions & 4 deletions src/benchmark_runner.cc
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,7 @@ BenchmarkReporter::Run CreateRunReport(
report.repetitions = b.repetitions;

if (!report.error_occurred) {
// This is the total time across all threads.
if (b.use_manual_time) {
report.real_accumulated_time = results.manual_time_used;
} else {
Expand Down Expand Up @@ -227,10 +228,8 @@ class BenchmarkRunner {
// And get rid of the manager.
manager.reset();

// Adjust real/manual time stats since they were reported per thread.
i.results.real_time_used /= b.threads;
i.results.manual_time_used /= b.threads;
// If we were measuring whole-process CPU usage, adjust the CPU time too.
// If we were measuring whole-process CPU usage then each thread reports
// total CPU time of all threads. Divide by threads to get real value.
if (b.measure_process_cpu_time) i.results.cpu_time_used /= b.threads;

VLOG(2) << "Ran in " << i.results.cpu_time_used << "/"
Expand All @@ -246,6 +245,9 @@ class BenchmarkRunner {
i.seconds = i.results.real_time_used;
}

// Adjust time stats to average since they were reported by all threads.
i.seconds /= b.threads;

return i;
}

Expand Down
211 changes: 171 additions & 40 deletions test/internal_threading_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -4,20 +4,27 @@
#include <chrono>
#include <thread>
#include "../src/timers.h"
#include "../src/check.h"
#include "benchmark/benchmark.h"
#include "output_test.h"

static const std::chrono::duration<double, std::milli> time_frame(50);
static const double time_frame_in_ns(
std::chrono::duration_cast<std::chrono::duration<double, std::nano>>(
time_frame)
.count());
static const double time_frame_in_sec(
std::chrono::duration_cast<std::chrono::duration<double, std::ratio<1, 1>>>(
time_frame)
.count());

static const int nr_iterations = 10;

// Waste exact amount of CPU time in busy-loop
void MyBusySpinwait() {
const auto start = benchmark::ChronoClockNow();
const auto start = benchmark::ThreadCPUUsage();

while (true) {
const auto now = benchmark::ChronoClockNow();
const auto now = benchmark::ThreadCPUUsage();
const auto elapsed = now - start;

if (std::chrono::duration<double, std::chrono::seconds::period>(elapsed) >=
Expand All @@ -26,6 +33,77 @@ void MyBusySpinwait() {
}
}

class TestReporter : public benchmark::ConsoleReporter {
int num_cpus = 0;
public:
virtual bool ReportContext(const Context& context) {
num_cpus = context.cpu_info.num_cpus;
return ConsoleReporter::ReportContext(context);
};

virtual void ReportRuns(const std::vector<Run>& report) {
ConsoleReporter::ReportRuns(report);

for (auto &run: report) {
double expected_cpus = 1;
int64_t min_cpus = run.threads;

if (run.run_name.function_name == "BM_WorkerThread") {
if (run.run_name.time_type == "" ||
run.run_name.time_type == "real_time" ||
run.run_name.time_type == "manual_time") {
expected_cpus = 0;
}
}

if (run.run_name.function_name == "BM_MainThreadAndWorkerThread") {
min_cpus *= 2;
if (run.run_name.time_type == "process_time" ||
run.run_name.time_type == "process_time/real_time" ||
run.run_name.time_type == "process_time/manual_time") {
expected_cpus = 2;
}
}

if (run.run_name.time_type == "manual_time" ||
run.run_name.time_type == "process_time/manual_time") {
min_cpus = 0;
}

double cpus = run.GetAdjustedCPUTime() / time_frame_in_ns;
double real = run.GetAdjustedRealTime() / time_frame_in_ns;

// Check that result >= expected (accuracy: relative=0.1, absolute=0.1)
CHECK_FLOAT_GE(cpus, expected_cpus, expected_cpus * 0.1 + 0.1);
CHECK_FLOAT_GE(real, 1.0, 0.2);

// Warn if cpu time is bigger than expected.
if (cpus > expected_cpus * 1.1 + 0.1) {
VLOG(0) << "CPU time bigger than expected, might be cpu overload\n";
}

// Warn if real time is bigger than expected.
if (real > 1.2) {
VLOG(0) << "Real time bigger than expected, might be cpu overload\n";
}

// Check that cpu time <= expected * 2 + 20%
CHECK_FLOAT_LE(cpus, expected_cpus, expected_cpus + 0.2);

// For checking real time require one more CPU for infrastructure
if (num_cpus < min_cpus + 1) {
VLOG(0) << "Not enough cpus to get valid real time\n";
} else {
// Measurements in CI are noisy, check real time <= expected * 4 + 20%
CHECK_FLOAT_LE(real, 4.0, 0.2);
}
}
}

TestReporter() {}
virtual ~TestReporter() {}
};

// ========================================================================= //
// --------------------------- TEST CASES BEGIN ---------------------------- //
// ========================================================================= //
Expand All @@ -42,32 +120,54 @@ void BM_MainThread(benchmark::State& state) {
benchmark::Counter{1, benchmark::Counter::kIsRate};
}

BENCHMARK(BM_MainThread)->Iterations(1)->Threads(1);
BENCHMARK(BM_MainThread)->Iterations(1)->Threads(1)->UseRealTime();
BENCHMARK(BM_MainThread)->Iterations(1)->Threads(1)->UseManualTime();
BENCHMARK(BM_MainThread)->Iterations(1)->Threads(1)->MeasureProcessCPUTime();
BENCHMARK(BM_MainThread)
->Iterations(1)
->Iterations(nr_iterations)
->Threads(1);
BENCHMARK(BM_MainThread)
->Iterations(nr_iterations)
->Threads(1)
->UseRealTime();
BENCHMARK(BM_MainThread)
->Iterations(nr_iterations)
->Threads(1)
->UseManualTime();
BENCHMARK(BM_MainThread)
->Iterations(nr_iterations)
->Threads(1)
->MeasureProcessCPUTime();
BENCHMARK(BM_MainThread)
->Iterations(nr_iterations)
->Threads(1)
->MeasureProcessCPUTime()
->UseRealTime();
BENCHMARK(BM_MainThread)
->Iterations(1)
->Iterations(nr_iterations)
->Threads(1)
->MeasureProcessCPUTime()
->UseManualTime();

BENCHMARK(BM_MainThread)->Iterations(1)->Threads(2);
BENCHMARK(BM_MainThread)->Iterations(1)->Threads(2)->UseRealTime();
BENCHMARK(BM_MainThread)->Iterations(1)->Threads(2)->UseManualTime();
BENCHMARK(BM_MainThread)->Iterations(1)->Threads(2)->MeasureProcessCPUTime();
BENCHMARK(BM_MainThread)
->Iterations(1)
->Iterations(nr_iterations)
->Threads(2);
BENCHMARK(BM_MainThread)
->Iterations(nr_iterations)
->Threads(2)
->UseRealTime();
BENCHMARK(BM_MainThread)
->Iterations(nr_iterations)
->Threads(2)
->UseManualTime();
BENCHMARK(BM_MainThread)
->Iterations(nr_iterations)
->Threads(2)
->MeasureProcessCPUTime();
BENCHMARK(BM_MainThread)
->Iterations(nr_iterations)
->Threads(2)
->MeasureProcessCPUTime()
->UseRealTime();
BENCHMARK(BM_MainThread)
->Iterations(1)
->Iterations(nr_iterations)
->Threads(2)
->MeasureProcessCPUTime()
->UseManualTime();
Expand All @@ -85,32 +185,54 @@ void BM_WorkerThread(benchmark::State& state) {
benchmark::Counter{1, benchmark::Counter::kIsRate};
}

BENCHMARK(BM_WorkerThread)->Iterations(1)->Threads(1);
BENCHMARK(BM_WorkerThread)->Iterations(1)->Threads(1)->UseRealTime();
BENCHMARK(BM_WorkerThread)->Iterations(1)->Threads(1)->UseManualTime();
BENCHMARK(BM_WorkerThread)->Iterations(1)->Threads(1)->MeasureProcessCPUTime();
BENCHMARK(BM_WorkerThread)
->Iterations(1)
->Iterations(nr_iterations)
->Threads(1);
BENCHMARK(BM_WorkerThread)
->Iterations(nr_iterations)
->Threads(1)
->UseRealTime();
BENCHMARK(BM_WorkerThread)
->Iterations(nr_iterations)
->Threads(1)
->UseManualTime();
BENCHMARK(BM_WorkerThread)
->Iterations(nr_iterations)
->Threads(1)
->MeasureProcessCPUTime();
BENCHMARK(BM_WorkerThread)
->Iterations(nr_iterations)
->Threads(1)
->MeasureProcessCPUTime()
->UseRealTime();
BENCHMARK(BM_WorkerThread)
->Iterations(1)
->Iterations(nr_iterations)
->Threads(1)
->MeasureProcessCPUTime()
->UseManualTime();

BENCHMARK(BM_WorkerThread)->Iterations(1)->Threads(2);
BENCHMARK(BM_WorkerThread)->Iterations(1)->Threads(2)->UseRealTime();
BENCHMARK(BM_WorkerThread)->Iterations(1)->Threads(2)->UseManualTime();
BENCHMARK(BM_WorkerThread)->Iterations(1)->Threads(2)->MeasureProcessCPUTime();
BENCHMARK(BM_WorkerThread)
->Iterations(1)
->Iterations(nr_iterations)
->Threads(2);
BENCHMARK(BM_WorkerThread)
->Iterations(nr_iterations)
->Threads(2)
->UseRealTime();
BENCHMARK(BM_WorkerThread)
->Iterations(nr_iterations)
->Threads(2)
->UseManualTime();
BENCHMARK(BM_WorkerThread)
->Iterations(nr_iterations)
->Threads(2)
->MeasureProcessCPUTime();
BENCHMARK(BM_WorkerThread)
->Iterations(nr_iterations)
->Threads(2)
->MeasureProcessCPUTime()
->UseRealTime();
BENCHMARK(BM_WorkerThread)
->Iterations(1)
->Iterations(nr_iterations)
->Threads(2)
->MeasureProcessCPUTime()
->UseManualTime();
Expand All @@ -129,50 +251,54 @@ void BM_MainThreadAndWorkerThread(benchmark::State& state) {
benchmark::Counter{1, benchmark::Counter::kIsRate};
}

BENCHMARK(BM_MainThreadAndWorkerThread)->Iterations(1)->Threads(1);
BENCHMARK(BM_MainThreadAndWorkerThread)
->Iterations(1)
->Iterations(nr_iterations)
->Threads(1);
BENCHMARK(BM_MainThreadAndWorkerThread)
->Iterations(nr_iterations)
->Threads(1)
->UseRealTime();
BENCHMARK(BM_MainThreadAndWorkerThread)
->Iterations(1)
->Iterations(nr_iterations)
->Threads(1)
->UseManualTime();
BENCHMARK(BM_MainThreadAndWorkerThread)
->Iterations(1)
->Iterations(nr_iterations)
->Threads(1)
->MeasureProcessCPUTime();
BENCHMARK(BM_MainThreadAndWorkerThread)
->Iterations(1)
->Iterations(nr_iterations)
->Threads(1)
->MeasureProcessCPUTime()
->UseRealTime();
BENCHMARK(BM_MainThreadAndWorkerThread)
->Iterations(1)
->Iterations(nr_iterations)
->Threads(1)
->MeasureProcessCPUTime()
->UseManualTime();

BENCHMARK(BM_MainThreadAndWorkerThread)->Iterations(1)->Threads(2);
BENCHMARK(BM_MainThreadAndWorkerThread)
->Iterations(1)
->Iterations(nr_iterations)
->Threads(2);
BENCHMARK(BM_MainThreadAndWorkerThread)
->Iterations(nr_iterations)
->Threads(2)
->UseRealTime();
BENCHMARK(BM_MainThreadAndWorkerThread)
->Iterations(1)
->Iterations(nr_iterations)
->Threads(2)
->UseManualTime();
BENCHMARK(BM_MainThreadAndWorkerThread)
->Iterations(1)
->Iterations(nr_iterations)
->Threads(2)
->MeasureProcessCPUTime();
BENCHMARK(BM_MainThreadAndWorkerThread)
->Iterations(1)
->Iterations(nr_iterations)
->Threads(2)
->MeasureProcessCPUTime()
->UseRealTime();
BENCHMARK(BM_MainThreadAndWorkerThread)
->Iterations(1)
->Iterations(nr_iterations)
->Threads(2)
->MeasureProcessCPUTime()
->UseManualTime();
Expand All @@ -181,4 +307,9 @@ BENCHMARK(BM_MainThreadAndWorkerThread)
// ---------------------------- TEST CASES END ----------------------------- //
// ========================================================================= //

int main(int argc, char* argv[]) { RunOutputTests(argc, argv); }
int main(int argc, char* argv[]) {
benchmark::Initialize(&argc, argv);
TestReporter test_reporter;
benchmark::RunSpecifiedBenchmarks(&test_reporter);
return 0;
}
Loading