-
Notifications
You must be signed in to change notification settings - Fork 1.8k
Add support for other multi-threading APIs #1027
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -641,6 +641,9 @@ class State { | |
| return max_iterations - total_iterations_ + batch_leftover_; | ||
| } | ||
|
|
||
| BENCHMARK_ALWAYS_INLINE | ||
| int GetNumThreadStates() const { return num_thread_states_; } | ||
|
|
||
| private | ||
| : // items we expect on the first cache line (ie 64 bytes of the struct) | ||
| // When total_iterations_ is 0, KeepRunning() and friends will return false. | ||
|
|
@@ -685,8 +688,21 @@ class State { | |
| void FinishKeepRunning(); | ||
| internal::ThreadTimer* timer_; | ||
| internal::ThreadManager* manager_; | ||
| int num_thread_states_; | ||
|
|
||
| friend struct internal::BenchmarkInstance; | ||
| friend class ThreadState; | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Avoid friends, please. Add a protected section with accessors if you need that. |
||
| }; | ||
|
|
||
| class ThreadState : public State | ||
| { | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Note that we use Google style, so this (and other braces) should be on the previous line. You can use clang-format to make this easier. |
||
| public: | ||
| explicit ThreadState(State& s); | ||
| ~ThreadState(); | ||
| private: | ||
| State* parent_; | ||
|
|
||
| ThreadState(const ThreadState&); | ||
| }; | ||
|
|
||
| inline BENCHMARK_ALWAYS_INLINE bool State::KeepRunning() { | ||
|
|
@@ -945,6 +961,9 @@ class Benchmark { | |
| // Equivalent to ThreadRange(NumCPUs(), NumCPUs()) | ||
| Benchmark* ThreadPerCpu(); | ||
|
|
||
| // Don't create threads. Let the user evaluate state.threads and/or use ThreadState. | ||
| Benchmark* ManualThreading() { manual_threading_ = true; return this; } | ||
|
|
||
| virtual void Run(State& state) = 0; | ||
|
|
||
| protected: | ||
|
|
@@ -969,6 +988,7 @@ class Benchmark { | |
| bool measure_process_cpu_time_; | ||
| bool use_real_time_; | ||
| bool use_manual_time_; | ||
| bool manual_threading_; | ||
| BigO complexity_; | ||
| BigOFunc* complexity_lambda_; | ||
| std::vector<Statistics> statistics_; | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -130,7 +130,8 @@ State::State(IterationCount max_iters, const std::vector<int64_t>& ranges, | |
| thread_index(thread_i), | ||
| threads(n_threads), | ||
| timer_(timer), | ||
| manager_(manager) { | ||
| manager_(manager), | ||
| num_thread_states_(0) { | ||
| CHECK(max_iterations != 0) << "At least one iteration must be run"; | ||
| CHECK_LT(thread_index, threads) << "thread_index must be less than threads"; | ||
|
|
||
|
|
@@ -212,6 +213,33 @@ void State::FinishKeepRunning() { | |
| manager_->StartStopBarrier(); | ||
| } | ||
|
|
||
| ThreadState::ThreadState(State& s) : | ||
| State(s), | ||
| parent_(&s) | ||
| { | ||
| CHECK(!s.started_) << "Don't create a ThreadState object after measurement has started"; | ||
| timer_ = new internal::ThreadTimer(*timer_); | ||
dmah42 marked this conversation as resolved.
Show resolved
Hide resolved
|
||
| } | ||
|
|
||
| ThreadState::~ThreadState() | ||
| { | ||
| CHECK(error_occurred() || iterations() >= max_iterations) | ||
| << "Benchmark returned before ThreadState::KeepRunning() returned false!"; | ||
| { | ||
| MutexLock l(manager_->GetBenchmarkMutex()); | ||
| internal::MergeResults(*this, timer_, manager_); | ||
| assert(parent_->total_iterations_ == 0 || parent_->total_iterations_ == total_iterations_); | ||
dmah42 marked this conversation as resolved.
Show resolved
Hide resolved
|
||
| assert(parent_->batch_leftover_ == 0 || parent_->batch_leftover_ == batch_leftover_); | ||
| parent_->total_iterations_ = total_iterations_; | ||
| parent_->batch_leftover_ = batch_leftover_; | ||
| parent_->started_ = parent_->started_ || started_; | ||
| parent_->finished_ = parent_->finished_ || finished_; | ||
| parent_->error_occurred_ = parent_->error_occurred_ || error_occurred_; | ||
| parent_->num_thread_states_++; | ||
| } | ||
| delete timer_; | ||
| } | ||
|
|
||
| namespace internal { | ||
| namespace { | ||
|
|
||
|
|
@@ -315,6 +343,16 @@ bool IsZero(double n) { | |
| return std::abs(n) < std::numeric_limits<double>::epsilon(); | ||
| } | ||
|
|
||
| void MergeResults(State& st, ThreadTimer* timer, ThreadManager* manager) | ||
| { | ||
| ThreadManager::Result& results = manager->results; | ||
| results.cpu_time_used += timer->cpu_time_used(); | ||
| results.real_time_used = std::max(results.real_time_used, timer->real_time_used()); | ||
|
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Note to myself: leftover from another fix. |
||
| results.manual_time_used += timer->manual_time_used(); | ||
| results.complexity_n += st.complexity_length_n(); | ||
| Increment(&results.counters, st.counters); | ||
| } | ||
|
|
||
| ConsoleReporter::OutputOptions GetOutputOptions(bool force_no_color) { | ||
| int output_opts = ConsoleReporter::OO_Defaults; | ||
| auto is_benchmark_color = [force_no_color]() -> bool { | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -77,7 +77,7 @@ BenchmarkReporter::Run CreateRunReport( | |
| // This is the total iterations across all threads. | ||
| report.iterations = results.iterations; | ||
| report.time_unit = b.time_unit; | ||
| report.threads = b.threads; | ||
| report.threads = results.thread_count; | ||
| report.repetition_index = repetition_index; | ||
| report.repetitions = b.repetitions; | ||
|
|
||
|
|
@@ -117,17 +117,36 @@ void RunInThread(const BenchmarkInstance* b, IterationCount iters, | |
| ? internal::ThreadTimer::CreateProcessCpuTime() | ||
| : internal::ThreadTimer::Create()); | ||
| State st = b->Run(iters, thread_id, &timer, manager); | ||
| CHECK(st.error_occurred() || st.iterations() >= st.max_iterations) | ||
| << "Benchmark returned before State::KeepRunning() returned false!"; | ||
| assert(b->explicit_threading || b->threads == 1); | ||
| if (st.GetNumThreadStates() > 0) | ||
| { | ||
| CHECK((!b->explicit_threading) || b->manual_threading) | ||
| << "Benchmark " << b->name.str() << " run with managed threading. It must not create ThreadStates!"; | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I believe this line is too long. Please check with clang-format. |
||
| CHECK((!b->explicit_threading) || st.GetNumThreadStates() == b->threads) | ||
| << "The number of ThreadStates created by Benchmark " << b->name.str() | ||
| << " doesn't match the number of threads!"; | ||
| } | ||
| else | ||
| { | ||
| CHECK(st.error_occurred() || st.iterations() >= st.max_iterations) | ||
| << "Benchmark returned before State::KeepRunning() returned false!"; | ||
| } | ||
| { | ||
| MutexLock l(manager->GetBenchmarkMutex()); | ||
| internal::ThreadManager::Result& results = manager->results; | ||
| results.iterations += st.iterations(); | ||
| results.cpu_time_used += timer.cpu_time_used(); | ||
| results.real_time_used += timer.real_time_used(); | ||
| results.manual_time_used += timer.manual_time_used(); | ||
| results.complexity_n += st.complexity_length_n(); | ||
| internal::Increment(&results.counters, st.counters); | ||
| if (st.GetNumThreadStates() > 0) | ||
| { | ||
| // State values as well as thread state values are summed up for complexity_n and user counters: | ||
| results.complexity_n += st.complexity_length_n(); | ||
| internal::Increment(&results.counters, st.counters); | ||
| results.thread_count = b->explicit_threading ? b->threads : st.GetNumThreadStates(); | ||
| } | ||
| else | ||
| { | ||
| internal::MergeResults(st, &timer, manager); | ||
| results.thread_count = b->threads; | ||
| } | ||
| } | ||
| manager->NotifyThreadComplete(); | ||
| } | ||
|
|
@@ -142,7 +161,8 @@ class BenchmarkRunner { | |
| repeats(b.repetitions != 0 ? b.repetitions | ||
| : FLAGS_benchmark_repetitions), | ||
| has_explicit_iteration_count(b.iterations != 0), | ||
| pool(b.threads - 1), | ||
| num_managed_threads(b.manual_threading ? 1 : b.threads), | ||
| pool(num_managed_threads - 1), | ||
| iters(has_explicit_iteration_count ? b.iterations : 1) { | ||
| run_results.display_report_aggregates_only = | ||
| (FLAGS_benchmark_report_aggregates_only || | ||
|
|
@@ -186,6 +206,7 @@ class BenchmarkRunner { | |
| const int repeats; | ||
| const bool has_explicit_iteration_count; | ||
|
|
||
| const int num_managed_threads; // number of managed threads, must be before pool | ||
dmah42 marked this conversation as resolved.
Show resolved
Hide resolved
|
||
| std::vector<std::thread> pool; | ||
|
|
||
| IterationCount iters; // preserved between repetitions! | ||
|
|
@@ -201,7 +222,7 @@ class BenchmarkRunner { | |
| VLOG(2) << "Running " << b.name.str() << " for " << iters << "\n"; | ||
|
|
||
| std::unique_ptr<internal::ThreadManager> manager; | ||
| manager.reset(new internal::ThreadManager(b.threads)); | ||
| manager.reset(new internal::ThreadManager(num_managed_threads)); | ||
|
|
||
| // Run all but one thread in separate threads | ||
| for (std::size_t ti = 0; ti < pool.size(); ++ti) { | ||
|
|
@@ -228,10 +249,10 @@ class BenchmarkRunner { | |
| manager.reset(); | ||
|
|
||
| // Adjust real/manual time stats since they were reported per thread. | ||
| i.results.real_time_used /= b.threads; | ||
| i.results.manual_time_used /= b.threads; | ||
| i.results.real_time_used /= i.results.thread_count; | ||
| i.results.manual_time_used /= i.results.thread_count; | ||
| // If we were measuring whole-process CPU usage, adjust the CPU time too. | ||
| if (b.measure_process_cpu_time) i.results.cpu_time_used /= b.threads; | ||
| if (b.measure_process_cpu_time) i.results.cpu_time_used /= i.results.thread_count; | ||
|
|
||
| VLOG(2) << "Ran in " << i.results.cpu_time_used << "/" | ||
| << i.results.real_time_used << "\n"; | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Is this postcondition actually required? Can it be lifted?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Do you mean the postcondition that the number of ThreadStates == number of threads? I don't see a way to do this.
Note that this postcondition is only required if you explicitly specify the number of threads via
`Threads()` and if you create at least one `ThreadState` object during the benchmark run. It would be odd if, in this case, the numbers didn't match.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yes.
In particular, as discussed (?) in the previous PR, I would like to call
`->Threads()`, but disable
benchmark-based threading. I.e., I'd like a way to use `threads` as yet another parameter, without it actually creating all those threads,
and without dividing the measurements by the "thread count".