From 938d3e870ef14bc24ede1abc73cb3bc38209d799 Mon Sep 17 00:00:00 2001 From: Katze719 Date: Sun, 5 Apr 2026 14:16:00 +0200 Subject: [PATCH 01/15] Update CHANGELOG for v2.0.0 with breaking changes and new features - Introduced breaking changes including type aliases for `ThreadPool` and `FastThreadPool`, updated return types for thread configuration methods, and removal of `submit_range()`. - Added new types such as `ThreadPoolBase` and `GlobalPool`. - Centralized OS-level logic for thread priority, scheduling, and affinity into detail functions, reducing code duplication by ~1000 lines. - Refactored `apply_profile()` methods for better consistency and clarity across thread types. - Updated `VERSION` to 2.0.0. --- CHANGELOG.md | 82 ++ VERSION | 2 +- include/threadschedule/profiles.hpp | 116 ++- include/threadschedule/pthread_wrapper.hpp | 28 +- include/threadschedule/scheduled_pool.hpp | 62 +- include/threadschedule/scheduler_policy.hpp | 141 ++++ include/threadschedule/thread_pool.hpp | 718 +++++------------- .../thread_pool_with_errors.hpp | 329 +------- include/threadschedule/thread_registry.hpp | 67 +- include/threadschedule/thread_wrapper.hpp | 144 +--- include/threadschedule/threadschedule.hpp | 5 + 11 files changed, 534 insertions(+), 1160 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 93ea620..e1b01d1 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,85 @@ +# Changelog + +## v2.0.0 (unreleased) + +### Breaking Changes + +- **`ThreadPool` and `FastThreadPool` are now type aliases** for + `ThreadPoolBase` and `ThreadPoolBase`. Behavior + is unchanged, but code that forward-declares or specializes on the concrete + class name may need adjustment. + +- **`configure_threads()`, `set_affinity()`, `distribute_across_cpus()`** on + `ThreadPool` and `FastThreadPool` now return `expected` + instead of `bool`. `HighPerformancePool` already used this return type. 
+ Migration: `if (pool.configure_threads(...))` still compiles (expected has + `operator bool`), but code that stores the result in a `bool` variable needs + updating to `auto` or the expected type. + +- **`ThreadPool::Statistics`** now includes `tasks_per_second` and + `avg_task_time` fields (previously only on `FastThreadPool` and + `HighPerformancePool`). + +- **`submit_range()` removed** from `ThreadPool`. Use `submit_batch()` instead + (consistent with `FastThreadPool` and `HighPerformancePool`). `submit_batch()` + is also more efficient: it acquires the queue lock once for the entire batch + instead of per-item. + +- **`GlobalThreadPool::submit_range()` removed**. Use + `GlobalThreadPool::submit_batch()`. + +- **`HighPerformancePoolWithErrors`, `FastThreadPoolWithErrors`, + `ThreadPoolWithErrors`** are now type aliases for `PoolWithErrors`. The + public API is unchanged. + +- **`GlobalThreadPool`, `GlobalHighPerformancePool`** are now type aliases for + `GlobalPool`. The public API is unchanged. + +### New Types + +- `ThreadPoolBase` - parameterized single-queue thread pool. +- `IndefiniteWait` / `PollingWait` - wait policy types for `ThreadPoolBase`. +- `PoolWithErrors` - generic error-handling pool wrapper. +- `GlobalPool` - generic singleton pool accessor. + +### Internal Improvements + +- **~1000 lines of code duplication removed** across `thread_pool.hpp`, + `thread_pool_with_errors.hpp`, `thread_wrapper.hpp`, `thread_registry.hpp`, + `pthread_wrapper.hpp`, `profiles.hpp`, and `scheduled_pool.hpp`. + +- **Priority / affinity / scheduling policy** OS-level logic centralized into + `detail::apply_priority()`, `detail::apply_scheduling_policy()`, and + `detail::apply_affinity()` free functions (overloaded for `pthread_t`, + `pid_t`, and `HANDLE`). `BaseThreadWrapper`, `ThreadControlBlock`, + `PThreadWrapper`, and `ThreadByNameView` now delegate to these shared + implementations. 
+ +- **`apply_profile()` overloads** refactored to use shared + `detail::apply_profile_to()` and `detail::apply_profile_to_pool()` helpers. + +- **`ScheduledThreadPoolT`**: `schedule_at()` and `schedule_periodic_after()` + now share a private `insert_task()` helper. + +### Migration Guide + +```cpp +// v1: bool return +bool ok = pool.configure_threads("worker"); + +// v2: expected return (operator bool still works in conditions) +auto result = pool.configure_threads("worker"); +if (!result.has_value()) { + std::cerr << result.error().message() << std::endl; +} + +// v1: submit_range +auto futures = pool.submit_range(tasks.begin(), tasks.end()); + +// v2: submit_batch (same signature, more efficient) +auto futures = pool.submit_batch(tasks.begin(), tasks.end()); +``` + ## v1.4.1 - Fix: `*WrapperReg` types (`ThreadWrapperReg`, `JThreadWrapperReg`, diff --git a/VERSION b/VERSION index 3eefcb9..227cea2 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -1.0.0 +2.0.0 diff --git a/include/threadschedule/profiles.hpp b/include/threadschedule/profiles.hpp index 9bafd81..82e7aa7 100644 --- a/include/threadschedule/profiles.hpp +++ b/include/threadschedule/profiles.hpp @@ -58,7 +58,7 @@ struct ThreadProfile std::string name; SchedulingPolicy policy; ThreadPriority priority; - std::optional affinity; // optional pinning + std::optional affinity; }; namespace profiles @@ -103,20 +103,15 @@ inline auto background() -> ThreadProfile } } // namespace profiles +namespace detail +{ + /** - * @brief Apply a profile to a thread wrapper or view. - * - * SFINAE-constrained: only participates in overload resolution when - * @c is_thread_like_v is true (ThreadWrapper, - * JThreadWrapper, PThreadWrapper, and their views). - * - * @tparam ThreadLike A type satisfying the is_thread_like trait. - * @param t Thread wrapper or view to configure. - * @param p Profile to apply. - * @return Empty expected on success, or @c operation_not_permitted. 
+ * @brief Apply policy + optional affinity to any type exposing + * set_scheduling_policy() and set_affinity(). */ -template , int> = 0> -inline auto apply_profile(ThreadLike& t, ThreadProfile const& p) -> expected +template +inline auto apply_profile_to(T& t, ThreadProfile const& p) -> expected { bool ok = true; if (!t.set_scheduling_policy(p.policy, p.priority).has_value()) @@ -132,20 +127,18 @@ inline auto apply_profile(ThreadLike& t, ThreadProfile const& p) -> expected expected +template +inline auto apply_profile_to_pool(PoolType& pool, std::string const& name_prefix, ThreadProfile const& p) + -> expected { bool ok = true; - if (!t.set_scheduling_policy(p.policy, p.priority).has_value()) + if (!pool.configure_threads(name_prefix, p.policy, p.priority).has_value()) ok = false; if (p.affinity.has_value()) { - if (!t.set_affinity(*p.affinity).has_value()) + if (!pool.set_affinity(*p.affinity).has_value()) ok = false; } if (ok) @@ -153,18 +146,33 @@ inline auto apply_profile(ThreadControlBlock& t, ThreadProfile const& p) -> expe return unexpected(std::make_error_code(std::errc::operation_not_permitted)); } +} // namespace detail + /** - * @brief Apply a profile to a registered thread via its info record. - * - * Dereferences @c t.control and delegates to the ThreadControlBlock - * overload. - * - * @warning Undefined behaviour if @c t.control is @c nullptr. + * @brief Apply a profile to a thread wrapper or view. * - * @param t Registered thread info whose control pointer is dereferenced. + * @tparam ThreadLike A type satisfying the is_thread_like trait. + * @param t Thread wrapper or view to configure. * @param p Profile to apply. * @return Empty expected on success, or @c operation_not_permitted. */ +template , int> = 0> +inline auto apply_profile(ThreadLike& t, ThreadProfile const& p) -> expected +{ + return detail::apply_profile_to(t, p); +} + +/** + * @brief Apply a profile to a ThreadControlBlock directly. 
+ */ +inline auto apply_profile(ThreadControlBlock& t, ThreadProfile const& p) -> expected +{ + return detail::apply_profile_to(t, p); +} + +/** + * @brief Apply a profile to a registered thread via its info record. + */ inline auto apply_profile(RegisteredThreadInfo& t, ThreadProfile const& p) -> expected { return apply_profile(*t.control, p); @@ -172,62 +180,30 @@ inline auto apply_profile(RegisteredThreadInfo& t, ThreadProfile const& p) -> ex /** * @brief Apply a profile to every worker in a ThreadPool. - * - * Uses @c "pool" as the thread name prefix passed to - * ThreadPool::configure_threads(). - * - * @param pool Thread pool to configure. - * @param p Profile to apply. - * @return Empty expected on success, or @c operation_not_permitted. */ inline auto apply_profile(ThreadPool& pool, ThreadProfile const& p) -> expected { - bool ok = true; - // Name prefix left to caller via configure_threads; here just policy/priority - if (!pool.configure_threads("pool", p.policy, p.priority)) - ok = false; - if (p.affinity.has_value()) - { - if (!pool.set_affinity(*p.affinity)) - ok = false; - } - if (ok) - return {}; - return unexpected(std::make_error_code(std::errc::operation_not_permitted)); + return detail::apply_profile_to_pool(pool, "pool", p); +} + +/** + * @brief Apply a profile to every worker in a FastThreadPool. + */ +inline auto apply_profile(FastThreadPool& pool, ThreadProfile const& p) -> expected +{ + return detail::apply_profile_to_pool(pool, "fast", p); } /** * @brief Apply a profile to every worker in a HighPerformancePool. - * - * Uses @c "hp" as the thread name prefix passed to - * HighPerformancePool::configure_threads(). - * - * @param pool High-performance pool to configure. - * @param p Profile to apply. - * @return Empty expected on success, or @c operation_not_permitted. 
*/ inline auto apply_profile(HighPerformancePool& pool, ThreadProfile const& p) -> expected { - bool ok = true; - if (!pool.configure_threads("hp", p.policy, p.priority).has_value()) - ok = false; - if (p.affinity.has_value()) - { - if (!pool.set_affinity(*p.affinity).has_value()) - ok = false; - } - if (ok) - return {}; - return unexpected(std::make_error_code(std::errc::operation_not_permitted)); + return detail::apply_profile_to_pool(pool, "hp", p); } /** * @brief Apply a profile to a registry-managed thread identified by TID. - * - * @param reg Thread registry that owns the thread. - * @param tid Thread identifier within the registry. - * @param p Profile to apply. - * @return Empty expected on success, or @c operation_not_permitted. */ inline auto apply_profile(ThreadRegistry& reg, Tid tid, ThreadProfile const& p) -> expected { diff --git a/include/threadschedule/pthread_wrapper.hpp b/include/threadschedule/pthread_wrapper.hpp index 61d5f8c..86c8b4d 100644 --- a/include/threadschedule/pthread_wrapper.hpp +++ b/include/threadschedule/pthread_wrapper.hpp @@ -168,40 +168,18 @@ class PThreadWrapper [[nodiscard]] auto set_priority(ThreadPriority priority) const -> expected { - int const policy = SCHED_OTHER; - auto params_result = SchedulerParams::create_for_policy(SchedulingPolicy::OTHER, priority); - - if (!params_result.has_value()) - { - return unexpected(params_result.error()); - } - - if (pthread_setschedparam(thread_, policy, ¶ms_result.value()) == 0) - return {}; - return unexpected(std::error_code(errno, std::generic_category())); + return detail::apply_priority(thread_, priority); } [[nodiscard]] auto set_scheduling_policy(SchedulingPolicy policy, ThreadPriority priority) const -> expected { - int const policy_int = static_cast(policy); - auto params_result = SchedulerParams::create_for_policy(policy, priority); - - if (!params_result.has_value()) - { - return unexpected(params_result.error()); - } - - if (pthread_setschedparam(thread_, policy_int, 
¶ms_result.value()) == 0) - return {}; - return unexpected(std::error_code(errno, std::generic_category())); + return detail::apply_scheduling_policy(thread_, policy, priority); } [[nodiscard]] auto set_affinity(ThreadAffinity const& affinity) const -> expected { - if (pthread_setaffinity_np(thread_, sizeof(cpu_set_t), &affinity.native_handle()) == 0) - return {}; - return unexpected(std::error_code(errno, std::generic_category())); + return detail::apply_affinity(thread_, affinity); } [[nodiscard]] auto get_affinity() const -> std::optional diff --git a/include/threadschedule/scheduled_pool.hpp b/include/threadschedule/scheduled_pool.hpp index 99912b2..31c149f 100644 --- a/include/threadschedule/scheduled_pool.hpp +++ b/include/threadschedule/scheduled_pool.hpp @@ -184,23 +184,7 @@ class ScheduledThreadPoolT */ auto schedule_at(TimePoint time_point, Task task) -> ScheduledTaskHandle { - std::lock_guard lock(mutex_); - - uint64_t const task_id = next_task_id_++; - ScheduledTaskHandle handle(task_id); - - ScheduledTaskInfo info; - info.id = task_id; - info.next_run = time_point; - info.interval = Duration::zero(); - info.task = std::move(task); - info.cancelled = handle.get_cancel_flag(); - info.periodic = false; - - scheduled_tasks_.insert({time_point, std::move(info)}); - condition_.notify_one(); - - return handle; + return insert_task(time_point, Duration::zero(), std::move(task), false); } /** @@ -226,23 +210,8 @@ class ScheduledThreadPoolT */ auto schedule_periodic_after(Duration initial_delay, Duration interval, Task task) -> ScheduledTaskHandle { - std::lock_guard lock(mutex_); - - uint64_t const task_id = next_task_id_++; - ScheduledTaskHandle handle(task_id); - - ScheduledTaskInfo info; - info.id = task_id; - info.next_run = std::chrono::steady_clock::now() + initial_delay; - info.interval = interval; - info.task = std::move(task); - info.cancelled = handle.get_cancel_flag(); - info.periodic = true; - - scheduled_tasks_.insert({info.next_run, 
std::move(info)}); - condition_.notify_one(); - - return handle; + auto const run_time = std::chrono::steady_clock::now() + initial_delay; + return insert_task(run_time, interval, std::move(task), true); } /** @@ -298,9 +267,7 @@ class ScheduledThreadPoolT /** * @brief Configure worker threads * - * Note: Return type depends on the underlying pool type. - * @ref ThreadPool returns bool, @ref HighPerformancePool returns expected. - * For consistent behavior, access the pool directly via thread_pool(). + * Returns expected from the underlying pool. */ auto configure_threads(std::string const& name_prefix, SchedulingPolicy policy = SchedulingPolicy::OTHER, ThreadPriority priority = ThreadPriority::normal()) @@ -319,6 +286,27 @@ class ScheduledThreadPoolT std::multimap scheduled_tasks_; std::atomic next_task_id_; + auto insert_task(TimePoint run_time, Duration interval, Task task, bool periodic) -> ScheduledTaskHandle + { + std::lock_guard lock(mutex_); + + uint64_t const task_id = next_task_id_++; + ScheduledTaskHandle handle(task_id); + + ScheduledTaskInfo info; + info.id = task_id; + info.next_run = run_time; + info.interval = interval; + info.task = std::move(task); + info.cancelled = handle.get_cancel_flag(); + info.periodic = periodic; + + scheduled_tasks_.insert({run_time, std::move(info)}); + condition_.notify_one(); + + return handle; + } + void scheduler_loop() { while (true) diff --git a/include/threadschedule/scheduler_policy.hpp b/include/threadschedule/scheduler_policy.hpp index 8f99628..3b201fd 100644 --- a/include/threadschedule/scheduler_policy.hpp +++ b/include/threadschedule/scheduler_policy.hpp @@ -11,6 +11,7 @@ #ifdef _WIN32 #include #else +#include #include #include #endif @@ -482,4 +483,144 @@ inline auto to_string(SchedulingPolicy policy) -> std::string } } +// --------------------------------------------------------------------------- +// detail:: free functions for thread configuration (priority, policy, affinity) +// +// Overloaded by handle 
type so that every wrapper class can delegate with a +// single call: detail::apply_priority(handle, priority). +// --------------------------------------------------------------------------- + +namespace detail +{ + +#ifdef _WIN32 + +inline auto map_priority_to_win32(int prio_val) -> int +{ + if (prio_val <= -10) + return THREAD_PRIORITY_IDLE; + if (prio_val <= -5) + return THREAD_PRIORITY_LOWEST; + if (prio_val < 0) + return THREAD_PRIORITY_BELOW_NORMAL; + if (prio_val == 0) + return THREAD_PRIORITY_NORMAL; + if (prio_val <= 5) + return THREAD_PRIORITY_ABOVE_NORMAL; + if (prio_val <= 10) + return THREAD_PRIORITY_HIGHEST; + return THREAD_PRIORITY_TIME_CRITICAL; +} + +inline auto apply_priority(HANDLE handle, ThreadPriority priority) -> expected +{ + if (!handle) + return unexpected(std::make_error_code(std::errc::no_such_process)); + if (SetThreadPriority(handle, map_priority_to_win32(priority.value())) != 0) + return {}; + return unexpected(std::make_error_code(std::errc::operation_not_permitted)); +} + +inline auto apply_scheduling_policy(HANDLE handle, SchedulingPolicy /*policy*/, ThreadPriority priority) + -> expected +{ + return apply_priority(handle, priority); +} + +inline auto apply_affinity(HANDLE handle, ThreadAffinity const& affinity) -> expected +{ + if (!handle) + return unexpected(std::make_error_code(std::errc::no_such_process)); + using SetThreadGroupAffinityFn = BOOL(WINAPI*)(HANDLE, const GROUP_AFFINITY*, PGROUP_AFFINITY); + HMODULE hMod = GetModuleHandleW(L"kernel32.dll"); + if (hMod) + { + auto set_group_affinity = reinterpret_cast( + reinterpret_cast(GetProcAddress(hMod, "SetThreadGroupAffinity"))); + if (set_group_affinity && affinity.has_any()) + { + GROUP_AFFINITY ga{}; + ga.Mask = static_cast(affinity.get_mask()); + ga.Group = affinity.get_group(); + if (set_group_affinity(handle, &ga, nullptr) != 0) + return {}; + return unexpected(std::make_error_code(std::errc::operation_not_permitted)); + } + } + DWORD_PTR mask = 
static_cast(affinity.get_mask()); + if (SetThreadAffinityMask(handle, mask) != 0) + return {}; + return unexpected(std::make_error_code(std::errc::operation_not_permitted)); +} + +#else // POSIX + +// --- pthread_t overloads (BaseThreadWrapper, ThreadControlBlock, PThreadWrapper) --- + +inline auto apply_priority(pthread_t handle, ThreadPriority priority) -> expected +{ + int const policy = SCHED_OTHER; + auto params_result = SchedulerParams::create_for_policy(SchedulingPolicy::OTHER, priority); + if (!params_result.has_value()) + return unexpected(params_result.error()); + if (pthread_setschedparam(handle, policy, ¶ms_result.value()) == 0) + return {}; + return unexpected(std::error_code(errno, std::generic_category())); +} + +inline auto apply_scheduling_policy(pthread_t handle, SchedulingPolicy policy, ThreadPriority priority) + -> expected +{ + int const policy_int = static_cast(policy); + auto params_result = SchedulerParams::create_for_policy(policy, priority); + if (!params_result.has_value()) + return unexpected(params_result.error()); + if (pthread_setschedparam(handle, policy_int, ¶ms_result.value()) == 0) + return {}; + return unexpected(std::error_code(errno, std::generic_category())); +} + +inline auto apply_affinity(pthread_t handle, ThreadAffinity const& affinity) -> expected +{ + if (pthread_setaffinity_np(handle, sizeof(cpu_set_t), &affinity.native_handle()) == 0) + return {}; + return unexpected(std::error_code(errno, std::generic_category())); +} + +// --- pid_t / TID overloads (ThreadByNameView) --- + +inline auto apply_priority(pid_t tid, ThreadPriority priority) -> expected +{ + int const policy = SCHED_OTHER; + auto params_result = SchedulerParams::create_for_policy(SchedulingPolicy::OTHER, priority); + if (!params_result.has_value()) + return unexpected(params_result.error()); + if (sched_setscheduler(tid, policy, ¶ms_result.value()) == 0) + return {}; + return unexpected(std::error_code(errno, std::generic_category())); +} + +inline auto 
apply_scheduling_policy(pid_t tid, SchedulingPolicy policy, ThreadPriority priority) + -> expected +{ + int const policy_int = static_cast(policy); + auto params_result = SchedulerParams::create_for_policy(policy, priority); + if (!params_result.has_value()) + return unexpected(params_result.error()); + if (sched_setscheduler(tid, policy_int, ¶ms_result.value()) == 0) + return {}; + return unexpected(std::error_code(errno, std::generic_category())); +} + +inline auto apply_affinity(pid_t tid, ThreadAffinity const& affinity) -> expected +{ + if (sched_setaffinity(tid, sizeof(cpu_set_t), &affinity.native_handle()) == 0) + return {}; + return unexpected(std::error_code(errno, std::generic_category())); +} + +#endif + +} // namespace detail + } // namespace threadschedule diff --git a/include/threadschedule/thread_pool.hpp b/include/threadschedule/thread_pool.hpp index 99e94bd..5a488f2 100644 --- a/include/threadschedule/thread_pool.hpp +++ b/include/threadschedule/thread_pool.hpp @@ -79,7 +79,6 @@ class WorkStealingDeque { } - // Thread-safe operations [[nodiscard]] auto push(T&& item) -> bool { std::lock_guard lock(mutex_); @@ -88,7 +87,7 @@ class WorkStealingDeque if (t - b >= capacity_) { - return false; // Queue full + return false; } buffer_[t % capacity_] = AlignedItem(std::move(item)); @@ -104,7 +103,7 @@ class WorkStealingDeque if (t - b >= capacity_) { - return false; // Queue full + return false; } buffer_[t % capacity_] = AlignedItem(item); @@ -120,7 +119,7 @@ class WorkStealingDeque if (t <= b) { - return false; // Empty + return false; } size_t const new_top = t - 1; @@ -129,7 +128,6 @@ class WorkStealingDeque return true; } - // Thief operations (other threads stealing work) [[nodiscard]] auto steal(T& item) -> bool { std::lock_guard lock(mutex_); @@ -138,7 +136,7 @@ class WorkStealingDeque if (b >= t) { - return false; // Empty + return false; } item = std::move(buffer_[b % capacity_].item); @@ -259,7 +257,6 @@ class HighPerformancePool : 
num_threads_(num_threads == 0 ? 1 : num_threads), stop_(false), next_victim_(0), start_time_(std::chrono::steady_clock::now()) { - // Initialize per-thread work queues worker_queues_.resize(num_threads_); for (size_t i = 0; i < num_threads_; ++i) { @@ -268,7 +265,6 @@ class HighPerformancePool workers_.reserve(num_threads_); - // Create worker threads with thread-local storage for (size_t i = 0; i < num_threads_; ++i) { workers_.emplace_back(&HighPerformancePool::worker_function, this, i); @@ -298,20 +294,17 @@ class HighPerformancePool if (stop_.load(std::memory_order_acquire)) { - throw std::runtime_error("ThreadPool is shutting down"); + throw std::runtime_error("HighPerformancePool is shutting down"); } - // Try to submit to least loaded queue (round-robin with fallback) size_t const preferred_queue = next_victim_.fetch_add(1, std::memory_order_relaxed) % num_threads_; - // First try the preferred queue if (worker_queues_[preferred_queue]->push([task]() { (*task)(); })) { wakeup_condition_.notify_one(); return result; } - // If preferred queue is full, try a few random ones for (size_t attempts = 0; attempts < (std::min)(num_threads_, size_t(3)); ++attempts) { size_t const idx = (preferred_queue + attempts + 1) % num_threads_; @@ -322,12 +315,11 @@ class HighPerformancePool } } - // All local queues full, use overflow queue { std::lock_guard lock(overflow_mutex_); if (stop_.load(std::memory_order_relaxed)) { - throw std::runtime_error("ThreadPool is shutting down"); + throw std::runtime_error("HighPerformancePool is shutting down"); } overflow_tasks_.emplace([task]() { (*task)(); }); } @@ -348,10 +340,9 @@ class HighPerformancePool if (stop_.load(std::memory_order_acquire)) { - throw std::runtime_error("ThreadPool is shutting down"); + throw std::runtime_error("HighPerformancePool is shutting down"); } - // Distribute batch across worker queues size_t queue_idx = next_victim_.fetch_add(batch_size, std::memory_order_relaxed) % num_threads_; for (auto it = begin; 
it != end; ++it) @@ -359,7 +350,6 @@ class HighPerformancePool auto task = std::make_shared>(*it); futures.push_back(task->get_future()); - // Try to place in worker queue, round-robin style bool queued = false; for (size_t attempts = 0; attempts < num_threads_; ++attempts) { @@ -373,13 +363,11 @@ class HighPerformancePool if (!queued) { - // Overflow to global queue std::lock_guard lock(overflow_mutex_); overflow_tasks_.emplace([task]() { (*task)(); }); } } - // Wake up workers for the batch wakeup_condition_.notify_all(); return futures; } @@ -394,7 +382,6 @@ class HighPerformancePool if (total_items == 0) return; - // Calculate optimal chunk size for cache efficiency size_t const chunk_size = (std::max)(size_t(1), total_items / (num_threads_ * 4)); std::vector> futures; @@ -412,7 +399,6 @@ class HighPerformancePool it = chunk_end; } - // Wait for all chunks to complete for (auto& future : futures) { future.wait(); @@ -514,7 +500,7 @@ class HighPerformancePool std::lock_guard lock(overflow_mutex_); if (stop_.exchange(true, std::memory_order_acq_rel)) { - return; // Already shutting down + return; } } @@ -573,11 +559,9 @@ class HighPerformancePool std::vector workers_; std::vector>> worker_queues_; - // Overflow queue for when worker queues are full std::queue overflow_tasks_; mutable std::mutex overflow_mutex_; - // Synchronization std::atomic stop_; std::condition_variable wakeup_condition_; std::mutex wakeup_mutex_; @@ -585,19 +569,17 @@ class HighPerformancePool std::condition_variable completion_condition_; std::mutex completion_mutex_; - // Load balancing and statistics std::atomic next_victim_; std::atomic active_tasks_{0}; std::atomic completed_tasks_{0}; std::atomic stolen_tasks_{0}; - std::atomic total_task_time_{0}; // microseconds + std::atomic total_task_time_{0}; std::chrono::steady_clock::time_point start_time_; // NOLINTNEXTLINE(readability-function-cognitive-complexity) void worker_function(size_t worker_id) { - // Thread-local random number 
generator for work stealing thread_local std::mt19937 gen = []() { std::random_device device; return std::mt19937(device()); @@ -610,12 +592,10 @@ class HighPerformancePool { bool found_task = false; - // 1. Try to get task from own queue (fast path) if (worker_queues_[worker_id]->pop(task)) { found_task = true; } - // 2. Try to steal from other workers (limit attempts to reduce contention) else { size_t const max_steal_attempts = (std::min)(num_threads_, size_t(4)); @@ -631,7 +611,6 @@ class HighPerformancePool } } - // 3. Try overflow queue if (!found_task) { std::lock_guard lock(overflow_mutex_); @@ -645,7 +624,6 @@ class HighPerformancePool if (found_task) { - // Execute task with timing active_tasks_.fetch_add(1, std::memory_order_relaxed); auto const start_time = std::chrono::steady_clock::now(); @@ -655,7 +633,6 @@ class HighPerformancePool } catch (...) { - // Log exception or handle as needed } auto const end_time = std::chrono::steady_clock::now(); @@ -669,13 +646,11 @@ class HighPerformancePool } else { - // No work found, check if we should stop if (stop_.load(std::memory_order_acquire)) { break; } - // Wait for work with adaptive timeout std::unique_lock lock(wakeup_mutex_); wakeup_condition_.wait_for(lock, std::chrono::microseconds(100)); } @@ -683,68 +658,100 @@ class HighPerformancePool } }; +// --------------------------------------------------------------------------- +// Wait policies for ThreadPoolBase +// --------------------------------------------------------------------------- + /** - * @brief Single-queue thread pool with optimized locking for medium workloads. + * @brief Wait policy that blocks indefinitely until work is available. * - * Alternative to @ref HighPerformancePool for cases where work-stealing overhead is - * not justified. All tasks share one std::queue protected by a single mutex, - * which keeps per-task overhead low while still scaling to multiple workers. 
+ * Workers consume zero CPU while idle but wake instantly when a task is + * enqueued. Used by the @c ThreadPool type alias. + */ +struct IndefiniteWait +{ + template + static auto wait(std::condition_variable& cv, Lock& lock, Pred pred) -> bool + { + cv.wait(lock, pred); + return true; + } +}; + +/** + * @brief Wait policy that polls with a 10 ms timeout. * - * Best for: Medium workloads (100-10k tasks), consistent task patterns where - * work-stealing complexity is not needed but better performance than the basic - * @ref ThreadPool is desired. + * Workers periodically re-check the queue even without notification, trading + * a small amount of CPU for lower wake-up latency under bursty workloads. + * Used by the @c FastThreadPool type alias. + */ +struct PollingWait +{ + template + static auto wait(std::condition_variable& cv, Lock& lock, Pred pred) -> bool + { + return cv.wait_for(lock, std::chrono::milliseconds(10), pred); + } +}; + +// --------------------------------------------------------------------------- +// ThreadPoolBase +// --------------------------------------------------------------------------- + +/** + * @brief Single-queue thread pool parameterized by its idle-wait strategy. + * + * All tasks share one std::queue protected by a single mutex. The + * @p WaitPolicy template parameter controls how workers wait for new + * work: + * - @ref IndefiniteWait - blocks on condition_variable::wait() (zero CPU + * while idle, instant wake). Instantiated as @c ThreadPool. + * - @ref PollingWait - polls with condition_variable::wait_for(10 ms). + * Slightly higher idle CPU but lower worst-case latency under bursty + * loads. Instantiated as @c FastThreadPool. * * @par How task execution works * When you call submit(), the callable is wrapped in a std::packaged_task, - * pushed into the single shared task queue under a mutex lock, and one - * sleeping worker is woken via condition_variable::notify_one(). 
The woken - * worker pops the front element from the queue and executes it. If the queue - * is empty when a worker wakes up, it goes back to sleep with a 10 ms - * timeout before checking again. + * pushed into the shared task queue under a mutex lock, and one sleeping + * worker is woken via condition_variable::notify_one(). The woken worker + * pops the front element and executes it. * * @par Execution guarantees * - Every successfully submitted task (submit() returned without throwing) - * is guaranteed to eventually execute, as long as the pool is not - * destroyed while shutdown() is draining remaining work. + * is guaranteed to eventually execute. * - submit() throws std::runtime_error if the pool is already shutting - * down. In that case the task is NOT enqueued and will NOT execute. - * - Tasks are stored in a FIFO queue, so they are picked up roughly in - * submission order. However, since multiple workers pop concurrently, - * the actual completion order is non-deterministic. + * down. In that case the task is NOT enqueued. + * - Tasks are stored in a FIFO queue. Multiple workers pop concurrently, + * so submission order is roughly preserved but completion order is + * non-deterministic. * - The returned std::future becomes ready once the task finishes. If the - * task threw an exception, future.get() rethrows it. The worker thread - * itself is not affected and continues processing further tasks. + * task threw an exception, future.get() rethrows it. * - On shutdown(), workers finish their current task, then drain all - * remaining queued tasks before exiting. Tasks submitted before - * shutdown() are guaranteed to execute. + * remaining queued tasks before exiting. + * - wait_for_tasks() blocks until the queue is empty AND no worker is + * currently executing a task. * * @par Thread safety * submit() and submit_batch() may be called from any thread concurrently. * shutdown() is internally guarded and safe to call more than once. 
* - * @par Polling / wake-up - * Workers use condition_variable::wait_for with a 10 ms timeout, so an idle - * worker may take up to 10 ms to notice the stop flag after shutdown() is - * called. - * * @par Exception handling * Exceptions thrown by tasks are caught inside the worker loop. They are * stored in the std::future returned by submit(). The worker thread * continues processing. * - * @par Configuration return type - * configure_threads() and set_affinity() return bool (not - * expected as in @ref HighPerformancePool). A return - * value of false means at least one worker could not be configured. - * * @par Lifetime * The destructor calls shutdown() and joins all worker threads. Can block * if tasks are still running. * * @par Copyability / movability * Not copyable, not movable. + * + * @tparam WaitPolicy Strategy type with a static + * @c wait(cv, lock, predicate) -> bool method. */ -class FastThreadPool +template +class ThreadPoolBase { public: using Task = std::function; @@ -759,28 +766,28 @@ class FastThreadPool std::chrono::microseconds avg_task_time; }; - explicit FastThreadPool(size_t num_threads = std::thread::hardware_concurrency()) - : num_threads_(num_threads == 0 ? 1 : num_threads), stop_(false), start_time_(std::chrono::steady_clock::now()) + explicit ThreadPoolBase(size_t num_threads = std::thread::hardware_concurrency()) + : num_threads_(num_threads == 0 ? 
1 : num_threads), stop_(false), + start_time_(std::chrono::steady_clock::now()) { workers_.reserve(num_threads_); - // Create worker threads for (size_t i = 0; i < num_threads_; ++i) { - workers_.emplace_back(&FastThreadPool::worker_function, this, i); + workers_.emplace_back(&ThreadPoolBase::worker_function, this, i); } } - FastThreadPool(FastThreadPool const&) = delete; - auto operator=(FastThreadPool const&) -> FastThreadPool& = delete; + ThreadPoolBase(ThreadPoolBase const&) = delete; + auto operator=(ThreadPoolBase const&) -> ThreadPoolBase& = delete; - ~FastThreadPool() + ~ThreadPoolBase() { shutdown(); } /** - * @brief Optimized task submission with minimal locking + * @brief Submit a task to the thread pool */ template auto submit(F&& f, Args&&... args) -> std::future> @@ -796,7 +803,7 @@ class FastThreadPool std::lock_guard lock(queue_mutex_); if (stop_) { - throw std::runtime_error("FastThreadPool is shutting down"); + throw std::runtime_error("Pool is shutting down"); } tasks_.emplace([task]() { (*task)(); }); } @@ -806,20 +813,19 @@ class FastThreadPool } /** - * @brief Efficient batch processing + * @brief Submit multiple tasks under a single lock acquisition */ template auto submit_batch(Iterator begin, Iterator end) -> std::vector> { std::vector> futures; - size_t const batch_size = std::distance(begin, end); - futures.reserve(batch_size); + futures.reserve(std::distance(begin, end)); { std::lock_guard lock(queue_mutex_); if (stop_) { - throw std::runtime_error("FastThreadPool is shutting down"); + throw std::runtime_error("Pool is shutting down"); } for (auto it = begin; it != end; ++it) @@ -830,35 +836,46 @@ class FastThreadPool } } - // Wake up all workers for batch processing condition_.notify_all(); return futures; } - void shutdown() + /** + * @brief Apply a function to a range of values in parallel + */ + template + void parallel_for_each(Iterator begin, Iterator end, F&& func) { + std::vector> futures; + futures.reserve(std::distance(begin, 
end)); + + for (auto it = begin; it != end; ++it) { - std::lock_guard lock(queue_mutex_); - if (stop_) - return; - stop_ = true; + futures.push_back(submit([func, it]() { func(*it); })); } - condition_.notify_all(); - - for (auto& worker : workers_) + for (auto& future : futures) { - if (worker.joinable()) - { - worker.join(); - } + future.wait(); } + } - workers_.clear(); + [[nodiscard]] auto size() const noexcept -> size_t + { + return num_threads_; } + [[nodiscard]] auto pending_tasks() const -> size_t + { + std::lock_guard lock(queue_mutex_); + return tasks_.size(); + } + + /** + * @brief Configure all worker threads (name, scheduling policy, priority) + */ auto configure_threads(std::string const& name_prefix, SchedulingPolicy policy = SchedulingPolicy::OTHER, - ThreadPriority priority = ThreadPriority::normal()) -> bool + ThreadPriority priority = ThreadPriority::normal()) -> expected { bool success = true; @@ -866,64 +883,62 @@ class FastThreadPool { std::string const thread_name = name_prefix + "_" + std::to_string(i); - if (!workers_[i].set_name(thread_name)) + if (!workers_[i].set_name(thread_name).has_value()) { success = false; } - if (!workers_[i].set_scheduling_policy(policy, priority)) + if (!workers_[i].set_scheduling_policy(policy, priority).has_value()) { success = false; } } - - return success; + if (success) + return {}; + return unexpected(std::make_error_code(std::errc::operation_not_permitted)); } - auto set_affinity(ThreadAffinity const& affinity) -> bool + /** + * @brief Set CPU affinity for all worker threads + */ + auto set_affinity(ThreadAffinity const& affinity) -> expected { bool success = true; for (auto& worker : workers_) { - if (!worker.set_affinity(affinity)) + if (!worker.set_affinity(affinity).has_value()) { success = false; } } - - return success; + if (success) + return {}; + return unexpected(std::make_error_code(std::errc::operation_not_permitted)); } - auto distribute_across_cpus() -> bool + /** + * @brief Distribute 
workers across available CPUs (round-robin) + */ + auto distribute_across_cpus() -> expected { auto const cpu_count = std::thread::hardware_concurrency(); if (cpu_count == 0) - return false; + return unexpected(std::make_error_code(std::errc::invalid_argument)); bool success = true; for (size_t i = 0; i < workers_.size(); ++i) { ThreadAffinity affinity({static_cast(i % cpu_count)}); - if (!workers_[i].set_affinity(affinity)) + if (!workers_[i].set_affinity(affinity).has_value()) { success = false; } } - - return success; - } - - [[nodiscard]] auto size() const noexcept -> size_t - { - return num_threads_; - } - - [[nodiscard]] auto pending_tasks() const -> size_t - { - std::lock_guard lock(queue_mutex_); - return tasks_.size(); + if (success) + return {}; + return unexpected(std::make_error_code(std::errc::operation_not_permitted)); } void wait_for_tasks() @@ -933,6 +948,31 @@ class FastThreadPool lock, [this] { return tasks_.empty() && active_tasks_.load(std::memory_order_acquire) == 0; }); } + void shutdown() + { + { + std::lock_guard lock(queue_mutex_); + if (stop_) + return; + stop_ = true; + } + + condition_.notify_all(); + + for (auto& worker : workers_) + { + if (worker.joinable()) + { + worker.join(); + } + } + + workers_.clear(); + } + + /** + * @brief Get performance statistics + */ [[nodiscard]] auto get_statistics() const -> Statistics { auto const now = std::chrono::steady_clock::now(); @@ -978,11 +1018,11 @@ class FastThreadPool std::atomic stop_; std::atomic active_tasks_{0}; std::atomic completed_tasks_{0}; - std::atomic total_task_time_{0}; // microseconds + std::atomic total_task_time_{0}; std::chrono::steady_clock::time_point start_time_; - void worker_function(size_t /* worker_id */) + void worker_function(size_t /*worker_id*/) { while (true) { @@ -992,8 +1032,7 @@ class FastThreadPool { std::unique_lock lock(queue_mutex_); - if (condition_.wait_for(lock, std::chrono::milliseconds(10), - [this] { return stop_ || !tasks_.empty(); })) + if 
(WaitPolicy::wait(condition_, lock, [this] { return stop_ || !tasks_.empty(); })) { if (stop_ && tasks_.empty()) { @@ -1039,426 +1078,64 @@ class FastThreadPool }; /** - * @brief Simple, general-purpose thread pool. - * - * This is a straightforward thread pool implementation suitable for: - * - Simple workloads with low task counts (< 1k tasks) - * - General application use (50k-500k tasks/second) - * - Simple task submission patterns - * - Lower memory overhead and complexity - * - Easier to understand and debug + * @brief General-purpose thread pool with indefinite blocking wait. * - * For high-throughput scenarios (> 1k tasks), consider @ref FastThreadPool or - * @ref HighPerformancePool. + * Workers block on condition_variable::wait() when idle - zero CPU + * consumption, instant wake-up on task submission. Suitable for most + * workloads. * - * @par How task execution works - * When you call submit(), the callable is wrapped in a std::packaged_task - * and pushed into a single shared std::queue under a mutex lock. One - * sleeping worker is then woken via condition_variable::notify_one(). The - * woken worker pops the front task from the queue and executes it. Workers - * block indefinitely on the condition_variable when the queue is empty (no - * polling timeout), so they consume zero CPU while idle. - * - * @par Execution guarantees - * - Every successfully submitted task (submit() returned without throwing) - * is guaranteed to eventually execute. - * - submit() throws std::runtime_error if the pool is already shutting - * down. In that case the task is NOT enqueued. - * - Tasks are stored in a FIFO queue. Multiple workers pop concurrently, so - * submission order is roughly preserved but completion order is - * non-deterministic. - * - The returned std::future becomes ready once the task finishes. If the - * task threw an exception, future.get() rethrows it. - * - On shutdown(), the stop flag is set and all workers are woken. 
Each - * worker finishes its current task and then exits only if the queue is - * empty. This means all tasks that were enqueued before shutdown() are - * guaranteed to execute. - * - wait_for_tasks() blocks until the queue is empty AND no worker is - * currently executing a task. - * - * @par Thread safety - * submit() may be called from any thread concurrently. All task-queue access - * is serialized through queue_mutex_. - * - * @par Wake-up behaviour - * Workers block on a std::condition_variable (no polling timeout), so they - * consume no CPU while idle but wake instantly when a task is enqueued. - * - * @par Internal counter note - * Unlike @ref FastThreadPool and @ref HighPerformancePool, active_tasks_ and - * completed_tasks_ are incremented/decremented while queue_mutex_ is held. - * This means they are always consistent with the queue size, but every task - * completion acquires the mutex an extra time. - * - * @par Exception handling - * Exceptions thrown by tasks are caught inside the worker loop. They are - * stored in the std::future returned by submit(). The worker thread - * continues processing. - * - * @par Lifetime - * The destructor calls shutdown() and joins all worker threads. Can block - * if tasks are still running. - * - * @par Copyability / movability - * Not copyable, not movable. + * @see ThreadPoolBase, IndefiniteWait */ -class ThreadPool -{ - public: - using Task = std::function; - - struct Statistics - { - size_t total_threads; - size_t active_threads; - size_t pending_tasks; - size_t completed_tasks; - }; - - explicit ThreadPool(size_t num_threads = std::thread::hardware_concurrency()) - : num_threads_(num_threads == 0 ? 
1 : num_threads), stop_(false) - { - workers_.reserve(num_threads_); - - // Create worker threads - for (size_t i = 0; i < num_threads_; ++i) - { - workers_.emplace_back(&ThreadPool::worker_function, this); - } - } - - ThreadPool(ThreadPool const&) = delete; - auto operator=(ThreadPool const&) -> ThreadPool& = delete; - - ~ThreadPool() - { - shutdown(); - } - - /** - * @brief Submit a task to the thread pool - */ - template - auto submit(F&& f, Args&&... args) -> std::future> - { - using return_type = std::invoke_result_t; - - auto task = std::make_shared>( - std::bind(std::forward(f), std::forward(args)...)); - - std::future result = task->get_future(); - - { - std::lock_guard lock(queue_mutex_); - - if (stop_) - { - throw std::runtime_error("ThreadPool is shutting down"); - } - - tasks_.emplace([task]() { (*task)(); }); - } - - condition_.notify_one(); - return result; - } - - /** - * @brief Submit multiple tasks - */ - template - auto submit_range(Iterator begin, Iterator end) -> std::vector> - { - std::vector> futures; - futures.reserve(std::distance(begin, end)); - - for (auto it = begin; it != end; ++it) - { - futures.push_back(submit(*it)); - } - - return futures; - } - - /** - * @brief Apply a function to a range of values in parallel - */ - template - void parallel_for_each(Iterator begin, Iterator end, F&& func) - { - std::vector> futures; - futures.reserve(std::distance(begin, end)); - - for (auto it = begin; it != end; ++it) - { - futures.push_back(submit([func, it]() { func(*it); })); - } - - // Wait for all tasks to complete - for (auto& future : futures) - { - future.wait(); - } - } - - [[nodiscard]] auto size() const noexcept -> size_t - { - return num_threads_; - } - - [[nodiscard]] auto pending_tasks() const -> size_t - { - std::lock_guard lock(queue_mutex_); - return tasks_.size(); - } - - /** - * @brief Configure thread properties - */ - auto configure_threads(std::string const& name_prefix, SchedulingPolicy policy = SchedulingPolicy::OTHER, - 
ThreadPriority priority = ThreadPriority::normal()) -> bool - { - bool success = true; - - for (size_t i = 0; i < workers_.size(); ++i) - { - std::string const thread_name = name_prefix + "_" + std::to_string(i); - - if (!workers_[i].set_name(thread_name)) - { - success = false; - } - - if (!workers_[i].set_scheduling_policy(policy, priority)) - { - success = false; - } - } - - return success; - } - - auto set_affinity(ThreadAffinity const& affinity) -> bool - { - bool success = true; - - for (auto& worker : workers_) - { - if (!worker.set_affinity(affinity)) - { - success = false; - } - } - - return success; - } - - auto distribute_across_cpus() -> bool - { - auto const cpu_count = std::thread::hardware_concurrency(); - if (cpu_count == 0) - return false; - - bool success = true; - - for (size_t i = 0; i < workers_.size(); ++i) - { - ThreadAffinity affinity({static_cast(i % cpu_count)}); - if (!workers_[i].set_affinity(affinity)) - { - success = false; - } - } - - return success; - } - - void wait_for_tasks() - { - std::unique_lock lock(queue_mutex_); - task_finished_condition_.wait(lock, [this] { return tasks_.empty() && active_tasks_ == 0; }); - } - - void shutdown() - { - { - std::lock_guard lock(queue_mutex_); - if (stop_) - return; - stop_ = true; - } - - condition_.notify_all(); - - for (auto& worker : workers_) - { - if (worker.joinable()) - { - worker.join(); - } - } - - workers_.clear(); - } - - [[nodiscard]] auto get_statistics() const -> Statistics - { - std::lock_guard lock(queue_mutex_); - Statistics stats; - stats.total_threads = num_threads_; - stats.active_threads = active_tasks_; - stats.pending_tasks = tasks_.size(); - stats.completed_tasks = completed_tasks_; - return stats; - } - - private: - size_t num_threads_; - std::vector workers_; - std::queue tasks_; - - mutable std::mutex queue_mutex_; - std::condition_variable condition_; - std::condition_variable task_finished_condition_; - std::atomic stop_; - std::atomic active_tasks_{0}; - 
std::atomic completed_tasks_{0}; - - void worker_function() - { - while (true) - { - Task task; - - { - std::unique_lock lock(queue_mutex_); - - condition_.wait(lock, [this] { return stop_ || !tasks_.empty(); }); - - if (stop_ && tasks_.empty()) - { - return; - } - - task = std::move(tasks_.front()); - tasks_.pop(); - ++active_tasks_; - } - - try - { - task(); - } - catch (...) - { - // Log exception or handle as needed - } - - { - std::lock_guard lock(queue_mutex_); - --active_tasks_; - ++completed_tasks_; - } - - task_finished_condition_.notify_all(); - } - } -}; +using ThreadPool = ThreadPoolBase; /** - * @brief Singleton accessor for a process-wide @ref ThreadPool instance. + * @brief Thread pool with 10 ms polling wait for lower wake-up latency. * - * Provides static convenience methods that forward to a single @ref ThreadPool - * whose lifetime is managed as a function-local static (Meyer's singleton). + * Workers poll with condition_variable::wait_for(10 ms), trading a small + * amount of idle CPU for more consistent latency under bursty workloads. * - * @par Thread safety - * The underlying @ref ThreadPool is created on the first call to instance() and is - * guaranteed to be thread-safe in C++11 and later (magic statics). All - * forwarded methods (submit, submit_range, parallel_for_each) are as - * thread-safe as the corresponding @ref ThreadPool methods. - * - * @par Pool size - * The pool is created with @c std::thread::hardware_concurrency() threads. - * This size is fixed for the lifetime of the process; there is no API to - * resize the singleton pool after creation. - * - * @par Static destruction order - * Because the pool is a function-local static, it is destroyed during static - * destruction in reverse order of construction. Submitting work to the global - * pool from destructors of other static objects is undefined behaviour if the - * pool has already been destroyed. 
Prefer explicit lifetime management in - * programs with complex static initialization dependencies. - * - * @par Copyability / movability - * Not instantiable (private constructor). All access is through static - * methods. + * @see ThreadPoolBase, PollingWait */ -class GlobalThreadPool -{ - public: - static auto instance() -> ThreadPool& - { - static ThreadPool pool(std::thread::hardware_concurrency()); - return pool; - } - - template - static auto submit(F&& f, Args&&... args) - { - return instance().submit(std::forward(f), std::forward(args)...); - } - - template - static auto submit_range(Iterator begin, Iterator end) - { - return instance().submit_range(begin, end); - } - - template - static void parallel_for_each(Iterator begin, Iterator end, F&& func) - { - instance().parallel_for_each(begin, end, std::forward(func)); - } +using FastThreadPool = ThreadPoolBase; - private: - GlobalThreadPool() = default; -}; +// --------------------------------------------------------------------------- +// GlobalPool +// --------------------------------------------------------------------------- /** - * @brief Singleton accessor for a process-wide @ref HighPerformancePool instance. + * @brief Singleton accessor for a process-wide pool instance. * - * Provides static convenience methods that forward to a single - * @ref HighPerformancePool whose lifetime is managed as a function-local static - * (Meyer's singleton). + * Provides static convenience methods that forward to a single pool + * whose lifetime is managed as a function-local static (Meyer's singleton). * * @par Thread safety * The underlying pool is created on the first call to instance() and is * guaranteed to be thread-safe in C++11 and later (magic statics). All - * forwarded methods (submit, submit_batch, parallel_for_each) are as - * thread-safe as the corresponding @ref HighPerformancePool methods. + * forwarded methods are as thread-safe as the corresponding pool methods. 
* * @par Pool size * The pool is created with @c std::thread::hardware_concurrency() threads. - * This size is fixed for the lifetime of the process; there is no API to - * resize the singleton pool after creation. + * This size is fixed for the lifetime of the process. * * @par Static destruction order * Because the pool is a function-local static, it is destroyed during static * destruction in reverse order of construction. Submitting work to the global * pool from destructors of other static objects is undefined behaviour if the - * pool has already been destroyed. Prefer explicit lifetime management in - * programs with complex static initialization dependencies. + * pool has already been destroyed. * * @par Copyability / movability * Not instantiable (private constructor). All access is through static * methods. + * + * @tparam PoolType The concrete pool type to wrap. */ -class GlobalHighPerformancePool +template +class GlobalPool { public: - static auto instance() -> HighPerformancePool& + static auto instance() -> PoolType& { - static HighPerformancePool pool(std::thread::hardware_concurrency()); + static PoolType pool(std::thread::hardware_concurrency()); return pool; } @@ -1481,9 +1158,15 @@ class GlobalHighPerformancePool } private: - GlobalHighPerformancePool() = default; + GlobalPool() = default; }; +/** @brief Singleton @ref ThreadPool accessor. */ +using GlobalThreadPool = GlobalPool; + +/** @brief Singleton @ref HighPerformancePool accessor. */ +using GlobalHighPerformancePool = GlobalPool; + /** * @brief Convenience wrapper that applies a callable to every element of a * container in parallel using the @ref GlobalThreadPool singleton. @@ -1495,17 +1178,6 @@ class GlobalHighPerformancePool * * The call blocks until every element has been processed. * - * @par Thread safety - * The function itself is thread-safe (it forwards to @ref GlobalThreadPool which - * guards its queue with a mutex). 
However, the caller must ensure that - * concurrent invocations of @p func on different elements do not race on - * shared state. - * - * @par Pool lifetime - * On the first call, GlobalThreadPool::instance() lazily creates the - * singleton pool sized to @c std::thread::hardware_concurrency(). See - * @ref GlobalThreadPool for static-destruction-order caveats. - * * @tparam Container Any type exposing begin() / end() iterators. * @tparam F Callable compatible with @c void(Container::value_type&). * diff --git a/include/threadschedule/thread_pool_with_errors.hpp b/include/threadschedule/thread_pool_with_errors.hpp index 1694cd4..36b5c61 100644 --- a/include/threadschedule/thread_pool_with_errors.hpp +++ b/include/threadschedule/thread_pool_with_errors.hpp @@ -8,10 +8,9 @@ namespace threadschedule { /** - * @brief @ref HighPerformancePool combined with an @ref ErrorHandler. + * @brief Thread pool wrapper that combines any pool type with an @ref ErrorHandler. * - * Non-copyable, non-movable. Thread-safe (delegates to the underlying - * @ref HighPerformancePool). + * Non-copyable, non-movable. Thread-safe (delegates to the underlying pool). * * submit() wraps every task so that exceptions are both reported to * the @ref ErrorHandler (via registered callbacks) **and** re-thrown, making @@ -20,11 +19,15 @@ namespace threadschedule * description string to the error report for easier diagnostics. * * @see FutureWithErrorHandler, ErrorHandler, TaskError + * + * @tparam PoolType The underlying pool type (e.g. ThreadPool, + * FastThreadPool, HighPerformancePool). 
*/ -class HighPerformancePoolWithErrors +template +class PoolWithErrors { public: - explicit HighPerformancePoolWithErrors(size_t num_threads = std::thread::hardware_concurrency()) + explicit PoolWithErrors(size_t num_threads = std::thread::hardware_concurrency()) : pool_(num_threads), error_handler_(std::make_shared()) { } @@ -86,167 +89,6 @@ class HighPerformancePoolWithErrors return FutureWithErrorHandler>(std::move(future)); } - /** - * @brief Add a global error callback for all tasks - */ - auto add_error_callback(ErrorCallback callback) -> size_t - { - return error_handler_->add_callback(std::move(callback)); - } - - /** - * @brief Clear all error callbacks - */ - void clear_error_callbacks() - { - error_handler_->clear_callbacks(); - } - - /** - * @brief Get total error count - */ - [[nodiscard]] auto error_count() const -> size_t - { - return error_handler_->error_count(); - } - - /** - * @brief Reset error count - */ - void reset_error_count() - { - error_handler_->reset_error_count(); - } - - /** - * @brief Get the underlying pool - */ - [[nodiscard]] auto pool() -> HighPerformancePool& - { - return pool_; - } - - /** - * @brief Get statistics from underlying pool - */ - [[nodiscard]] auto get_statistics() const -> HighPerformancePool::Statistics - { - return pool_.get_statistics(); - } - - /** - * @brief Configure threads - */ - auto configure_threads(std::string const& name_prefix, SchedulingPolicy policy = SchedulingPolicy::OTHER, - ThreadPriority priority = ThreadPriority::normal()) -> expected - { - return pool_.configure_threads(name_prefix, policy, priority); - } - - auto set_affinity(ThreadAffinity const& affinity) -> expected - { - return pool_.set_affinity(affinity); - } - - auto distribute_across_cpus() -> expected - { - return pool_.distribute_across_cpus(); - } - - void shutdown() - { - pool_.shutdown(); - } - - void wait_for_tasks() - { - pool_.wait_for_tasks(); - } - - [[nodiscard]] auto size() const noexcept -> size_t - { - return 
pool_.size(); - } - - [[nodiscard]] auto pending_tasks() const -> size_t - { - return pool_.pending_tasks(); - } - - private: - HighPerformancePool pool_; - std::shared_ptr error_handler_; -}; - -/** - * @brief FastThreadPool combined with an ErrorHandler. - * - * Non-copyable, non-movable. Thread-safe (delegates to the underlying - * FastThreadPool). Same error-handling semantics as - * HighPerformancePoolWithErrors: exceptions are reported to the - * ErrorHandler **and** re-thrown through the future. - * - * @see HighPerformancePoolWithErrors for detailed behaviour. - */ -class FastThreadPoolWithErrors -{ - public: - explicit FastThreadPoolWithErrors(size_t num_threads = std::thread::hardware_concurrency()) - : pool_(num_threads), error_handler_(std::make_shared()) - { - } - - template - auto submit(F&& f, Args&&... args) -> FutureWithErrorHandler> - { - auto handler = error_handler_; - auto wrapped_task = [f = std::forward(f), args = std::make_tuple(std::forward(args)...), handler]() { - try - { - return std::apply(f, args); - } - catch (...) - { - TaskError error; - error.exception = std::current_exception(); - error.thread_id = std::this_thread::get_id(); - error.timestamp = std::chrono::steady_clock::now(); - handler->handle_error(error); - throw; - } - }; - - auto future = pool_.submit(std::move(wrapped_task)); - return FutureWithErrorHandler>(std::move(future)); - } - - template - auto submit_with_description(std::string const& description, F&& f, Args&&... args) - -> FutureWithErrorHandler> - { - auto handler = error_handler_; - auto wrapped_task = [f = std::forward(f), args = std::make_tuple(std::forward(args)...), handler, - description]() { - try - { - return std::apply(f, args); - } - catch (...) 
- { - TaskError error; - error.exception = std::current_exception(); - error.task_description = description; - error.thread_id = std::this_thread::get_id(); - error.timestamp = std::chrono::steady_clock::now(); - handler->handle_error(error); - throw; - } - }; - - auto future = pool_.submit(std::move(wrapped_task)); - return FutureWithErrorHandler>(std::move(future)); - } - auto add_error_callback(ErrorCallback callback) -> size_t { return error_handler_->add_callback(std::move(callback)); @@ -267,28 +109,28 @@ class FastThreadPoolWithErrors error_handler_->reset_error_count(); } - [[nodiscard]] auto pool() -> FastThreadPool& + [[nodiscard]] auto pool() -> PoolType& { return pool_; } - [[nodiscard]] auto get_statistics() const -> FastThreadPool::Statistics + [[nodiscard]] auto get_statistics() const -> decltype(auto) { return pool_.get_statistics(); } auto configure_threads(std::string const& name_prefix, SchedulingPolicy policy = SchedulingPolicy::OTHER, - ThreadPriority priority = ThreadPriority::normal()) -> bool + ThreadPriority priority = ThreadPriority::normal()) -> decltype(auto) { return pool_.configure_threads(name_prefix, policy, priority); } - auto set_affinity(ThreadAffinity const& affinity) -> bool + auto set_affinity(ThreadAffinity const& affinity) -> decltype(auto) { return pool_.set_affinity(affinity); } - auto distribute_across_cpus() -> bool + auto distribute_across_cpus() -> decltype(auto) { return pool_.distribute_across_cpus(); } @@ -314,148 +156,17 @@ class FastThreadPoolWithErrors } private: - FastThreadPool pool_; + PoolType pool_; std::shared_ptr error_handler_; }; -/** - * @brief ThreadPool combined with an ErrorHandler. - * - * Non-copyable, non-movable. Thread-safe (delegates to the underlying - * ThreadPool). Same error-handling semantics as - * HighPerformancePoolWithErrors: exceptions are reported to the - * ErrorHandler **and** re-thrown through the future. - * - * @see HighPerformancePoolWithErrors for detailed behaviour. 
- */ -class ThreadPoolWithErrors -{ - public: - explicit ThreadPoolWithErrors(size_t num_threads = std::thread::hardware_concurrency()) - : pool_(num_threads), error_handler_(std::make_shared()) - { - } - - template - auto submit(F&& f, Args&&... args) -> FutureWithErrorHandler> - { - auto handler = error_handler_; - auto wrapped_task = [f = std::forward(f), args = std::make_tuple(std::forward(args)...), handler]() { - try - { - return std::apply(f, args); - } - catch (...) - { - TaskError error; - error.exception = std::current_exception(); - error.thread_id = std::this_thread::get_id(); - error.timestamp = std::chrono::steady_clock::now(); - handler->handle_error(error); - throw; - } - }; - - auto future = pool_.submit(std::move(wrapped_task)); - return FutureWithErrorHandler>(std::move(future)); - } - - template - auto submit_with_description(std::string const& description, F&& f, Args&&... args) - -> FutureWithErrorHandler> - { - auto handler = error_handler_; - auto wrapped_task = [f = std::forward(f), args = std::make_tuple(std::forward(args)...), handler, - description]() { - try - { - return std::apply(f, args); - } - catch (...) 
- { - TaskError error; - error.exception = std::current_exception(); - error.task_description = description; - error.thread_id = std::this_thread::get_id(); - error.timestamp = std::chrono::steady_clock::now(); - handler->handle_error(error); - throw; - } - }; - - auto future = pool_.submit(std::move(wrapped_task)); - return FutureWithErrorHandler>(std::move(future)); - } - - auto add_error_callback(ErrorCallback callback) -> size_t - { - return error_handler_->add_callback(std::move(callback)); - } - - void clear_error_callbacks() - { - error_handler_->clear_callbacks(); - } - - [[nodiscard]] auto error_count() const -> size_t - { - return error_handler_->error_count(); - } - - void reset_error_count() - { - error_handler_->reset_error_count(); - } - - [[nodiscard]] auto pool() -> ThreadPool& - { - return pool_; - } - - [[nodiscard]] auto get_statistics() const -> ThreadPool::Statistics - { - return pool_.get_statistics(); - } - - auto configure_threads(std::string const& name_prefix, SchedulingPolicy policy = SchedulingPolicy::OTHER, - ThreadPriority priority = ThreadPriority::normal()) -> bool - { - return pool_.configure_threads(name_prefix, policy, priority); - } +/** @brief @ref HighPerformancePool with integrated error handling. */ +using HighPerformancePoolWithErrors = PoolWithErrors; - auto set_affinity(ThreadAffinity const& affinity) -> bool - { - return pool_.set_affinity(affinity); - } +/** @brief @ref FastThreadPool with integrated error handling. 
*/ +using FastThreadPoolWithErrors = PoolWithErrors; - auto distribute_across_cpus() -> bool - { - return pool_.distribute_across_cpus(); - } - - void wait_for_tasks() - { - pool_.wait_for_tasks(); - } - - void shutdown() - { - pool_.shutdown(); - } - - [[nodiscard]] auto size() const noexcept -> size_t - { - return pool_.size(); - } - - [[nodiscard]] auto pending_tasks() const -> size_t - { - return pool_.pending_tasks(); - } - - private: - ThreadPool pool_; - std::shared_ptr error_handler_; -}; +/** @brief @ref ThreadPool with integrated error handling. */ +using ThreadPoolWithErrors = PoolWithErrors; } // namespace threadschedule diff --git a/include/threadschedule/thread_registry.hpp b/include/threadschedule/thread_registry.hpp index 3a0dae5..bb35270 100644 --- a/include/threadschedule/thread_registry.hpp +++ b/include/threadschedule/thread_registry.hpp @@ -159,67 +159,18 @@ class ThreadControlBlock [[nodiscard]] auto set_affinity(ThreadAffinity const& affinity) const -> expected { #ifdef _WIN32 - if (!handle_) - return unexpected(std::make_error_code(std::errc::no_such_process)); - using SetThreadGroupAffinityFn = BOOL(WINAPI*)(HANDLE, const GROUP_AFFINITY*, PGROUP_AFFINITY); - HMODULE hMod = GetModuleHandleW(L"kernel32.dll"); - if (hMod) - { - auto set_group_affinity = reinterpret_cast( - reinterpret_cast(GetProcAddress(hMod, "SetThreadGroupAffinity"))); - if (set_group_affinity && affinity.has_any()) - { - GROUP_AFFINITY ga{}; - ga.Mask = static_cast(affinity.get_mask()); - ga.Group = affinity.get_group(); - if (set_group_affinity(handle_, &ga, nullptr) != 0) - return {}; - return unexpected(std::make_error_code(std::errc::operation_not_permitted)); - } - } - DWORD_PTR mask = static_cast(affinity.get_mask()); - if (SetThreadAffinityMask(handle_, mask) != 0) - return {}; - return unexpected(std::make_error_code(std::errc::operation_not_permitted)); + return detail::apply_affinity(handle_, affinity); #else - if (pthread_setaffinity_np(pthreadHandle_, 
sizeof(cpu_set_t), &affinity.native_handle()) == 0) - return {}; - return unexpected(std::error_code(errno, std::generic_category())); + return detail::apply_affinity(pthreadHandle_, affinity); #endif } [[nodiscard]] auto set_priority(ThreadPriority priority) const -> expected { #ifdef _WIN32 - if (!handle_) - return unexpected(std::make_error_code(std::errc::no_such_process)); - int win_priority; - int prio_val = priority.value(); - if (prio_val <= -10) - win_priority = THREAD_PRIORITY_IDLE; - else if (prio_val <= -5) - win_priority = THREAD_PRIORITY_LOWEST; - else if (prio_val < 0) - win_priority = THREAD_PRIORITY_BELOW_NORMAL; - else if (prio_val == 0) - win_priority = THREAD_PRIORITY_NORMAL; - else if (prio_val <= 5) - win_priority = THREAD_PRIORITY_ABOVE_NORMAL; - else if (prio_val <= 10) - win_priority = THREAD_PRIORITY_HIGHEST; - else - win_priority = THREAD_PRIORITY_TIME_CRITICAL; - if (SetThreadPriority(handle_, win_priority) != 0) - return {}; - return unexpected(std::make_error_code(std::errc::operation_not_permitted)); + return detail::apply_priority(handle_, priority); #else - const int policy = SCHED_OTHER; - auto params_result = SchedulerParams::create_for_policy(SchedulingPolicy::OTHER, priority); - if (!params_result.has_value()) - return unexpected(params_result.error()); - if (pthread_setschedparam(pthreadHandle_, policy, ¶ms_result.value()) == 0) - return {}; - return unexpected(std::error_code(errno, std::generic_category())); + return detail::apply_priority(pthreadHandle_, priority); #endif } @@ -227,15 +178,9 @@ class ThreadControlBlock -> expected { #ifdef _WIN32 - return set_priority(priority); + return detail::apply_scheduling_policy(handle_, policy, priority); #else - const int policy_int = static_cast(policy); - auto params_result = SchedulerParams::create_for_policy(policy, priority); - if (!params_result.has_value()) - return unexpected(params_result.error()); - if (pthread_setschedparam(pthreadHandle_, policy_int, ¶ms_result.value()) 
== 0) - return {}; - return unexpected(std::error_code(errno, std::generic_category())); + return detail::apply_scheduling_policy(pthreadHandle_, policy, priority); #endif } diff --git a/include/threadschedule/thread_wrapper.hpp b/include/threadschedule/thread_wrapper.hpp index 0f055dd..df270c0 100644 --- a/include/threadschedule/thread_wrapper.hpp +++ b/include/threadschedule/thread_wrapper.hpp @@ -279,118 +279,18 @@ class BaseThreadWrapper : protected detail::ThreadStorage expected { -#ifdef _WIN32 - const auto handle = native_handle(); - // Map ThreadPriority to Windows priority - // Windows thread priorities range from -15 (THREAD_PRIORITY_IDLE) to +15 (THREAD_PRIORITY_TIME_CRITICAL) - // We'll map the priority value to Windows constants - int win_priority; - int prio_val = priority.value(); - - if (prio_val <= -10) - { - win_priority = THREAD_PRIORITY_IDLE; - } - else if (prio_val <= -5) - { - win_priority = THREAD_PRIORITY_LOWEST; - } - else if (prio_val < 0) - { - win_priority = THREAD_PRIORITY_BELOW_NORMAL; - } - else if (prio_val == 0) - { - win_priority = THREAD_PRIORITY_NORMAL; - } - else if (prio_val <= 5) - { - win_priority = THREAD_PRIORITY_ABOVE_NORMAL; - } - else if (prio_val <= 10) - { - win_priority = THREAD_PRIORITY_HIGHEST; - } - else - { - win_priority = THREAD_PRIORITY_TIME_CRITICAL; - } - - if (SetThreadPriority(handle, win_priority) != 0) - return {}; - return unexpected(std::make_error_code(std::errc::operation_not_permitted)); -#else - const auto handle = native_handle(); - int const policy = SCHED_OTHER; - - auto params_result = SchedulerParams::create_for_policy(SchedulingPolicy::OTHER, priority); - - if (!params_result.has_value()) - { - return unexpected(params_result.error()); - } - - if (pthread_setschedparam(handle, policy, ¶ms_result.value()) == 0) - return {}; - return unexpected(std::error_code(errno, std::generic_category())); -#endif + return detail::apply_priority(native_handle(), priority); } [[nodiscard]] auto 
set_scheduling_policy(SchedulingPolicy policy, ThreadPriority priority) -> expected { -#ifdef _WIN32 - // Windows doesn't have the same scheduling policy concept as Linux - // We'll just set the priority and return success - return set_priority(priority); -#else - const auto handle = native_handle(); - int const policy_int = static_cast(policy); - - auto params_result = SchedulerParams::create_for_policy(policy, priority); - if (!params_result.has_value()) - { - return unexpected(params_result.error()); - } - - if (pthread_setschedparam(handle, policy_int, ¶ms_result.value()) == 0) - return {}; - return unexpected(std::error_code(errno, std::generic_category())); -#endif + return detail::apply_scheduling_policy(native_handle(), policy, priority); } [[nodiscard]] auto set_affinity(ThreadAffinity const& affinity) -> expected { -#ifdef _WIN32 - const auto handle = native_handle(); - // Prefer Group Affinity if available - using SetThreadGroupAffinityFn = BOOL(WINAPI*)(HANDLE, const GROUP_AFFINITY*, PGROUP_AFFINITY); - HMODULE hMod = GetModuleHandleW(L"kernel32.dll"); - if (hMod) - { - auto set_group_affinity = reinterpret_cast( - reinterpret_cast(GetProcAddress(hMod, "SetThreadGroupAffinity"))); - if (set_group_affinity && affinity.has_any()) - { - GROUP_AFFINITY ga{}; - ga.Mask = static_cast(affinity.get_mask()); - ga.Group = affinity.get_group(); - if (set_group_affinity(handle, &ga, nullptr) != 0) - return {}; - return unexpected(std::make_error_code(std::errc::operation_not_permitted)); - } - } - // Fallback to legacy mask (single-group systems) - DWORD_PTR mask = static_cast(affinity.get_mask()); - if (SetThreadAffinityMask(handle, mask) != 0) - return {}; - return unexpected(std::make_error_code(std::errc::operation_not_permitted)); -#else - const auto handle = native_handle(); - if (pthread_setaffinity_np(handle, sizeof(cpu_set_t), &affinity.native_handle()) == 0) - return {}; - return unexpected(std::error_code(errno, std::generic_category())); -#endif + 
return detail::apply_affinity(native_handle(), affinity); } [[nodiscard]] auto get_affinity() const -> std::optional @@ -610,14 +510,9 @@ class ThreadWrapper : public BaseThreadWrapper static auto create_with_config(std::string const& name, SchedulingPolicy policy, ThreadPriority priority, F&& f, Args&&... args) -> ThreadWrapper { - ThreadWrapper wrapper(std::forward(f), std::forward(args)...); - if (auto r = wrapper.set_name(name); !r.has_value()) - { - } - if (auto r = wrapper.set_scheduling_policy(policy, priority); !r.has_value()) - { - } + (void)wrapper.set_name(name); + (void)wrapper.set_scheduling_policy(policy, priority); return wrapper; } }; @@ -765,14 +660,9 @@ class JThreadWrapper : public BaseThreadWrapper static auto create_with_config(std::string const& name, SchedulingPolicy policy, ThreadPriority priority, F&& f, Args&&... args) -> JThreadWrapper { - JThreadWrapper wrapper(std::forward(f), std::forward(args)...); - if (auto r = wrapper.set_name(name); !r.has_value()) - { - } - if (auto r = wrapper.set_scheduling_policy(policy, priority); !r.has_value()) - { - } + (void)wrapper.set_name(name); + (void)wrapper.set_scheduling_policy(policy, priority); return wrapper; } }; @@ -977,13 +867,7 @@ class ThreadByNameView #else if (!found()) return unexpected(std::make_error_code(std::errc::no_such_process)); - int const policy = SCHED_OTHER; - auto params_result = SchedulerParams::create_for_policy(SchedulingPolicy::OTHER, priority); - if (!params_result.has_value()) - return unexpected(params_result.error()); - if (sched_setscheduler(handle_, policy, ¶ms_result.value()) == 0) - return {}; - return unexpected(std::error_code(errno, std::generic_category())); + return detail::apply_priority(handle_, priority); #endif } @@ -995,13 +879,7 @@ class ThreadByNameView #else if (!found()) return unexpected(std::make_error_code(std::errc::no_such_process)); - int policy_int = static_cast(policy); - auto params_result = SchedulerParams::create_for_policy(policy, 
priority); - if (!params_result.has_value()) - return unexpected(params_result.error()); - if (sched_setscheduler(handle_, policy_int, ¶ms_result.value()) == 0) - return {}; - return unexpected(std::error_code(errno, std::generic_category())); + return detail::apply_scheduling_policy(handle_, policy, priority); #endif } @@ -1012,9 +890,7 @@ class ThreadByNameView #else if (!found()) return unexpected(std::make_error_code(std::errc::no_such_process)); - if (sched_setaffinity(handle_, sizeof(cpu_set_t), &affinity.native_handle()) == 0) - return {}; - return unexpected(std::error_code(errno, std::generic_category())); + return detail::apply_affinity(handle_, affinity); #endif } diff --git a/include/threadschedule/threadschedule.hpp b/include/threadschedule/threadschedule.hpp index c698e43..06b293d 100644 --- a/include/threadschedule/threadschedule.hpp +++ b/include/threadschedule/threadschedule.hpp @@ -55,6 +55,9 @@ using ts::ErrorHandler; using ts::FastThreadPool; using ts::FastThreadPoolWithErrors; using ts::FutureWithErrorHandler; +using ts::GlobalHighPerformancePool; +using ts::GlobalPool; +using ts::GlobalThreadPool; using ts::HighPerformancePool; using ts::HighPerformancePoolWithErrors; using ts::JThreadWrapper; @@ -70,7 +73,9 @@ using ts::TaskError; using ts::ThreadAffinity; using ts::ThreadByNameView; using ts::ThreadPool; +using ts::ThreadPoolBase; using ts::ThreadPoolWithErrors; +using ts::PoolWithErrors; using ts::ThreadPriority; using ts::ThreadProfile; using ts::ThreadWrapper; From 10e7d0dcdd8e2f5be51a05aa8b0a74aa28dc0791 Mon Sep 17 00:00:00 2001 From: Katze719 Date: Sun, 5 Apr 2026 16:09:28 +0200 Subject: [PATCH 02/15] Refactor thread pool and worker configurations for improved maintainability - Centralized thread configuration methods in `detail` namespace, reducing code duplication and enhancing clarity. - Removed the `FutureWithErrorHandler` specialization, streamlining the handling of void futures. 
- Deduplicated thread naming and affinity methods across various classes, improving consistency. - Introduced a new `QueryFacadeMixin` for `CompositeThreadRegistry`, simplifying query operations. - Updated CHANGELOG to reflect internal improvements and code reductions across multiple files. --- CHANGELOG.md | 24 +++ include/threadschedule/error_handler.hpp | 80 +-------- include/threadschedule/pthread_wrapper.hpp | 21 +-- include/threadschedule/scheduler_policy.hpp | 108 ++++++++++++ include/threadschedule/thread_pool.hpp | 158 +++++++---------- include/threadschedule/thread_registry.hpp | 177 ++++++++++---------- include/threadschedule/thread_wrapper.hpp | 110 +----------- 7 files changed, 293 insertions(+), 385 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index e1b01d1..29f1f26 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -80,6 +80,30 @@ auto futures = pool.submit_range(tasks.begin(), tasks.end()); auto futures = pool.submit_batch(tasks.begin(), tasks.end()); ``` +### Internal improvements (v2.0.0 continued) + +- **Pool worker configuration deduplicated**: `configure_threads()`, + `set_affinity()`, `distribute_across_cpus()` in `HighPerformancePool` and + `ThreadPoolBase` now delegate to shared `detail::configure_worker_threads`, + `detail::set_worker_affinity`, `detail::distribute_workers_across_cpus` + templates. + +- **Thread naming/affinity reading centralized**: `set_name()`, `get_name()`, + `get_affinity()` across `BaseThreadWrapper`, `PThreadWrapper`, and + `ThreadControlBlock` now delegate to `detail::apply_name`, + `detail::read_name`, `detail::read_affinity` in `scheduler_policy.hpp`. + +- **`FutureWithErrorHandler` specialization removed**: The primary + template now handles both `T` and `void` via `if constexpr`, eliminating + ~70 lines of duplicated code. No API change. 
+ +- **`CompositeThreadRegistry` facade deduplicated**: The 12 query facade + methods (filter, map, for_each, find_if, any, all, none, take, skip, count, + empty, apply) are now inherited from `detail::QueryFacadeMixin` + CRTP base. No API change. + +- Net reduction: ~116 lines across 6 files. + ## v1.4.1 - Fix: `*WrapperReg` types (`ThreadWrapperReg`, `JThreadWrapperReg`, diff --git a/include/threadschedule/error_handler.hpp b/include/threadschedule/error_handler.hpp index b3e3fb6..ebc4161 100644 --- a/include/threadschedule/error_handler.hpp +++ b/include/threadschedule/error_handler.hpp @@ -8,6 +8,7 @@ #include #include #include +#include #include namespace threadschedule @@ -327,14 +328,17 @@ class FutureWithErrorHandler * If the underlying future holds an exception, the error callback (if any) * is called **before** the exception is re-thrown to the caller. * - * @return The stored value of type @p T. + * @return The stored value of type @p T (void when @p T is @c void). * @throws Any exception stored in the underlying @c std::future. */ auto get() -> T { try { - return future_.get(); + if constexpr (std::is_void_v) + future_.get(); + else + return future_.get(); } catch (...) { @@ -398,76 +402,4 @@ class FutureWithErrorHandler bool has_callback_{false}; }; -/** - * @brief Specialization of FutureWithErrorHandler for @c void futures. - * - * Behaves identically to the primary template except that get() returns - * @c void instead of a value. 
- * - * @see FutureWithErrorHandler - */ -template <> -class FutureWithErrorHandler -{ - public: - explicit FutureWithErrorHandler(std::future future) : future_(std::move(future)), error_callback_(nullptr) - { - } - - FutureWithErrorHandler(FutureWithErrorHandler const&) = delete; - auto operator=(FutureWithErrorHandler const&) -> FutureWithErrorHandler& = delete; - FutureWithErrorHandler(FutureWithErrorHandler&&) = default; - auto operator=(FutureWithErrorHandler&&) -> FutureWithErrorHandler& = default; - - auto on_error(std::function callback) -> FutureWithErrorHandler& - { - error_callback_ = std::move(callback); - has_callback_ = true; - return *this; - } - - void get() - { - try - { - future_.get(); - } - catch (...) - { - if (has_callback_ && error_callback_) - { - error_callback_(std::current_exception()); - } - throw; - } - } - - void wait() const - { - future_.wait(); - } - - template - auto wait_for(std::chrono::duration const& timeout_duration) const - { - return future_.wait_for(timeout_duration); - } - - template - auto wait_until(std::chrono::time_point const& timeout_time) const - { - return future_.wait_until(timeout_time); - } - - [[nodiscard]] auto valid() const -> bool - { - return future_.valid(); - } - - private: - std::future future_; - std::function error_callback_; - bool has_callback_{}; -}; - } // namespace threadschedule diff --git a/include/threadschedule/pthread_wrapper.hpp b/include/threadschedule/pthread_wrapper.hpp index 86c8b4d..db6485a 100644 --- a/include/threadschedule/pthread_wrapper.hpp +++ b/include/threadschedule/pthread_wrapper.hpp @@ -146,24 +146,14 @@ class PThreadWrapper return thread_; } - // Extended pthread functionality [[nodiscard]] auto set_name(std::string const& name) const -> expected { - if (name.length() > 15) - return expected(unexpect, std::make_error_code(std::errc::invalid_argument)); - if (pthread_setname_np(thread_, name.c_str()) == 0) - return {}; - return expected(unexpect, std::error_code(errno, 
std::generic_category())); + return detail::apply_name(thread_, name); } [[nodiscard]] auto get_name() const -> std::optional { - char name[16]; // Linux limit + 1 - if (pthread_getname_np(thread_, name, sizeof(name)) == 0) - { - return std::string(name); - } - return std::nullopt; + return detail::read_name(thread_); } [[nodiscard]] auto set_priority(ThreadPriority priority) const -> expected @@ -184,12 +174,7 @@ class PThreadWrapper [[nodiscard]] auto get_affinity() const -> std::optional { - ThreadAffinity affinity; - if (pthread_getaffinity_np(thread_, sizeof(cpu_set_t), const_cast(&affinity.native_handle())) == 0) - { - return affinity; - } - return std::nullopt; + return detail::read_affinity(thread_); } // Cancellation support diff --git a/include/threadschedule/scheduler_policy.hpp b/include/threadschedule/scheduler_policy.hpp index 3b201fd..cb75892 100644 --- a/include/threadschedule/scheduler_policy.hpp +++ b/include/threadschedule/scheduler_policy.hpp @@ -3,6 +3,7 @@ #include "expected.hpp" #include #include +#include #include #include #include @@ -553,6 +554,79 @@ inline auto apply_affinity(HANDLE handle, ThreadAffinity const& affinity) -> exp return unexpected(std::make_error_code(std::errc::operation_not_permitted)); } +inline auto apply_name(HANDLE handle, std::string const& name) -> expected +{ + if (!handle) + return unexpected(std::make_error_code(std::errc::no_such_process)); + using SetThreadDescriptionFn = HRESULT(WINAPI*)(HANDLE, PCWSTR); + HMODULE hMod = GetModuleHandleW(L"kernel32.dll"); + if (!hMod) + return unexpected(std::make_error_code(std::errc::function_not_supported)); + auto set_desc = reinterpret_cast( + reinterpret_cast(GetProcAddress(hMod, "SetThreadDescription"))); + if (!set_desc) + return unexpected(std::make_error_code(std::errc::function_not_supported)); + std::wstring wide(name.begin(), name.end()); + if (SUCCEEDED(set_desc(handle, wide.c_str()))) + return {}; + return 
unexpected(std::make_error_code(std::errc::operation_not_permitted)); +} + +inline auto read_name(HANDLE handle) -> std::optional +{ + if (!handle) + return std::nullopt; + using GetThreadDescriptionFn = HRESULT(WINAPI*)(HANDLE, PWSTR*); + HMODULE hMod = GetModuleHandleW(L"kernel32.dll"); + if (!hMod) + return std::nullopt; + auto get_desc = reinterpret_cast( + reinterpret_cast(GetProcAddress(hMod, "GetThreadDescription"))); + if (!get_desc) + return std::nullopt; + PWSTR thread_name = nullptr; + if (SUCCEEDED(get_desc(handle, &thread_name)) && thread_name) + { + int size = WideCharToMultiByte(CP_UTF8, 0, thread_name, -1, nullptr, 0, nullptr, nullptr); + if (size > 0) + { + std::string result(size - 1, '\0'); + WideCharToMultiByte(CP_UTF8, 0, thread_name, -1, &result[0], size, nullptr, nullptr); + LocalFree(thread_name); + return result; + } + LocalFree(thread_name); + } + return std::nullopt; +} + +inline auto read_affinity(HANDLE handle) -> std::optional +{ + if (!handle) + return std::nullopt; + using GetThreadGroupAffinityFn = BOOL(WINAPI*)(HANDLE, PGROUP_AFFINITY); + HMODULE hMod = GetModuleHandleW(L"kernel32.dll"); + if (!hMod) + return std::nullopt; + auto get_group_affinity = reinterpret_cast( + reinterpret_cast(GetProcAddress(hMod, "GetThreadGroupAffinity"))); + if (!get_group_affinity) + return std::nullopt; + GROUP_AFFINITY ga{}; + if (get_group_affinity(handle, &ga) != 0) + { + ThreadAffinity affinity; + for (int i = 0; i < 64; ++i) + { + if ((ga.Mask & (static_cast(1) << i)) != 0) + affinity.add_cpu(static_cast(ga.Group) * 64 + i); + } + if (affinity.has_any()) + return affinity; + } + return std::nullopt; +} + #else // POSIX // --- pthread_t overloads (BaseThreadWrapper, ThreadControlBlock, PThreadWrapper) --- @@ -587,6 +661,40 @@ inline auto apply_affinity(pthread_t handle, ThreadAffinity const& affinity) -> return unexpected(std::error_code(errno, std::generic_category())); } +inline auto apply_name(pthread_t handle, std::string const& name) -> 
expected +{ + if (name.length() > 15) + return unexpected(std::make_error_code(std::errc::invalid_argument)); + if (pthread_setname_np(handle, name.c_str()) == 0) + return {}; + return unexpected(std::error_code(errno, std::generic_category())); +} + +inline auto read_name(pthread_t handle) -> std::optional +{ + char name[16]; + if (pthread_getname_np(handle, name, sizeof(name)) == 0) + return std::string(name); + return std::nullopt; +} + +inline auto read_affinity(pthread_t handle) -> std::optional +{ + cpu_set_t cpuset; + CPU_ZERO(&cpuset); + if (pthread_getaffinity_np(handle, sizeof(cpu_set_t), &cpuset) == 0) + { + std::vector cpus; + for (int i = 0; i < CPU_SETSIZE; ++i) + { + if (CPU_ISSET(i, &cpuset)) + cpus.push_back(i); + } + return ThreadAffinity(cpus); + } + return std::nullopt; +} + // --- pid_t / TID overloads (ThreadByNameView) --- inline auto apply_priority(pid_t tid, ThreadPriority priority) -> expected diff --git a/include/threadschedule/thread_pool.hpp b/include/threadschedule/thread_pool.hpp index 5a488f2..c26e6f2 100644 --- a/include/threadschedule/thread_pool.hpp +++ b/include/threadschedule/thread_pool.hpp @@ -16,6 +16,62 @@ namespace threadschedule { +namespace detail +{ + +template +inline auto configure_worker_threads(WorkerRange& workers, std::string const& name_prefix, SchedulingPolicy policy, + ThreadPriority priority) -> expected +{ + bool success = true; + for (size_t i = 0; i < workers.size(); ++i) + { + std::string const thread_name = name_prefix + "_" + std::to_string(i); + if (!workers[i].set_name(thread_name).has_value()) + success = false; + if (!workers[i].set_scheduling_policy(policy, priority).has_value()) + success = false; + } + if (success) + return {}; + return unexpected(std::make_error_code(std::errc::operation_not_permitted)); +} + +template +inline auto set_worker_affinity(WorkerRange& workers, ThreadAffinity const& affinity) -> expected +{ + bool success = true; + for (auto& worker : workers) + { + if 
(!worker.set_affinity(affinity).has_value()) + success = false; + } + if (success) + return {}; + return unexpected(std::make_error_code(std::errc::operation_not_permitted)); +} + +template +inline auto distribute_workers_across_cpus(WorkerRange& workers) -> expected +{ + auto const cpu_count = std::thread::hardware_concurrency(); + if (cpu_count == 0) + return unexpected(std::make_error_code(std::errc::invalid_argument)); + + bool success = true; + for (size_t i = 0; i < workers.size(); ++i) + { + ThreadAffinity affinity({static_cast(i % cpu_count)}); + if (!workers[i].set_affinity(affinity).has_value()) + success = false; + } + if (success) + return {}; + return unexpected(std::make_error_code(std::errc::operation_not_permitted)); +} + +} // namespace detail + /** * @brief Work-stealing deque for per-thread task queues in a thread pool. * @@ -429,62 +485,17 @@ class HighPerformancePool auto configure_threads(std::string const& name_prefix, SchedulingPolicy policy = SchedulingPolicy::OTHER, ThreadPriority priority = ThreadPriority::normal()) -> expected { - bool success = true; - - for (size_t i = 0; i < workers_.size(); ++i) - { - std::string const thread_name = name_prefix + "_" + std::to_string(i); - - if (!workers_[i].set_name(thread_name).has_value()) - { - success = false; - } - - if (!workers_[i].set_scheduling_policy(policy, priority).has_value()) - { - success = false; - } - } - if (success) - return {}; - return unexpected(std::make_error_code(std::errc::operation_not_permitted)); + return detail::configure_worker_threads(workers_, name_prefix, policy, priority); } auto set_affinity(ThreadAffinity const& affinity) -> expected { - bool success = true; - - for (auto& worker : workers_) - { - if (!worker.set_affinity(affinity).has_value()) - { - success = false; - } - } - if (success) - return {}; - return unexpected(std::make_error_code(std::errc::operation_not_permitted)); + return detail::set_worker_affinity(workers_, affinity); } auto 
distribute_across_cpus() -> expected { - auto const cpu_count = std::thread::hardware_concurrency(); - if (cpu_count == 0) - return unexpected(std::make_error_code(std::errc::invalid_argument)); - - bool success = true; - - for (size_t i = 0; i < workers_.size(); ++i) - { - ThreadAffinity affinity({static_cast(i % cpu_count)}); - if (!workers_[i].set_affinity(affinity).has_value()) - { - success = false; - } - } - if (success) - return {}; - return unexpected(std::make_error_code(std::errc::operation_not_permitted)); + return detail::distribute_workers_across_cpus(workers_); } void wait_for_tasks() @@ -877,25 +888,7 @@ class ThreadPoolBase auto configure_threads(std::string const& name_prefix, SchedulingPolicy policy = SchedulingPolicy::OTHER, ThreadPriority priority = ThreadPriority::normal()) -> expected { - bool success = true; - - for (size_t i = 0; i < workers_.size(); ++i) - { - std::string const thread_name = name_prefix + "_" + std::to_string(i); - - if (!workers_[i].set_name(thread_name).has_value()) - { - success = false; - } - - if (!workers_[i].set_scheduling_policy(policy, priority).has_value()) - { - success = false; - } - } - if (success) - return {}; - return unexpected(std::make_error_code(std::errc::operation_not_permitted)); + return detail::configure_worker_threads(workers_, name_prefix, policy, priority); } /** @@ -903,18 +896,7 @@ class ThreadPoolBase */ auto set_affinity(ThreadAffinity const& affinity) -> expected { - bool success = true; - - for (auto& worker : workers_) - { - if (!worker.set_affinity(affinity).has_value()) - { - success = false; - } - } - if (success) - return {}; - return unexpected(std::make_error_code(std::errc::operation_not_permitted)); + return detail::set_worker_affinity(workers_, affinity); } /** @@ -922,23 +904,7 @@ class ThreadPoolBase */ auto distribute_across_cpus() -> expected { - auto const cpu_count = std::thread::hardware_concurrency(); - if (cpu_count == 0) - return 
unexpected(std::make_error_code(std::errc::invalid_argument)); - - bool success = true; - - for (size_t i = 0; i < workers_.size(); ++i) - { - ThreadAffinity affinity({static_cast(i % cpu_count)}); - if (!workers_[i].set_affinity(affinity).has_value()) - { - success = false; - } - } - if (success) - return {}; - return unexpected(std::make_error_code(std::errc::operation_not_permitted)); + return detail::distribute_workers_across_cpus(workers_); } void wait_for_tasks() diff --git a/include/threadschedule/thread_registry.hpp b/include/threadschedule/thread_registry.hpp index bb35270..699a830 100644 --- a/include/threadschedule/thread_registry.hpp +++ b/include/threadschedule/thread_registry.hpp @@ -187,26 +187,9 @@ class ThreadControlBlock [[nodiscard]] auto set_name(std::string const& name) const -> expected { #ifdef _WIN32 - if (!handle_) - return unexpected(std::make_error_code(std::errc::no_such_process)); - using SetThreadDescriptionFn = HRESULT(WINAPI*)(HANDLE, PCWSTR); - HMODULE hMod = GetModuleHandleW(L"kernel32.dll"); - if (!hMod) - return unexpected(std::make_error_code(std::errc::function_not_supported)); - auto set_desc = reinterpret_cast( - reinterpret_cast(GetProcAddress(hMod, "SetThreadDescription"))); - if (!set_desc) - return unexpected(std::make_error_code(std::errc::function_not_supported)); - std::wstring wide(name.begin(), name.end()); - if (SUCCEEDED(set_desc(handle_, wide.c_str()))) - return {}; - return unexpected(std::make_error_code(std::errc::operation_not_permitted)); + return detail::apply_name(handle_, name); #else - if (name.length() > 15) - return unexpected(std::make_error_code(std::errc::invalid_argument)); - if (pthread_setname_np(pthreadHandle_, name.c_str()) == 0) - return {}; - return unexpected(std::error_code(errno, std::generic_category())); + return detail::apply_name(pthreadHandle_, name); #endif } @@ -795,6 +778,89 @@ inline auto build_mode_string() -> char const* return is_runtime_build ? 
"runtime" : "header-only"; } +namespace detail +{ + +/** + * @brief CRTP mixin that provides functional-style query facade methods. + * + * The derived class must implement a public @c query() method returning a + * @ref ThreadRegistry::QueryView. All facade methods (filter, map, for_each, + * find_if, any, all, none, take, skip, count, empty, apply) delegate to it. + * + * @tparam Derived CRTP derived type. + */ +template +class QueryFacadeMixin +{ + auto self() const -> Derived const& { return static_cast(*this); } + + public: + template + [[nodiscard]] auto filter(Predicate&& pred) const -> ThreadRegistry::QueryView + { + return self().query().filter(std::forward(pred)); + } + + [[nodiscard]] auto count() const -> size_t { return self().query().count(); } + + [[nodiscard]] auto empty() const -> bool { return self().query().empty(); } + + template + void for_each(Fn&& fn) const + { + self().query().for_each(std::forward(fn)); + } + + template + void apply(Predicate&& pred, Fn&& fn) const + { + self().query().filter(std::forward(pred)).for_each(std::forward(fn)); + } + + template + [[nodiscard]] auto map(Fn&& fn) const -> std::vector> + { + return self().query().map(std::forward(fn)); + } + + template + [[nodiscard]] auto find_if(Predicate&& pred) const -> std::optional + { + return self().query().find_if(std::forward(pred)); + } + + template + [[nodiscard]] auto any(Predicate&& pred) const -> bool + { + return self().query().any(std::forward(pred)); + } + + template + [[nodiscard]] auto all(Predicate&& pred) const -> bool + { + return self().query().all(std::forward(pred)); + } + + template + [[nodiscard]] auto none(Predicate&& pred) const -> bool + { + return self().query().none(std::forward(pred)); + } + + [[nodiscard]] auto take(size_t n) const -> ThreadRegistry::QueryView + { + return self().query().take(n); + } + + [[nodiscard]] auto skip(size_t n) const -> ThreadRegistry::QueryView + { + return self().query().skip(n); + } +}; + +} // namespace detail + /** 
* @brief Aggregates multiple ThreadRegistry instances into a single queryable * view. @@ -824,9 +890,9 @@ inline auto build_mode_string() -> char const* * query() iterates over every attached registry, calls its own query(), and * concatenates the results into a single @ref ThreadRegistry::QueryView snapshot. * The same functional-style helpers (filter, map, for_each, etc.) are - * available directly on CompositeThreadRegistry for convenience. + * inherited from @ref detail::QueryFacadeMixin. */ -class CompositeThreadRegistry +class CompositeThreadRegistry : public detail::QueryFacadeMixin { public: void attach(ThreadRegistry* reg) @@ -837,7 +903,6 @@ class CompositeThreadRegistry registries_.push_back(reg); } - // Chainable query API [[nodiscard]] auto query() const -> ThreadRegistry::QueryView { std::vector merged; @@ -855,74 +920,6 @@ class CompositeThreadRegistry return ThreadRegistry::QueryView(std::move(merged)); } - template - [[nodiscard]] auto filter(Predicate&& pred) const -> ThreadRegistry::QueryView - { - return query().filter(std::forward(pred)); - } - - [[nodiscard]] auto count() const -> size_t - { - return query().count(); - } - - [[nodiscard]] auto empty() const -> bool - { - return query().empty(); - } - - template - void for_each(Fn&& fn) const - { - query().for_each(std::forward(fn)); - } - - template - void apply(Predicate&& pred, Fn&& fn) const - { - query().filter(std::forward(pred)).for_each(std::forward(fn)); - } - - template - [[nodiscard]] auto map(Fn&& fn) const -> std::vector> - { - return query().map(std::forward(fn)); - } - - template - [[nodiscard]] auto find_if(Predicate&& pred) const -> std::optional - { - return query().find_if(std::forward(pred)); - } - - template - [[nodiscard]] auto any(Predicate&& pred) const -> bool - { - return query().any(std::forward(pred)); - } - - template - [[nodiscard]] auto all(Predicate&& pred) const -> bool - { - return query().all(std::forward(pred)); - } - - template - [[nodiscard]] auto 
none(Predicate&& pred) const -> bool - { - return query().none(std::forward(pred)); - } - - [[nodiscard]] auto take(size_t n) const -> ThreadRegistry::QueryView - { - return query().take(n); - } - - [[nodiscard]] auto skip(size_t n) const -> ThreadRegistry::QueryView - { - return query().skip(n); - } - private: mutable std::mutex mutex_; std::vector registries_; diff --git a/include/threadschedule/thread_wrapper.hpp b/include/threadschedule/thread_wrapper.hpp index df270c0..2847cb5 100644 --- a/include/threadschedule/thread_wrapper.hpp +++ b/include/threadschedule/thread_wrapper.hpp @@ -202,79 +202,14 @@ class BaseThreadWrapper : protected detail::ThreadStorage expected { -#ifdef _WIN32 - // Windows supports longer thread names. Try SetThreadDescription dynamically. - auto const handle = native_handle(); - std::wstring wide_name(name.begin(), name.end()); - - using SetThreadDescriptionFn = HRESULT(WINAPI*)(HANDLE, PCWSTR); - HMODULE hMod = GetModuleHandleW(L"kernel32.dll"); - if (hMod) - { - auto set_desc = reinterpret_cast( - reinterpret_cast(GetProcAddress(hMod, "SetThreadDescription"))); - if (set_desc) - { - if (SUCCEEDED(set_desc(handle, wide_name.c_str()))) - return expected(); - return expected(unexpect, std::make_error_code(std::errc::invalid_argument)); - } - } - // Fallback unavailable - return expected(unexpect, std::make_error_code(std::errc::function_not_supported)); -#else - if (name.length() > 15) - return expected(unexpect, std::make_error_code(std::errc::invalid_argument)); - - auto const handle = native_handle(); - if (pthread_setname_np(handle, name.c_str()) == 0) - return {}; - return expected(unexpect, std::error_code(errno, std::generic_category())); -#endif + return detail::apply_name(native_handle(), name); } [[nodiscard]] auto get_name() const -> std::optional { -#ifdef _WIN32 - const auto handle = const_cast(this)->native_handle(); - using GetThreadDescriptionFn = HRESULT(WINAPI*)(HANDLE, PWSTR*); - HMODULE hMod = 
GetModuleHandleW(L"kernel32.dll"); - if (hMod) - { - auto get_desc = reinterpret_cast( - reinterpret_cast(GetProcAddress(hMod, "GetThreadDescription"))); - if (get_desc) - { - PWSTR thread_name = nullptr; - HRESULT hr = get_desc(handle, &thread_name); - if (SUCCEEDED(hr) && thread_name) - { - int size = WideCharToMultiByte(CP_UTF8, 0, thread_name, -1, nullptr, 0, nullptr, nullptr); - if (size > 0) - { - std::string result(size - 1, '\0'); - WideCharToMultiByte(CP_UTF8, 0, thread_name, -1, &result[0], size, nullptr, nullptr); - LocalFree(thread_name); - return result; - } - LocalFree(thread_name); - } - } - } - return std::nullopt; -#else - char name[16]; // Linux limit + 1 - auto const handle = const_cast(this)->native_handle(); - - if (pthread_getname_np(handle, name, sizeof(name)) == 0) - { - return std::string(name); - } - return std::nullopt; -#endif + return detail::read_name(const_cast(this)->native_handle()); } [[nodiscard]] auto set_priority(ThreadPriority priority) -> expected @@ -295,46 +230,7 @@ class BaseThreadWrapper : protected detail::ThreadStorage std::optional { -#ifdef _WIN32 - const auto handle = const_cast(this)->native_handle(); - using GetThreadGroupAffinityFn = BOOL(WINAPI*)(HANDLE, PGROUP_AFFINITY); - HMODULE hMod = GetModuleHandleW(L"kernel32.dll"); - if (hMod) - { - auto get_group_affinity = reinterpret_cast( - reinterpret_cast(GetProcAddress(hMod, "GetThreadGroupAffinity"))); - if (get_group_affinity) - { - GROUP_AFFINITY ga{}; - if (get_group_affinity(handle, &ga) != 0) - { - ThreadAffinity affinity; - for (int i = 0; i < 64; ++i) - { - if ((ga.Mask & (static_cast(1) << i)) != 0) - { - affinity.add_cpu(static_cast(ga.Group) * 64 + i); - } - } - if (affinity.has_any()) - { - return affinity; - } - return std::nullopt; - } - } - return std::nullopt; - } -#else - ThreadAffinity affinity; - auto const handle = const_cast(this)->native_handle(); - - if (pthread_getaffinity_np(handle, sizeof(cpu_set_t), &affinity.native_handle()) == 0) - { - 
return affinity; - } - return std::nullopt; -#endif + return detail::read_affinity(const_cast(this)->native_handle()); } // Nice value (process-level, affects all threads) From b727db9e225652aa0af3e71e6ae61a19ec075dc6 Mon Sep 17 00:00:00 2001 From: Katze719 Date: Sun, 5 Apr 2026 16:19:23 +0200 Subject: [PATCH 03/15] Refactor thread management and error handling for improved clarity and maintainability - Consolidated `ThreadRegistry` methods to inherit from `detail::QueryFacadeMixin`, reducing code duplication in query operations. - Streamlined POSIX scheduling helpers by merging `apply_priority` and `apply_scheduling_policy` into a shared implementation. - Simplified error handling in `PoolWithErrors` by introducing a new static factory method `TaskError::capture()` to centralize exception capturing. - Reduced duplicated logic in `ThreadRegistry::register_current_thread` by delegating to a private `try_register` method. - Updated CHANGELOG to reflect these enhancements and code reductions across multiple files. --- CHANGELOG.md | 26 +- include/threadschedule/error_handler.hpp | 26 +- include/threadschedule/scheduler_policy.hpp | 51 ++- .../thread_pool_with_errors.hpp | 63 ++-- include/threadschedule/thread_registry.hpp | 328 ++++++------------ 5 files changed, 202 insertions(+), 292 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 29f1f26..273677a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -102,7 +102,31 @@ auto futures = pool.submit_batch(tasks.begin(), tasks.end()); empty, apply) are now inherited from `detail::QueryFacadeMixin` CRTP base. No API change. -- Net reduction: ~116 lines across 6 files. +- **`ThreadRegistry` inherits `detail::QueryFacadeMixin`**: The 12 facade + methods (filter, map, for_each, find_if, any, all, none, take, skip, count, + empty, apply) are now provided by the same CRTP mixin as + `CompositeThreadRegistry`, eliminating the duplicate implementations. 
+ +- **POSIX scheduling helpers consolidated**: `apply_priority` and + `apply_scheduling_policy` for both `pthread_t` and `pid_t` now share a + common `detail::apply_sched_params` template, eliminating duplicated param + validation and error handling. + +- **`ThreadRegistry::register_current_thread` consolidated**: Both overloads + now delegate to a private `try_register(RegisteredThreadInfo)` method, + removing the duplicated lock/emplace/callback logic. + +- **`PoolWithErrors` submit methods consolidated**: `submit()` and + `submit_with_description()` now delegate to a private `submit_impl` with + optional description parameter. + +- **`TaskError::capture()` factory**: New static factory method centralizes + the repeated exception/thread_id/timestamp capture pattern. Used by + `ErrorHandledTask` and `PoolWithErrors`. + +- **`ThreadControlBlock` native handle accessor**: Private `native_handle()` + method replaces four identical `#ifdef _WIN32` dispatch blocks in the + set_affinity/set_priority/set_scheduling_policy/set_name methods. ## v1.4.1 diff --git a/include/threadschedule/error_handler.hpp b/include/threadschedule/error_handler.hpp index ebc4161..c9a30a0 100644 --- a/include/threadschedule/error_handler.hpp +++ b/include/threadschedule/error_handler.hpp @@ -37,6 +37,22 @@ struct TaskError /** @brief Monotonic timestamp recorded immediately after the exception was caught. */ std::chrono::steady_clock::time_point timestamp; + /** + * @brief Capture the current in-flight exception into a TaskError. + * + * Must be called inside a @c catch block. Fills exception, thread_id, + * and timestamp; optionally sets task_description. 
+ */ + static auto capture(std::string description = {}) -> TaskError + { + TaskError err; + err.exception = std::current_exception(); + err.task_description = std::move(description); + err.thread_id = std::this_thread::get_id(); + err.timestamp = std::chrono::steady_clock::now(); + return err; + } + /** * @brief Extract the message string from the stored exception. * @@ -239,15 +255,7 @@ class ErrorHandledTask catch (...) { if (handler_) - { - TaskError error; - error.exception = std::current_exception(); - error.task_description = description_; - error.thread_id = std::this_thread::get_id(); - error.timestamp = std::chrono::steady_clock::now(); - - handler_->handle_error(error); - } + handler_->handle_error(TaskError::capture(description_)); } } diff --git a/include/threadschedule/scheduler_policy.hpp b/include/threadschedule/scheduler_policy.hpp index cb75892..61a9dfc 100644 --- a/include/threadschedule/scheduler_policy.hpp +++ b/include/threadschedule/scheduler_policy.hpp @@ -629,29 +629,33 @@ inline auto read_affinity(HANDLE handle) -> std::optional #else // POSIX -// --- pthread_t overloads (BaseThreadWrapper, ThreadControlBlock, PThreadWrapper) --- +// --- shared implementation for pthread_t and pid_t scheduling --- -inline auto apply_priority(pthread_t handle, ThreadPriority priority) -> expected +template +inline auto apply_sched_params(SchedulingPolicy policy, ThreadPriority priority, SetSchedFn&& set_sched) + -> expected { - int const policy = SCHED_OTHER; - auto params_result = SchedulerParams::create_for_policy(SchedulingPolicy::OTHER, priority); + int const policy_int = static_cast(policy); + auto params_result = SchedulerParams::create_for_policy(policy, priority); if (!params_result.has_value()) return unexpected(params_result.error()); - if (pthread_setschedparam(handle, policy, ¶ms_result.value()) == 0) + if (set_sched(policy_int, ¶ms_result.value()) == 0) return {}; return unexpected(std::error_code(errno, std::generic_category())); } +// --- 
pthread_t overloads (BaseThreadWrapper, ThreadControlBlock, PThreadWrapper) --- + inline auto apply_scheduling_policy(pthread_t handle, SchedulingPolicy policy, ThreadPriority priority) -> expected { - int const policy_int = static_cast(policy); - auto params_result = SchedulerParams::create_for_policy(policy, priority); - if (!params_result.has_value()) - return unexpected(params_result.error()); - if (pthread_setschedparam(handle, policy_int, ¶ms_result.value()) == 0) - return {}; - return unexpected(std::error_code(errno, std::generic_category())); + return apply_sched_params(policy, priority, + [handle](int p, sched_param* sp) { return pthread_setschedparam(handle, p, sp); }); +} + +inline auto apply_priority(pthread_t handle, ThreadPriority priority) -> expected +{ + return apply_scheduling_policy(handle, SchedulingPolicy::OTHER, priority); } inline auto apply_affinity(pthread_t handle, ThreadAffinity const& affinity) -> expected @@ -697,27 +701,16 @@ inline auto read_affinity(pthread_t handle) -> std::optional // --- pid_t / TID overloads (ThreadByNameView) --- -inline auto apply_priority(pid_t tid, ThreadPriority priority) -> expected +inline auto apply_scheduling_policy(pid_t tid, SchedulingPolicy policy, ThreadPriority priority) + -> expected { - int const policy = SCHED_OTHER; - auto params_result = SchedulerParams::create_for_policy(SchedulingPolicy::OTHER, priority); - if (!params_result.has_value()) - return unexpected(params_result.error()); - if (sched_setscheduler(tid, policy, ¶ms_result.value()) == 0) - return {}; - return unexpected(std::error_code(errno, std::generic_category())); + return apply_sched_params(policy, priority, + [tid](int p, sched_param* sp) { return sched_setscheduler(tid, p, sp); }); } -inline auto apply_scheduling_policy(pid_t tid, SchedulingPolicy policy, ThreadPriority priority) - -> expected +inline auto apply_priority(pid_t tid, ThreadPriority priority) -> expected { - int const policy_int = static_cast(policy); - auto 
params_result = SchedulerParams::create_for_policy(policy, priority); - if (!params_result.has_value()) - return unexpected(params_result.error()); - if (sched_setscheduler(tid, policy_int, ¶ms_result.value()) == 0) - return {}; - return unexpected(std::error_code(errno, std::generic_category())); + return apply_scheduling_policy(tid, SchedulingPolicy::OTHER, priority); } inline auto apply_affinity(pid_t tid, ThreadAffinity const& affinity) -> expected diff --git a/include/threadschedule/thread_pool_with_errors.hpp b/include/threadschedule/thread_pool_with_errors.hpp index 36b5c61..5e7e419 100644 --- a/include/threadschedule/thread_pool_with_errors.hpp +++ b/include/threadschedule/thread_pool_with_errors.hpp @@ -38,25 +38,7 @@ class PoolWithErrors template auto submit(F&& f, Args&&... args) -> FutureWithErrorHandler> { - auto handler = error_handler_; - auto wrapped_task = [f = std::forward(f), args = std::make_tuple(std::forward(args)...), handler]() { - try - { - return std::apply(f, args); - } - catch (...) - { - TaskError error; - error.exception = std::current_exception(); - error.thread_id = std::this_thread::get_id(); - error.timestamp = std::chrono::steady_clock::now(); - handler->handle_error(error); - throw; - } - }; - - auto future = pool_.submit(std::move(wrapped_task)); - return FutureWithErrorHandler>(std::move(future)); + return submit_impl({}, std::forward(f), std::forward(args)...); } /** @@ -66,27 +48,7 @@ class PoolWithErrors auto submit_with_description(std::string const& description, F&& f, Args&&... args) -> FutureWithErrorHandler> { - auto handler = error_handler_; - auto wrapped_task = [f = std::forward(f), args = std::make_tuple(std::forward(args)...), handler, - description]() { - try - { - return std::apply(f, args); - } - catch (...) 
- { - TaskError error; - error.exception = std::current_exception(); - error.task_description = description; - error.thread_id = std::this_thread::get_id(); - error.timestamp = std::chrono::steady_clock::now(); - handler->handle_error(error); - throw; - } - }; - - auto future = pool_.submit(std::move(wrapped_task)); - return FutureWithErrorHandler>(std::move(future)); + return submit_impl(description, std::forward(f), std::forward(args)...); } auto add_error_callback(ErrorCallback callback) -> size_t @@ -156,6 +118,27 @@ class PoolWithErrors } private: + template + auto submit_impl(std::string description, F&& f, Args&&... args) + -> FutureWithErrorHandler> + { + auto handler = error_handler_; + auto wrapped_task = [f = std::forward(f), args = std::make_tuple(std::forward(args)...), handler, + desc = std::move(description)]() { + try + { + return std::apply(f, args); + } + catch (...) + { + handler->handle_error(TaskError::capture(desc)); + throw; + } + }; + auto future = pool_.submit(std::move(wrapped_task)); + return FutureWithErrorHandler>(std::move(future)); + } + PoolType pool_; std::shared_ptr error_handler_; }; diff --git a/include/threadschedule/thread_registry.hpp b/include/threadschedule/thread_registry.hpp index 699a830..7dd5b9e 100644 --- a/include/threadschedule/thread_registry.hpp +++ b/include/threadschedule/thread_registry.hpp @@ -154,43 +154,36 @@ class ThreadControlBlock { return stdId_; } - // Removed name/component metadata from control block; metadata lives in RegisteredThreadInfo - - [[nodiscard]] auto set_affinity(ThreadAffinity const& affinity) const -> expected + private: + [[nodiscard]] auto native_handle() const { #ifdef _WIN32 - return detail::apply_affinity(handle_, affinity); + return handle_; #else - return detail::apply_affinity(pthreadHandle_, affinity); + return pthreadHandle_; #endif } + public: + [[nodiscard]] auto set_affinity(ThreadAffinity const& affinity) const -> expected + { + return detail::apply_affinity(native_handle(), 
affinity); + } + [[nodiscard]] auto set_priority(ThreadPriority priority) const -> expected { -#ifdef _WIN32 - return detail::apply_priority(handle_, priority); -#else - return detail::apply_priority(pthreadHandle_, priority); -#endif + return detail::apply_priority(native_handle(), priority); } [[nodiscard]] auto set_scheduling_policy(SchedulingPolicy policy, ThreadPriority priority) const -> expected { -#ifdef _WIN32 - return detail::apply_scheduling_policy(handle_, policy, priority); -#else - return detail::apply_scheduling_policy(pthreadHandle_, policy, priority); -#endif + return detail::apply_scheduling_policy(native_handle(), policy, priority); } [[nodiscard]] auto set_name(std::string const& name) const -> expected { -#ifdef _WIN32 - return detail::apply_name(handle_, name); -#else - return detail::apply_name(pthreadHandle_, name); -#endif + return detail::apply_name(native_handle(), name); } static auto create_for_current_thread() -> std::shared_ptr @@ -219,6 +212,86 @@ class ThreadControlBlock #endif }; +namespace detail +{ + +/** + * @brief CRTP mixin that provides functional-style query facade methods. + * + * The derived class must implement a public @c query() method returning a + * QueryView-like object. All facade methods (filter, map, for_each, + * find_if, any, all, none, take, skip, count, empty, apply) delegate to it. + * + * Return types are deduced via @c auto so the mixin can be used as a base + * class before the concrete QueryView type is fully defined (CRTP). + * + * @tparam Derived CRTP derived type. 
+ */ +template +class QueryFacadeMixin +{ + auto self() const -> Derived const& { return static_cast(*this); } + + public: + template + [[nodiscard]] auto filter(Predicate&& pred) const + { + return self().query().filter(std::forward(pred)); + } + + [[nodiscard]] auto count() const -> size_t { return self().query().count(); } + + [[nodiscard]] auto empty() const -> bool { return self().query().empty(); } + + template + void for_each(Fn&& fn) const + { + self().query().for_each(std::forward(fn)); + } + + template + void apply(Predicate&& pred, Fn&& fn) const + { + self().query().filter(std::forward(pred)).for_each(std::forward(fn)); + } + + template + [[nodiscard]] auto map(Fn&& fn) const -> std::vector> + { + return self().query().map(std::forward(fn)); + } + + template + [[nodiscard]] auto find_if(Predicate&& pred) const -> std::optional + { + return self().query().find_if(std::forward(pred)); + } + + template + [[nodiscard]] auto any(Predicate&& pred) const -> bool + { + return self().query().any(std::forward(pred)); + } + + template + [[nodiscard]] auto all(Predicate&& pred) const -> bool + { + return self().query().all(std::forward(pred)); + } + + template + [[nodiscard]] auto none(Predicate&& pred) const -> bool + { + return self().query().none(std::forward(pred)); + } + + [[nodiscard]] auto take(size_t n) const { return self().query().take(n); } + + [[nodiscard]] auto skip(size_t n) const { return self().query().skip(n); } +}; + +} // namespace detail + /** * @brief Central registry of threads indexed by OS-level thread ID (Tid). * @@ -254,6 +327,8 @@ class ThreadControlBlock * query() returns a @ref QueryView holding a **snapshot** of the registry at the * moment of the call. Subsequent changes to the registry (new * registrations, unregistrations) are not reflected in an existing @ref QueryView. + * The same functional-style helpers (filter, map, for_each, etc.) are + * inherited from @ref detail::QueryFacadeMixin. 
* * @par Scheduling helpers * set_affinity(), set_priority(), set_scheduling_policy(), and set_name() @@ -261,43 +336,22 @@ class ThreadControlBlock * delegate to the control block. Returns @c std::errc::no_such_process if * the TID is not registered or has no control block. */ -class ThreadRegistry +class ThreadRegistry : public detail::QueryFacadeMixin { public: ThreadRegistry() = default; ThreadRegistry(ThreadRegistry const&) = delete; auto operator=(ThreadRegistry const&) -> ThreadRegistry& = delete; - // Register/unregister the CURRENT thread (to be called inside the running thread) void register_current_thread(std::string name = std::string(), std::string componentTag = std::string()) { - Tid const tid = ThreadInfo::get_thread_id(); RegisteredThreadInfo info; - info.tid = tid; + info.tid = ThreadInfo::get_thread_id(); info.stdId = std::this_thread::get_id(); info.name = std::move(name); info.componentTag = std::move(componentTag); info.alive = true; - - { - std::unique_lock lock(mutex_); - auto it = threads_.find(tid); - if (it == threads_.end()) - { - auto stored = info; // copy for callback - threads_.emplace(tid, std::move(info)); - if (onRegister_) - { - auto cb = onRegister_; - lock.unlock(); - cb(stored); - } - } - else - { - // Duplicate registration of the same TID is a no-op (first registration wins) - } - } + try_register(std::move(info)); } void register_current_thread(std::shared_ptr const& controlBlock, @@ -312,23 +366,7 @@ class ThreadRegistry info.componentTag = std::move(componentTag); info.alive = true; info.control = controlBlock; - std::unique_lock lock(mutex_); - auto it = threads_.find(info.tid); - if (it == threads_.end()) - { - auto stored = info; // copy for callback - threads_.emplace(info.tid, std::move(info)); - if (onRegister_) - { - auto cb = onRegister_; - lock.unlock(); - cb(stored); - } - } - else - { - // Duplicate registration of the same TID is a no-op (first registration wins) - } + try_register(std::move(info)); } void 
unregister_current_thread() @@ -528,74 +566,6 @@ class ThreadRegistry return QueryView(std::move(snapshot)); } - template - [[nodiscard]] auto filter(Predicate&& pred) const -> QueryView - { - return query().filter(std::forward(pred)); - } - - [[nodiscard]] auto count() const -> size_t - { - return query().count(); - } - - [[nodiscard]] auto empty() const -> bool - { - return query().empty(); - } - - template - void for_each(Fn&& fn) const - { - query().for_each(std::forward(fn)); - } - - template - void apply(Predicate&& pred, Fn&& fn) const - { - query().filter(std::forward(pred)).for_each(std::forward(fn)); - } - - template - [[nodiscard]] auto map(Fn&& fn) const -> std::vector> - { - return query().map(std::forward(fn)); - } - - template - [[nodiscard]] auto find_if(Predicate&& pred) const -> std::optional - { - return query().find_if(std::forward(pred)); - } - - template - [[nodiscard]] auto any(Predicate&& pred) const -> bool - { - return query().any(std::forward(pred)); - } - - template - [[nodiscard]] auto all(Predicate&& pred) const -> bool - { - return query().all(std::forward(pred)); - } - - template - [[nodiscard]] auto none(Predicate&& pred) const -> bool - { - return query().none(std::forward(pred)); - } - - [[nodiscard]] auto take(size_t n) const -> QueryView - { - return query().take(n); - } - - [[nodiscard]] auto skip(size_t n) const -> QueryView - { - return query().skip(n); - } - [[nodiscard]] auto set_affinity(Tid tid, ThreadAffinity const& affinity) const -> expected { auto blk = lock_block(tid); @@ -643,6 +613,22 @@ class ThreadRegistry } private: + void try_register(RegisteredThreadInfo info) + { + std::unique_lock lock(mutex_); + auto it = threads_.find(info.tid); + if (it != threads_.end()) + return; + auto stored = info; + threads_.emplace(info.tid, std::move(info)); + if (onRegister_) + { + auto cb = onRegister_; + lock.unlock(); + cb(stored); + } + } + [[nodiscard]] auto lock_block(Tid tid) const -> std::shared_ptr { std::shared_lock 
lock(mutex_); @@ -654,7 +640,6 @@ class ThreadRegistry mutable std::shared_mutex mutex_; std::unordered_map threads_; - // Integration hooks std::function onRegister_; std::function onUnregister_; }; @@ -778,89 +763,6 @@ inline auto build_mode_string() -> char const* return is_runtime_build ? "runtime" : "header-only"; } -namespace detail -{ - -/** - * @brief CRTP mixin that provides functional-style query facade methods. - * - * The derived class must implement a public @c query() method returning a - * @ref ThreadRegistry::QueryView. All facade methods (filter, map, for_each, - * find_if, any, all, none, take, skip, count, empty, apply) delegate to it. - * - * @tparam Derived CRTP derived type. - */ -template -class QueryFacadeMixin -{ - auto self() const -> Derived const& { return static_cast(*this); } - - public: - template - [[nodiscard]] auto filter(Predicate&& pred) const -> ThreadRegistry::QueryView - { - return self().query().filter(std::forward(pred)); - } - - [[nodiscard]] auto count() const -> size_t { return self().query().count(); } - - [[nodiscard]] auto empty() const -> bool { return self().query().empty(); } - - template - void for_each(Fn&& fn) const - { - self().query().for_each(std::forward(fn)); - } - - template - void apply(Predicate&& pred, Fn&& fn) const - { - self().query().filter(std::forward(pred)).for_each(std::forward(fn)); - } - - template - [[nodiscard]] auto map(Fn&& fn) const -> std::vector> - { - return self().query().map(std::forward(fn)); - } - - template - [[nodiscard]] auto find_if(Predicate&& pred) const -> std::optional - { - return self().query().find_if(std::forward(pred)); - } - - template - [[nodiscard]] auto any(Predicate&& pred) const -> bool - { - return self().query().any(std::forward(pred)); - } - - template - [[nodiscard]] auto all(Predicate&& pred) const -> bool - { - return self().query().all(std::forward(pred)); - } - - template - [[nodiscard]] auto none(Predicate&& pred) const -> bool - { - return 
self().query().none(std::forward(pred)); - } - - [[nodiscard]] auto take(size_t n) const -> ThreadRegistry::QueryView - { - return self().query().take(n); - } - - [[nodiscard]] auto skip(size_t n) const -> ThreadRegistry::QueryView - { - return self().query().skip(n); - } -}; - -} // namespace detail - /** * @brief Aggregates multiple ThreadRegistry instances into a single queryable * view. From d42e1e1046b674711698d8117933c6ce8ace2459 Mon Sep 17 00:00:00 2001 From: Katze719 Date: Sun, 5 Apr 2026 17:11:49 +0200 Subject: [PATCH 04/15] Enhance thread pool functionality and error handling features - Introduced quality-of-life improvements including stable callback management in `ErrorHandler` with `remove_callback(id)` and `has_callback(id)` methods. - Added non-throwing submission methods `try_submit()` and `try_submit_batch()` for all pool types, returning `expected, std::error_code>`. - Implemented chunked work distribution in `parallel_for_each` for better performance across thread pools. - Configured `HighPerformancePool` with adjustable deque capacity and pre-configuration of thread count via `GlobalPool::init(n)`. - Added C++20 ranges overloads for batch submissions and parallel processing. - Introduced cooperative cancellation support with `submit(stop_token, F, Args...)` overloads. - Added new future combinators in `futures.hpp` for enhanced future management. - Updated CHANGELOG to reflect these new features and improvements. 
--- CHANGELOG.md | 58 +- include/threadschedule/error_handler.hpp | 51 +- include/threadschedule/futures.hpp | 186 ++++++ include/threadschedule/task.hpp | 115 +++- include/threadschedule/thread_pool.hpp | 558 +++++++++++++++--- .../thread_pool_with_errors.hpp | 38 ++ include/threadschedule/threadschedule.hpp | 18 +- 7 files changed, 904 insertions(+), 120 deletions(-) create mode 100644 include/threadschedule/futures.hpp diff --git a/CHANGELOG.md b/CHANGELOG.md index 273677a..f6ca2fd 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -35,12 +35,68 @@ - **`GlobalThreadPool`, `GlobalHighPerformancePool`** are now type aliases for `GlobalPool`. The public API is unchanged. +### Quality-of-Life Features + +- **`ErrorHandler::remove_callback(id)` / `has_callback(id)`** -- callbacks + are now stored in a `std::map` with stable IDs. Individual callbacks can be + removed without clearing all of them. + +- **`try_submit()` / `try_submit_batch()`** -- non-throwing submission for all + pool types, returning `expected, std::error_code>` instead of + throwing on shutdown. + +- **Chunked `parallel_for_each`** -- `ThreadPoolBase` now uses the same + chunked work distribution as `HighPerformancePool` via a shared + `detail::parallel_for_each_chunked` helper (one task per element is gone). + +- **`PollingWait`** -- tunable polling interval (default 10 ms). + `FastThreadPool` is `ThreadPoolBase>`. + +- **`HighPerformancePool` deque capacity** -- configurable via constructor: + `HighPerformancePool(threads, deque_capacity)`. + +- **`GlobalPool::init(n)`** -- pre-configure thread count before first use + (std::call_once semantics). + +- **C++20 ranges overloads** -- `submit_batch(range)`, `try_submit_batch(range)`, + `parallel_for_each(range, func)` on all pool types and GlobalPool. Guarded + by `__cpp_lib_ranges`. + +- **Auto-register pool workers** -- opt-in `register_workers` flag on both + pool constructors. 
Workers register/unregister automatically via + `AutoRegisterCurrentThread` RAII guard. + +- **Per-task tracing hooks** -- `set_on_task_start(callback)` and + `set_on_task_end(callback)` on both pool types. Callbacks receive timestamp, + thread ID, and (for end) elapsed duration. + +- **Cooperative cancellation** -- `submit(stop_token, F, Args...)` and + `try_submit(stop_token, F, Args...)` overloads. Tasks are skipped if stop is + requested. Guarded by `__cpp_lib_jthread`. + +- **Future combinators** -- new `futures.hpp` with `when_all`, `when_any`, + `when_all_settled` (typed and void specializations). + +- **Lifecycle modes** -- `ShutdownPolicy::drain` (default) and + `ShutdownPolicy::drop_pending`. `shutdown(policy)` replaces the old + no-argument `shutdown()`. `shutdown_for(timeout)` provides timed drain. + +- **Coroutine scheduler integration** -- `schedule_on{pool}` awaitable to hop + to a pool thread, `executor_base` / `pool_executor` type-erased + executor for pool-aware tasks, `run_on(pool, coro_fn)` convenience returning + `std::future`. + ### New Types - `ThreadPoolBase` - parameterized single-queue thread pool. -- `IndefiniteWait` / `PollingWait` - wait policy types for `ThreadPoolBase`. +- `IndefiniteWait` / `PollingWait` - wait policy types for `ThreadPoolBase`. - `PoolWithErrors` - generic error-handling pool wrapper. - `GlobalPool` - generic singleton pool accessor. +- `ShutdownPolicy` - enum controlling shutdown behavior (drain / drop_pending). +- `TaskStartCallback` / `TaskEndCallback` - tracing callback types. +- `executor_base` / `pool_executor` - type-erased executor for coroutines. +- `schedule_on` - awaitable for hopping to a pool thread. +- `futures.hpp` - future combinators (`when_all`, `when_any`, `when_all_settled`). 
### Internal Improvements diff --git a/include/threadschedule/error_handler.hpp b/include/threadschedule/error_handler.hpp index c9a30a0..ed8b576 100644 --- a/include/threadschedule/error_handler.hpp +++ b/include/threadschedule/error_handler.hpp @@ -4,6 +4,7 @@ #include #include #include +#include #include #include #include @@ -127,11 +128,11 @@ using ErrorCallback = std::function; * - If a callback itself throws, the exception is silently swallowed so that * remaining callbacks still execute. * - * @par Limitations - * add_callback() returns an index that identifies the callback, but there is - * no @c remove_callback() -- only clear_callbacks() removes all callbacks at - * once. The error count returned by error_count() is monotonically - * increasing and is only reset by an explicit call to reset_error_count(). + * @par Callback management + * add_callback() returns a stable ID that can be passed to remove_callback() + * to unregister a single callback. clear_callbacks() removes all at once. + * The error count returned by error_count() is monotonically increasing and + * is only reset by an explicit call to reset_error_count(). */ class ErrorHandler { @@ -140,15 +141,35 @@ class ErrorHandler * @brief Register an error callback. * * @param callback Callable to invoke when a task throws. - * @return Zero-based index (handle) of the newly added callback. - * There is currently no API to remove an individual callback; - * use clear_callbacks() to remove all. + * @return Stable ID for the callback, usable with remove_callback(). */ auto add_callback(ErrorCallback callback) -> size_t { std::lock_guard lock(mutex_); - callbacks_.push_back(std::move(callback)); - return callbacks_.size() - 1; + size_t const id = next_callback_id_++; + callbacks_.emplace(id, std::move(callback)); + return id; + } + + /** + * @brief Remove a single callback by its ID. + * + * @param id The ID returned by add_callback(). 
+ * @return @c true if the callback was found and removed, @c false otherwise. + */ + auto remove_callback(size_t id) -> bool + { + std::lock_guard lock(mutex_); + return callbacks_.erase(id) > 0; + } + + /** + * @brief Check whether a callback with the given ID is registered. + */ + [[nodiscard]] auto has_callback(size_t id) const -> bool + { + std::lock_guard lock(mutex_); + return callbacks_.count(id) > 0; } /** @@ -167,8 +188,8 @@ class ErrorHandler * @brief Dispatch an error to all registered callbacks. * * Increments the internal error counter and then invokes every registered - * callback in order. If any callback throws, the exception is caught and - * silently discarded so that subsequent callbacks still run. + * callback in insertion order. If any callback throws, the exception is + * caught and silently discarded so that subsequent callbacks still run. * * @param error Diagnostic information about the failed task. */ @@ -177,7 +198,7 @@ class ErrorHandler std::lock_guard lock(mutex_); error_count_++; - for (auto const& callback : callbacks_) + for (auto const& [id, callback] : callbacks_) { try { @@ -185,7 +206,6 @@ class ErrorHandler } catch (...) { - // Error handlers should not throw, but we catch just in case } } } @@ -215,7 +235,8 @@ class ErrorHandler private: mutable std::mutex mutex_; - std::vector callbacks_; + std::map callbacks_; + size_t next_callback_id_{0}; size_t error_count_{0}; }; diff --git a/include/threadschedule/futures.hpp b/include/threadschedule/futures.hpp new file mode 100644 index 0000000..b637bfc --- /dev/null +++ b/include/threadschedule/futures.hpp @@ -0,0 +1,186 @@ +#pragma once + +/** + * @file futures.hpp + * @brief Combinators for @c std::future: @ref when_all, @ref when_any, + * @ref when_all_settled. + * + * These utilities simplify waiting on multiple futures produced by thread + * pool submissions. 
+ */ + +#include "expected.hpp" + +#include +#include +#include +#include +#include + +namespace threadschedule +{ + +/** + * @brief Block until all futures complete, returning results in submission order. + * + * If any future throws, the first exception is captured and re-thrown after + * all remaining futures have been waited on (to avoid leaving them dangling). + * + * @tparam T The value type of each future. + * @param futures A vector of futures to wait on. Moved-from on return. + * @return A vector of values in the same order as the input futures. + */ +template +auto when_all(std::vector>& futures) -> std::vector +{ + std::vector results; + results.reserve(futures.size()); + std::exception_ptr first_error; + + for (auto& f : futures) + { + try + { + results.push_back(f.get()); + } + catch (...) + { + if (!first_error) + first_error = std::current_exception(); + results.emplace_back(); + } + } + + if (first_error) + std::rethrow_exception(first_error); + + return results; +} + +/** + * @brief Block until all void futures complete. + * + * Re-throws the first exception after all futures have been waited on. + */ +inline void when_all(std::vector>& futures) +{ + std::exception_ptr first_error; + + for (auto& f : futures) + { + try + { + f.get(); + } + catch (...) + { + if (!first_error) + first_error = std::current_exception(); + } + } + + if (first_error) + std::rethrow_exception(first_error); +} + +/** + * @brief Block until all futures complete, returning an @c expected per slot. + * + * Never throws. Each slot is either the result value or the captured + * @c std::exception_ptr. + * + * @tparam T The value type of each future. + */ +template +auto when_all_settled(std::vector>& futures) + -> std::vector> +{ + std::vector> results; + results.reserve(futures.size()); + + for (auto& f : futures) + { + try + { + results.push_back(f.get()); + } + catch (...) 
+ { + results.push_back(unexpected(std::current_exception())); + } + } + + return results; +} + +/** + * @brief Block until all void futures complete, returning an @c expected per slot. + */ +inline auto when_all_settled(std::vector>& futures) + -> std::vector> +{ + std::vector> results; + results.reserve(futures.size()); + + for (auto& f : futures) + { + try + { + f.get(); + results.emplace_back(); + } + catch (...) + { + results.push_back(unexpected(std::current_exception())); + } + } + + return results; +} + +/** + * @brief Block until the first future becomes ready. + * + * Polls all futures round-robin with a 1 ms timeout until one is ready, + * then returns its index and value. + * + * @note The remaining futures are left in their current state -- the caller + * is responsible for managing their lifetime. + * + * @tparam T The value type of each future. + * @return A pair of (index of the first ready future, its value). + */ +template +auto when_any(std::vector>& futures) -> std::pair +{ + while (true) + { + for (size_t i = 0; i < futures.size(); ++i) + { + if (futures[i].wait_for(std::chrono::milliseconds(1)) == std::future_status::ready) + return {i, futures[i].get()}; + } + } +} + +/** + * @brief Block until the first void future becomes ready. + * + * @return The index of the first ready future. 
+ */ +inline auto when_any(std::vector>& futures) -> size_t +{ + while (true) + { + for (size_t i = 0; i < futures.size(); ++i) + { + if (futures[i].wait_for(std::chrono::milliseconds(1)) == std::future_status::ready) + { + futures[i].get(); + return i; + } + } + } +} + +} // namespace threadschedule diff --git a/include/threadschedule/task.hpp b/include/threadschedule/task.hpp index d0bea60..cbeccbd 100644 --- a/include/threadschedule/task.hpp +++ b/include/threadschedule/task.hpp @@ -16,6 +16,8 @@ #include #include #include +#include +#include #include #include #include @@ -26,6 +28,35 @@ namespace threadschedule template class task; +/** + * @brief Type-erased executor interface for pool-aware coroutines. + * + * Implementations schedule a coroutine handle for execution on a specific + * executor (e.g. a thread pool). + */ +struct executor_base +{ + virtual void execute(std::coroutine_handle<>) = 0; + virtual ~executor_base() = default; +}; + +/** + * @brief Executor that dispatches coroutine resumption to a thread pool. + * + * @tparam Pool A thread pool type providing @c submit(Callable). + */ +template +struct pool_executor : executor_base +{ + Pool& pool; + explicit pool_executor(Pool& p) : pool(p) {} + + void execute(std::coroutine_handle<> h) override + { + pool.submit([h]() mutable { h.resume(); }); + } +}; + namespace detail { @@ -35,10 +66,9 @@ namespace detail * @internal This is an implementation detail of the task coroutine machinery. * * When a task's coroutine body finishes, `final_awaiter` is returned from - * `final_suspend()`. It is never ready (always suspends), and on suspension - * it symmetric-transfers to the stored continuation. If no continuation has - * been set (e.g. the task was started via `sync_wait`), it transfers to - * `std::noop_coroutine()` to avoid undefined behaviour. + * `final_suspend()`. If an executor is set on the promise, the continuation + * is dispatched through it (e.g. resumed on a pool thread). 
Otherwise, + * symmetric transfer is used for zero-overhead inline resumption. */ struct final_awaiter { @@ -50,7 +80,13 @@ struct final_awaiter template auto await_suspend(std::coroutine_handle h) const noexcept -> std::coroutine_handle<> { - if (auto cont = h.promise().continuation_; cont) + auto cont = h.promise().continuation_; + if (h.promise().executor_ && cont) + { + h.promise().executor_->execute(cont); + return std::noop_coroutine(); + } + if (cont) return cont; return std::noop_coroutine(); } @@ -78,6 +114,9 @@ struct final_awaiter * - **Continuation:** `continuation_` is set by the task's awaiter just * before resuming the task. `final_awaiter` uses it to return control * to the parent coroutine. + * - **Executor:** If `executor_` is set (e.g. via `schedule_on`), the + * continuation is dispatched through the executor instead of using + * symmetric transfer. */ template class task_promise_base @@ -107,6 +146,7 @@ class task_promise_base } std::coroutine_handle<> continuation_{}; + executor_base* executor_{nullptr}; protected: std::exception_ptr exception_{}; @@ -579,6 +619,71 @@ inline void sync_wait(task t) std::rethrow_exception(ex); } +// --------------------------------------------------------------------------- +// schedule_on awaitable +// --------------------------------------------------------------------------- + +/** + * @brief Awaitable that transfers execution to a thread pool. + * + * Use `co_await schedule_on{pool}` inside any coroutine to continue + * execution on one of the pool's worker threads. + * + * @tparam Pool A thread pool type providing @c submit(Callable). 
+ * + * @par Example + * @code + * task work(HighPerformancePool& pool) { + * co_await schedule_on{pool}; + * // now running on a pool thread + * } + * @endcode + */ +template +struct schedule_on +{ + Pool& pool; + + [[nodiscard]] auto await_ready() const noexcept -> bool { return false; } + + void await_suspend(std::coroutine_handle<> h) const + { + pool.submit([h]() mutable { h.resume(); }); + } + + void await_resume() const noexcept {} +}; + +// --------------------------------------------------------------------------- +// run_on convenience +// --------------------------------------------------------------------------- + +/** + * @brief Submit a coroutine-returning callable to a pool and return a + * @c std::future for its result. + * + * The callable is invoked on a pool worker thread. Inside the callable, + * you can use `co_await` freely -- all continuations run on the calling + * pool unless explicitly transferred elsewhere. + * + * @tparam Pool A thread pool type providing @c submit(Callable). + * @tparam F A callable returning @c task. 
+ * + * @par Example + * @code + * auto future = run_on(pool, []() -> task { co_return 42; }); + * int v = future.get(); + * @endcode + */ +template +auto run_on(Pool& pool, F&& coro_fn) + -> std::future>()))> +{ + return pool.submit([fn = std::forward(coro_fn)]() mutable { + return sync_wait(fn()); + }); +} + } // namespace threadschedule #endif // __cpp_impl_coroutine diff --git a/include/threadschedule/thread_pool.hpp b/include/threadschedule/thread_pool.hpp index c26e6f2..d6891eb 100644 --- a/include/threadschedule/thread_pool.hpp +++ b/include/threadschedule/thread_pool.hpp @@ -2,17 +2,24 @@ #include "expected.hpp" #include "scheduler_policy.hpp" +#include "thread_registry.hpp" #include "thread_wrapper.hpp" #include #include +#include #include #include #include #include +#include #include #include #include +#if __cpp_lib_ranges >= 201911L +#include +#endif + namespace threadschedule { @@ -70,6 +77,36 @@ inline auto distribute_workers_across_cpus(WorkerRange& workers) -> expected +inline void parallel_for_each_chunked(Pool& pool, Iterator begin, Iterator end, F&& func, size_t num_workers) +{ + auto const total = static_cast(std::distance(begin, end)); + if (total == 0) + return; + + size_t const chunk_size = (std::max)(size_t(1), total / (num_workers * 4)); + std::vector> futures; + auto it = begin; + + while (it != end) + { + auto remaining = static_cast(std::distance(it, end)); + auto this_chunk = (std::min)(chunk_size, remaining); + auto chunk_end = it; + std::advance(chunk_end, this_chunk); + + futures.push_back(pool.submit([it, chunk_end, &func]() { + for (auto cur = it; cur != chunk_end; ++cur) + func(*cur); + })); + + it = chunk_end; + } + + for (auto& f : futures) + f.get(); +} + } // namespace detail /** @@ -102,6 +139,14 @@ inline auto distribute_workers_across_cpus(WorkerRange& workers) -> expected; + +/// Callback invoked when a pool worker finishes executing a task. 
+using TaskEndCallback = std::function; + template class WorkStealingDeque { @@ -211,6 +256,13 @@ class WorkStealingDeque { return size() == 0; } + + void clear() + { + std::lock_guard lock(mutex_); + bottom_.store(0, std::memory_order_relaxed); + top_.store(0, std::memory_order_relaxed); + } }; /** @@ -293,6 +345,16 @@ class WorkStealingDeque * work-stealing complexity. Best for high-throughput scenarios like * image processing, batch operations, etc. */ + +/** + * @brief Controls how a pool handles pending tasks during shutdown. + */ +enum class ShutdownPolicy : uint8_t +{ + drain, ///< Finish all queued tasks before stopping (default). + drop_pending ///< Finish running tasks, discard queued ones. +}; + class HighPerformancePool { public: @@ -309,14 +371,16 @@ class HighPerformancePool std::chrono::microseconds avg_task_time; }; - explicit HighPerformancePool(size_t num_threads = std::thread::hardware_concurrency()) - : num_threads_(num_threads == 0 ? 1 : num_threads), stop_(false), next_victim_(0), - start_time_(std::chrono::steady_clock::now()) + explicit HighPerformancePool(size_t num_threads = std::thread::hardware_concurrency(), + size_t deque_capacity = WorkStealingDeque::DEFAULT_CAPACITY, + bool register_workers = false) + : num_threads_(num_threads == 0 ? 1 : num_threads), register_workers_(register_workers), + stop_(false), next_victim_(0), start_time_(std::chrono::steady_clock::now()) { worker_queues_.resize(num_threads_); for (size_t i = 0; i < num_threads_; ++i) { - worker_queues_[i] = std::make_unique>(); + worker_queues_[i] = std::make_unique>(deque_capacity); } workers_.reserve(num_threads_); @@ -332,14 +396,73 @@ class HighPerformancePool ~HighPerformancePool() { - shutdown(); + shutdown(ShutdownPolicy::drain); + } + + /** + * @brief Shut the pool down. + * + * @param policy @c drain (default) finishes all queued tasks; + * @c drop_pending discards queued tasks. 
+ */ + void shutdown(ShutdownPolicy policy = ShutdownPolicy::drain) + { + { + std::lock_guard lock(overflow_mutex_); + if (stop_.exchange(true, std::memory_order_acq_rel)) + return; + + if (policy == ShutdownPolicy::drop_pending) + { + std::queue empty; + overflow_tasks_.swap(empty); + for (auto& q : worker_queues_) + q->clear(); + } + } + + wakeup_condition_.notify_all(); + + for (auto& worker : workers_) + { + if (worker.joinable()) + worker.join(); + } + + workers_.clear(); + } + + /** + * @brief Attempt a timed drain: finish as many tasks as possible within + * @p timeout, then force-stop remaining workers. + * @return @c true if all tasks completed within the deadline, + * @c false if the timeout expired first. + */ + auto shutdown_for(std::chrono::milliseconds timeout) -> bool + { + auto const deadline = std::chrono::steady_clock::now() + timeout; + + { + std::lock_guard lock(overflow_mutex_); + if (stop_.load(std::memory_order_acquire)) + return true; + } + + std::unique_lock lock(completion_mutex_); + bool const drained = completion_condition_.wait_until(lock, deadline, [this] { + return pending_tasks() == 0 && active_tasks_.load(std::memory_order_acquire) == 0; + }); + + shutdown(ShutdownPolicy::drain); + return drained; } /** - * @brief High-performance task submission (optimized hot path) + * @brief Submit a task, returning an error instead of throwing on shutdown. */ template - auto submit(F&& f, Args&&... args) -> std::future> + auto try_submit(F&& f, Args&&... 
args) + -> expected>, std::error_code> { using return_type = std::invoke_result_t; @@ -349,9 +472,7 @@ class HighPerformancePool std::future result = task->get_future(); if (stop_.load(std::memory_order_acquire)) - { - throw std::runtime_error("HighPerformancePool is shutting down"); - } + return unexpected(std::make_error_code(std::errc::operation_canceled)); size_t const preferred_queue = next_victim_.fetch_add(1, std::memory_order_relaxed) % num_threads_; @@ -374,9 +495,7 @@ class HighPerformancePool { std::lock_guard lock(overflow_mutex_); if (stop_.load(std::memory_order_relaxed)) - { - throw std::runtime_error("HighPerformancePool is shutting down"); - } + return unexpected(std::make_error_code(std::errc::operation_canceled)); overflow_tasks_.emplace([task]() { (*task)(); }); } @@ -385,19 +504,61 @@ class HighPerformancePool } /** - * @brief Batch task submission for maximum throughput + * @brief Submit a task. Throws std::runtime_error if the pool is shutting down. + */ + template + auto submit(F&& f, Args&&... args) -> std::future> + { + auto result = try_submit(std::forward(f), std::forward(args)...); + if (!result.has_value()) + throw std::runtime_error("HighPerformancePool is shutting down"); + return std::move(result.value()); + } + +#if __cpp_lib_jthread >= 201911L + /** + * @brief Submit a cancellable task. If stop is already requested the task + * is skipped and the future throws @c std::future_error (broken_promise). + */ + template + auto submit(std::stop_token token, F&& f, Args&&... args) + -> std::future> + { + return submit([token = std::move(token), fn = std::bind(std::forward(f), std::forward(args)...)]() mutable { + if (token.stop_requested()) + return decltype(fn())(); + return fn(); + }); + } + + /** + * @brief Non-throwing cancellable submission. + */ + template + auto try_submit(std::stop_token token, F&& f, Args&&... 
args) + -> expected>, std::error_code> + { + return try_submit([token = std::move(token), fn = std::bind(std::forward(f), std::forward(args)...)]() mutable { + if (token.stop_requested()) + return decltype(fn())(); + return fn(); + }); + } +#endif + + /** + * @brief Batch task submission, returning an error instead of throwing on shutdown. */ template - auto submit_batch(Iterator begin, Iterator end) -> std::vector> + auto try_submit_batch(Iterator begin, Iterator end) + -> expected>, std::error_code> { std::vector> futures; size_t const batch_size = std::distance(begin, end); futures.reserve(batch_size); if (stop_.load(std::memory_order_acquire)) - { - throw std::runtime_error("HighPerformancePool is shutting down"); - } + return unexpected(std::make_error_code(std::errc::operation_canceled)); size_t queue_idx = next_victim_.fetch_add(batch_size, std::memory_order_relaxed) % num_threads_; @@ -429,37 +590,39 @@ class HighPerformancePool } /** - * @brief Optimized parallel for_each with work distribution + * @brief Batch task submission. Throws on shutdown. + */ + template + auto submit_batch(Iterator begin, Iterator end) -> std::vector> + { + auto result = try_submit_batch(begin, end); + if (!result.has_value()) + throw std::runtime_error("HighPerformancePool is shutting down"); + return std::move(result.value()); + } + + /** + * @brief Apply a function to a range in parallel using chunked work distribution. 
*/ template void parallel_for_each(Iterator begin, Iterator end, F&& func) { - size_t const total_items = std::distance(begin, end); - if (total_items == 0) - return; - - size_t const chunk_size = (std::max)(size_t(1), total_items / (num_threads_ * 4)); - std::vector> futures; - - for (auto it = begin; it < end;) - { - auto chunk_end = (std::min)(it + chunk_size, end); + detail::parallel_for_each_chunked(*this, begin, end, std::forward(func), num_threads_); + } - futures.push_back(submit([func, it, chunk_end]() { - for (auto chunk_it = it; chunk_it != chunk_end; ++chunk_it) - { - func(*chunk_it); - } - })); +#if __cpp_lib_ranges >= 201911L + template + auto submit_batch(R&& range) { return submit_batch(std::ranges::begin(range), std::ranges::end(range)); } - it = chunk_end; - } + template + auto try_submit_batch(R&& range) { return try_submit_batch(std::ranges::begin(range), std::ranges::end(range)); } - for (auto& future : futures) - { - future.wait(); - } + template + void parallel_for_each(R&& range, F&& func) + { + parallel_for_each(std::ranges::begin(range), std::ranges::end(range), std::forward(func)); } +#endif [[nodiscard]] auto size() const noexcept -> size_t { @@ -505,29 +668,6 @@ class HighPerformancePool lock, [this] { return pending_tasks() == 0 && active_tasks_.load(std::memory_order_acquire) == 0; }); } - void shutdown() - { - { - std::lock_guard lock(overflow_mutex_); - if (stop_.exchange(true, std::memory_order_acq_rel)) - { - return; - } - } - - wakeup_condition_.notify_all(); - - for (auto& worker : workers_) - { - if (worker.joinable()) - { - worker.join(); - } - } - - workers_.clear(); - } - /** * @brief Get detailed performance statistics */ @@ -565,8 +705,27 @@ class HighPerformancePool return stats; } + /** + * @brief Set a callback invoked at the start of each task. 
+ */ + void set_on_task_start(TaskStartCallback cb) + { + std::lock_guard lock(trace_mutex_); + on_task_start_ = std::move(cb); + } + + /** + * @brief Set a callback invoked at the end of each task. + */ + void set_on_task_end(TaskEndCallback cb) + { + std::lock_guard lock(trace_mutex_); + on_task_end_ = std::move(cb); + } + private: size_t num_threads_; + bool register_workers_; std::vector workers_; std::vector>> worker_queues_; @@ -586,11 +745,19 @@ class HighPerformancePool std::atomic stolen_tasks_{0}; std::atomic total_task_time_{0}; + std::mutex trace_mutex_; + TaskStartCallback on_task_start_; + TaskEndCallback on_task_end_; + std::chrono::steady_clock::time_point start_time_; // NOLINTNEXTLINE(readability-function-cognitive-complexity) void worker_function(size_t worker_id) { + std::optional reg_guard; + if (register_workers_) + reg_guard.emplace("hp_worker_" + std::to_string(worker_id), "threadschedule.pool"); + thread_local std::mt19937 gen = []() { std::random_device device; return std::mt19937(device()); @@ -638,6 +805,14 @@ class HighPerformancePool active_tasks_.fetch_add(1, std::memory_order_relaxed); auto const start_time = std::chrono::steady_clock::now(); + auto const tid = std::this_thread::get_id(); + + { + std::lock_guard tl(trace_mutex_); + if (on_task_start_) + on_task_start_(start_time, tid); + } + try { task(); @@ -650,6 +825,12 @@ class HighPerformancePool auto const task_duration = std::chrono::duration_cast(end_time - start_time); total_task_time_.fetch_add(task_duration.count(), std::memory_order_relaxed); + { + std::lock_guard tl(trace_mutex_); + if (on_task_end_) + on_task_end_(end_time, tid, task_duration); + } + active_tasks_.fetch_sub(1, std::memory_order_relaxed); completed_tasks_.fetch_add(1, std::memory_order_relaxed); @@ -690,18 +871,21 @@ struct IndefiniteWait }; /** - * @brief Wait policy that polls with a 10 ms timeout. + * @brief Wait policy that polls with a configurable timeout. 
* * Workers periodically re-check the queue even without notification, trading * a small amount of CPU for lower wake-up latency under bursty workloads. - * Used by the @c FastThreadPool type alias. + * Used by the @c FastThreadPool type alias (default 10 ms). + * + * @tparam IntervalMs Polling interval in milliseconds. */ +template struct PollingWait { template static auto wait(std::condition_variable& cv, Lock& lock, Pred pred) -> bool { - return cv.wait_for(lock, std::chrono::milliseconds(10), pred); + return cv.wait_for(lock, std::chrono::milliseconds(IntervalMs), pred); } }; @@ -777,8 +961,10 @@ class ThreadPoolBase std::chrono::microseconds avg_task_time; }; - explicit ThreadPoolBase(size_t num_threads = std::thread::hardware_concurrency()) - : num_threads_(num_threads == 0 ? 1 : num_threads), stop_(false), + explicit ThreadPoolBase(size_t num_threads = std::thread::hardware_concurrency(), + bool register_workers = false) + : num_threads_(num_threads == 0 ? 1 : num_threads), + register_workers_(register_workers), stop_(false), start_time_(std::chrono::steady_clock::now()) { workers_.reserve(num_threads_); @@ -794,14 +980,15 @@ class ThreadPoolBase ~ThreadPoolBase() { - shutdown(); + shutdown(ShutdownPolicy::drain); } /** - * @brief Submit a task to the thread pool + * @brief Submit a task, returning an error instead of throwing on shutdown. */ template - auto submit(F&& f, Args&&... args) -> std::future> + auto try_submit(F&& f, Args&&... args) + -> expected>, std::error_code> { using return_type = std::invoke_result_t; @@ -813,9 +1000,7 @@ class ThreadPoolBase { std::lock_guard lock(queue_mutex_); if (stop_) - { - throw std::runtime_error("Pool is shutting down"); - } + return unexpected(std::make_error_code(std::errc::operation_canceled)); tasks_.emplace([task]() { (*task)(); }); } @@ -824,10 +1009,51 @@ class ThreadPoolBase } /** - * @brief Submit multiple tasks under a single lock acquisition + * @brief Submit a task. 
Throws std::runtime_error if the pool is shutting down. + */ + template + auto submit(F&& f, Args&&... args) -> std::future> + { + auto result = try_submit(std::forward(f), std::forward(args)...); + if (!result.has_value()) + throw std::runtime_error("Pool is shutting down"); + return std::move(result.value()); + } + +#if __cpp_lib_jthread >= 201911L + /** + * @brief Submit a cancellable task. If stop is already requested the task + * is skipped and returns a default-constructed result. + */ + template + auto submit(std::stop_token token, F&& f, Args&&... args) + -> std::future> + { + return submit([token = std::move(token), fn = std::bind(std::forward(f), std::forward(args)...)]() mutable { + if (token.stop_requested()) + return decltype(fn())(); + return fn(); + }); + } + + template + auto try_submit(std::stop_token token, F&& f, Args&&... args) + -> expected>, std::error_code> + { + return try_submit([token = std::move(token), fn = std::bind(std::forward(f), std::forward(args)...)]() mutable { + if (token.stop_requested()) + return decltype(fn())(); + return fn(); + }); + } +#endif + + /** + * @brief Submit multiple tasks, returning an error instead of throwing on shutdown. */ template - auto submit_batch(Iterator begin, Iterator end) -> std::vector> + auto try_submit_batch(Iterator begin, Iterator end) + -> expected>, std::error_code> { std::vector> futures; futures.reserve(std::distance(begin, end)); @@ -835,9 +1061,7 @@ class ThreadPoolBase { std::lock_guard lock(queue_mutex_); if (stop_) - { - throw std::runtime_error("Pool is shutting down"); - } + return unexpected(std::make_error_code(std::errc::operation_canceled)); for (auto it = begin; it != end; ++it) { @@ -852,24 +1076,39 @@ class ThreadPoolBase } /** - * @brief Apply a function to a range of values in parallel + * @brief Submit multiple tasks under a single lock acquisition. Throws on shutdown. 
+ */ + template + auto submit_batch(Iterator begin, Iterator end) -> std::vector> + { + auto result = try_submit_batch(begin, end); + if (!result.has_value()) + throw std::runtime_error("Pool is shutting down"); + return std::move(result.value()); + } + + /** + * @brief Apply a function to a range in parallel using chunked work distribution. */ template void parallel_for_each(Iterator begin, Iterator end, F&& func) { - std::vector> futures; - futures.reserve(std::distance(begin, end)); + detail::parallel_for_each_chunked(*this, begin, end, std::forward(func), num_threads_); + } - for (auto it = begin; it != end; ++it) - { - futures.push_back(submit([func, it]() { func(*it); })); - } +#if __cpp_lib_ranges >= 201911L + template + auto submit_batch(R&& range) { return submit_batch(std::ranges::begin(range), std::ranges::end(range)); } - for (auto& future : futures) - { - future.wait(); - } + template + auto try_submit_batch(R&& range) { return try_submit_batch(std::ranges::begin(range), std::ranges::end(range)); } + + template + void parallel_for_each(R&& range, F&& func) + { + parallel_for_each(std::ranges::begin(range), std::ranges::end(range), std::forward(func)); } +#endif [[nodiscard]] auto size() const noexcept -> size_t { @@ -914,13 +1153,24 @@ class ThreadPoolBase lock, [this] { return tasks_.empty() && active_tasks_.load(std::memory_order_acquire) == 0; }); } - void shutdown() + /** + * @brief Shut the pool down. + * + * @param policy @c drain (default) finishes all queued tasks; + * @c drop_pending discards queued tasks. 
+ */ + void shutdown(ShutdownPolicy policy = ShutdownPolicy::drain) { { std::lock_guard lock(queue_mutex_); if (stop_) return; stop_ = true; + if (policy == ShutdownPolicy::drop_pending) + { + std::queue empty; + tasks_.swap(empty); + } } condition_.notify_all(); @@ -928,14 +1178,38 @@ class ThreadPoolBase for (auto& worker : workers_) { if (worker.joinable()) - { worker.join(); - } } workers_.clear(); } + /** + * @brief Attempt a timed drain: finish as many tasks as possible within + * @p timeout, then force-stop remaining workers. + * @return @c true if all tasks completed within the deadline, + * @c false if the timeout expired first. + */ + auto shutdown_for(std::chrono::milliseconds timeout) -> bool + { + auto const deadline = std::chrono::steady_clock::now() + timeout; + + { + std::lock_guard lock(queue_mutex_); + if (stop_) + return true; + } + + std::unique_lock lock(queue_mutex_); + bool const drained = task_finished_condition_.wait_until(lock, deadline, [this] { + return tasks_.empty() && active_tasks_.load(std::memory_order_acquire) == 0; + }); + lock.unlock(); + + shutdown(ShutdownPolicy::drain); + return drained; + } + /** * @brief Get performance statistics */ @@ -973,8 +1247,27 @@ class ThreadPoolBase return stats; } + /** + * @brief Set a callback invoked at the start of each task. + */ + void set_on_task_start(TaskStartCallback cb) + { + std::lock_guard lock(trace_mutex_); + on_task_start_ = std::move(cb); + } + + /** + * @brief Set a callback invoked at the end of each task. 
+ */ + void set_on_task_end(TaskEndCallback cb) + { + std::lock_guard lock(trace_mutex_); + on_task_end_ = std::move(cb); + } + private: size_t num_threads_; + bool register_workers_; std::vector workers_; std::queue tasks_; @@ -986,10 +1279,18 @@ class ThreadPoolBase std::atomic completed_tasks_{0}; std::atomic total_task_time_{0}; + std::mutex trace_mutex_; + TaskStartCallback on_task_start_; + TaskEndCallback on_task_end_; + std::chrono::steady_clock::time_point start_time_; - void worker_function(size_t /*worker_id*/) + void worker_function(size_t worker_id) { + std::optional reg_guard; + if (register_workers_) + reg_guard.emplace("pool_worker_" + std::to_string(worker_id), "threadschedule.pool"); + while (true) { Task task; @@ -1022,6 +1323,14 @@ class ThreadPoolBase if (found_task) { auto const start_time = std::chrono::steady_clock::now(); + auto const tid = std::this_thread::get_id(); + + { + std::lock_guard tl(trace_mutex_); + if (on_task_start_) + on_task_start_(start_time, tid); + } + try { task(); @@ -1034,6 +1343,12 @@ class ThreadPoolBase auto const task_duration = std::chrono::duration_cast(end_time - start_time); total_task_time_.fetch_add(task_duration.count(), std::memory_order_relaxed); + { + std::lock_guard tl(trace_mutex_); + if (on_task_end_) + on_task_end_(end_time, tid, task_duration); + } + active_tasks_.fetch_sub(1, std::memory_order_relaxed); completed_tasks_.fetch_add(1, std::memory_order_relaxed); @@ -1062,7 +1377,7 @@ using ThreadPool = ThreadPoolBase; * * @see ThreadPoolBase, PollingWait */ -using FastThreadPool = ThreadPoolBase; +using FastThreadPool = ThreadPoolBase>; // --------------------------------------------------------------------------- // GlobalPool @@ -1099,9 +1414,20 @@ template class GlobalPool { public: + /** + * @brief Pre-configure the number of threads before first use. + * + * Must be called before instance() is first invoked. Subsequent calls + * are ignored (std::call_once semantics). 
+ */ + static void init(size_t num_threads) + { + std::call_once(init_flag_(), [num_threads] { thread_count_() = num_threads; }); + } + static auto instance() -> PoolType& { - static PoolType pool(std::thread::hardware_concurrency()); + static PoolType pool(thread_count_()); return pool; } @@ -1111,20 +1437,58 @@ class GlobalPool return instance().submit(std::forward(f), std::forward(args)...); } + template + static auto try_submit(F&& f, Args&&... args) + { + return instance().try_submit(std::forward(f), std::forward(args)...); + } + template static auto submit_batch(Iterator begin, Iterator end) { return instance().submit_batch(begin, end); } + template + static auto try_submit_batch(Iterator begin, Iterator end) + { + return instance().try_submit_batch(begin, end); + } + template static void parallel_for_each(Iterator begin, Iterator end, F&& func) { instance().parallel_for_each(begin, end, std::forward(func)); } +#if __cpp_lib_ranges >= 201911L + template + static auto submit_batch(R&& range) { return instance().submit_batch(std::forward(range)); } + + template + static auto try_submit_batch(R&& range) { return instance().try_submit_batch(std::forward(range)); } + + template + static void parallel_for_each(R&& range, F&& func) + { + instance().parallel_for_each(std::forward(range), std::forward(func)); + } +#endif + private: GlobalPool() = default; + + static auto init_flag_() -> std::once_flag& + { + static std::once_flag flag; + return flag; + } + + static auto thread_count_() -> size_t& + { + static size_t count = std::thread::hardware_concurrency(); + return count; + } }; /** @brief Singleton @ref ThreadPool accessor. 
*/ diff --git a/include/threadschedule/thread_pool_with_errors.hpp b/include/threadschedule/thread_pool_with_errors.hpp index 5e7e419..1454544 100644 --- a/include/threadschedule/thread_pool_with_errors.hpp +++ b/include/threadschedule/thread_pool_with_errors.hpp @@ -51,11 +51,26 @@ class PoolWithErrors return submit_impl(description, std::forward(f), std::forward(args)...); } + /** + * @brief Submit a task, returning an error instead of throwing on shutdown. + */ + template + auto try_submit(F&& f, Args&&... args) + -> expected>, std::error_code> + { + return try_submit_impl({}, std::forward(f), std::forward(args)...); + } + auto add_error_callback(ErrorCallback callback) -> size_t { return error_handler_->add_callback(std::move(callback)); } + auto remove_error_callback(size_t id) -> bool + { + return error_handler_->remove_callback(id); + } + void clear_error_callbacks() { error_handler_->clear_callbacks(); @@ -139,6 +154,29 @@ class PoolWithErrors return FutureWithErrorHandler>(std::move(future)); } + template + auto try_submit_impl(std::string description, F&& f, Args&&... args) + -> expected>, std::error_code> + { + auto handler = error_handler_; + auto wrapped_task = [f = std::forward(f), args = std::make_tuple(std::forward(args)...), handler, + desc = std::move(description)]() { + try + { + return std::apply(f, args); + } + catch (...) 
+ { + handler->handle_error(TaskError::capture(desc)); + throw; + } + }; + auto result = pool_.try_submit(std::move(wrapped_task)); + if (!result.has_value()) + return unexpected(result.error()); + return FutureWithErrorHandler>(std::move(result.value())); + } + PoolType pool_; std::shared_ptr error_handler_; }; diff --git a/include/threadschedule/threadschedule.hpp b/include/threadschedule/threadschedule.hpp index 06b293d..4896b12 100644 --- a/include/threadschedule/threadschedule.hpp +++ b/include/threadschedule/threadschedule.hpp @@ -3,6 +3,7 @@ #include "chaos.hpp" #include "concepts.hpp" #include "error_handler.hpp" +#include "futures.hpp" #include "generator.hpp" #include "profiles.hpp" #include "pthread_wrapper.hpp" @@ -69,13 +70,17 @@ using ts::ScheduledTaskHandle; using ts::ScheduledThreadPool; using ts::ScheduledThreadPoolT; using ts::SchedulingPolicy; +using ts::ShutdownPolicy; using ts::TaskError; using ts::ThreadAffinity; using ts::ThreadByNameView; using ts::ThreadPool; using ts::ThreadPoolBase; using ts::ThreadPoolWithErrors; +using ts::PollingWait; using ts::PoolWithErrors; +using ts::TaskEndCallback; +using ts::TaskStartCallback; using ts::ThreadPriority; using ts::ThreadProfile; using ts::ThreadWrapper; @@ -86,11 +91,20 @@ using ts::BuildMode; using ts::build_mode; using ts::build_mode_string; +// Future combinators +using ts::when_all; +using ts::when_all_settled; +using ts::when_any; + // Coroutine primitives (C++20) #if defined(__cpp_impl_coroutine) && __cpp_impl_coroutine >= 201902L -using ts::task; -using ts::sync_wait; +using ts::executor_base; using ts::generator; +using ts::pool_executor; +using ts::run_on; +using ts::schedule_on; +using ts::sync_wait; +using ts::task; #endif } // namespace threadschedule From bc6798c53820fc7f31051550f18c727eefd01b7c Mon Sep 17 00:00:00 2001 From: Katze719 Date: Sun, 5 Apr 2026 17:42:45 +0200 Subject: [PATCH 05/15] Enhance thread pool capabilities and introduce new lightweight pool - Added 
`LightweightPoolT` for ultra-lightweight fire-and-forget task execution with zero heap allocations for typical lambdas. - Implemented `post()` and `try_post()` methods for fire-and-forget submissions across all pool types, reducing overhead. - Updated `ScheduledThreadPoolT` to utilize `post()` internally, eliminating unnecessary future allocations. - Introduced new types: `LightweightPool`, `ScheduledLightweightPool`, and `detail::SboCallable` for improved callable management. - Updated CHANGELOG to reflect these new features and enhancements. --- CHANGELOG.md | 18 + include/threadschedule/pthread_wrapper.hpp | 12 +- include/threadschedule/scheduled_pool.hpp | 4 +- include/threadschedule/thread_pool.hpp | 485 ++++++++++++++++++++- include/threadschedule/threadschedule.hpp | 3 + 5 files changed, 504 insertions(+), 18 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index f6ca2fd..e3f3de2 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -86,6 +86,21 @@ executor for pool-aware tasks, `run_on(pool, coro_fn)` convenience returning `std::future`. +- **`LightweightPoolT`** -- ultra-lightweight fire-and-forget pool + using a custom `detail::SboCallable` with configurable inline buffer + (default 64 bytes = 1 cache line, 56 bytes usable). Zero heap allocations for + typical lambdas. No futures, no `packaged_task`, no statistics, no tracing. + Workers are `ThreadWrapper` so `configure_threads`/`set_affinity` still work. + `using LightweightPool = LightweightPoolT<>` for the default. + +- **`post()` / `try_post()`** -- fire-and-forget submission on all pool types + (`HighPerformancePool`, `ThreadPoolBase`, `GlobalPool`). Same queue logic as + `submit()` but skips `packaged_task`/`shared_ptr`/`future` overhead. + +- **`ScheduledThreadPoolT` now uses `post()`** internally instead of `submit()`, + eliminating wasted `future` allocations for every scheduled task dispatch. + New alias: `ScheduledLightweightPool = ScheduledThreadPoolT`. 
+ ### New Types - `ThreadPoolBase` - parameterized single-queue thread pool. @@ -97,6 +112,9 @@ - `executor_base` / `pool_executor` - type-erased executor for coroutines. - `schedule_on` - awaitable for hopping to a pool thread. - `futures.hpp` - future combinators (`when_all`, `when_any`, `when_all_settled`). +- `LightweightPoolT` / `LightweightPool` - fire-and-forget pool with SBO. +- `detail::SboCallable` - type-erased callable with inline storage. +- `ScheduledLightweightPool` - scheduled pool backed by `LightweightPool`. ### Internal Improvements diff --git a/include/threadschedule/pthread_wrapper.hpp b/include/threadschedule/pthread_wrapper.hpp index db6485a..db7ee4b 100644 --- a/include/threadschedule/pthread_wrapper.hpp +++ b/include/threadschedule/pthread_wrapper.hpp @@ -9,6 +9,7 @@ #include #include #include +#include #ifdef _WIN32 #include @@ -59,9 +60,11 @@ class PThreadWrapper explicit PThreadWrapper(F&& func, Args&&... args) : thread_(0), joined_(false) { - // Store the callable in a way pthread can handle auto callable = - std::make_unique>(std::bind(std::forward(func), std::forward(args)...)); + std::make_unique>([fn = std::forward(func), + tup = std::make_tuple(std::forward(args)...)]() mutable { + std::apply(std::move(fn), std::move(tup)); + }); int const result = pthread_create(&thread_, nullptr, thread_function, callable.release()); @@ -221,7 +224,10 @@ class PThreadWrapper PThreadWrapper wrapper; auto callable = - std::make_unique>(std::bind(std::forward(func), std::forward(args)...)); + std::make_unique>([fn = std::forward(func), + tup = std::make_tuple(std::forward(args)...)]() mutable { + std::apply(std::move(fn), std::move(tup)); + }); int const result = pthread_create(&wrapper.thread_, &attr, thread_function, callable.release()); diff --git a/include/threadschedule/scheduled_pool.hpp b/include/threadschedule/scheduled_pool.hpp index 31c149f..748ac8e 100644 --- a/include/threadschedule/scheduled_pool.hpp +++ 
b/include/threadschedule/scheduled_pool.hpp @@ -359,7 +359,7 @@ class ScheduledThreadPoolT auto task_copy = info.task; auto cancelled_flag = info.cancelled; - pool_.submit([task_copy, cancelled_flag]() { + pool_.post([task_copy, cancelled_flag]() { if (!cancelled_flag->load(std::memory_order_acquire)) { task_copy(); @@ -387,5 +387,7 @@ using ScheduledThreadPool = ScheduledThreadPoolT; using ScheduledHighPerformancePool = ScheduledThreadPoolT; /** @brief @ref ScheduledThreadPoolT using @ref FastThreadPool as backend. */ using ScheduledFastThreadPool = ScheduledThreadPoolT; +/** @brief @ref ScheduledThreadPoolT using @ref LightweightPool as backend (minimal overhead). */ +using ScheduledLightweightPool = ScheduledThreadPoolT; } // namespace threadschedule diff --git a/include/threadschedule/thread_pool.hpp b/include/threadschedule/thread_pool.hpp index d6891eb..b1caefd 100644 --- a/include/threadschedule/thread_pool.hpp +++ b/include/threadschedule/thread_pool.hpp @@ -14,6 +14,7 @@ #include #include #include +#include #include #if __cpp_lib_ranges >= 201911L @@ -107,6 +108,157 @@ inline void parallel_for_each_chunked(Pool& pool, Iterator begin, Iterator end, f.get(); } +// --------------------------------------------------------------------------- +// bind_args -- optimal argument binding, C++20 pack-capture or C++17 tuple +// --------------------------------------------------------------------------- + +/** + * @brief Bind a callable with its arguments into a nullary lambda. + * + * On C++20 and later this uses pack init-captures for zero intermediate + * storage overhead. On C++17 it falls back to @c std::make_tuple / + * @c std::apply which is still significantly faster than @c std::bind. + */ +template +auto bind_args(F&& f, Args&&... 
args) +{ +#if __cpp_init_captures >= 201803L + return [fn = std::forward(f), ...a = std::forward(args)]() mutable { + return fn(std::move(a)...); + }; +#else + return [fn = std::forward(f), + tup = std::make_tuple(std::forward(args)...)]() mutable { + return std::apply(std::move(fn), std::move(tup)); + }; +#endif +} + +// --------------------------------------------------------------------------- +// SboCallable -- type-erased callable with inline small-buffer storage +// --------------------------------------------------------------------------- + +/** + * @brief Type-erased, move-only callable with configurable inline storage. + * + * Avoids the heap allocation that @c std::function incurs for callables + * larger than its (typically 16-byte) internal buffer. Callables that fit + * within @c TaskSize - sizeof(void*) bytes are stored inline; larger ones + * fall back to a heap allocation. + * + * @tparam TaskSize Total object size in bytes (default 64, one x86 cache line). + * The usable inline buffer is @c TaskSize - 8 bytes on 64-bit platforms. 
+ */ +template +class SboCallable +{ + static_assert(TaskSize > sizeof(void*), "TaskSize must be larger than a pointer"); + + struct VTable + { + void (*invoke)(void* storage); + void (*destroy)(void* storage); + void (*move_to)(void* dst, void* src) noexcept; + }; + + static constexpr size_t kBufferSize = TaskSize - sizeof(VTable const*); + + template + static constexpr bool fits_inline_v = + sizeof(F) <= kBufferSize && + alignof(F) <= alignof(std::max_align_t) && + std::is_nothrow_move_constructible_v; + + template + static VTable const* vtable_for() noexcept + { + if constexpr (fits_inline_v) + { + static constexpr VTable vt{ + [](void* s) { (*static_cast(s))(); }, + [](void* s) { static_cast(s)->~F(); }, + [](void* dst, void* src) noexcept { + ::new (dst) F(std::move(*static_cast(src))); + static_cast(src)->~F(); + }}; + return &vt; + } + else + { + static constexpr VTable vt{ + [](void* s) { (*(*static_cast(s)))(); }, + [](void* s) { delete *static_cast(s); }, + [](void* dst, void* src) noexcept { + *static_cast(dst) = *static_cast(src); + *static_cast(src) = nullptr; + }}; + return &vt; + } + } + + public: + SboCallable() = default; + + template , SboCallable>>> + SboCallable(F&& f) // NOLINT(google-explicit-constructor) + { + using Decay = std::decay_t; + vtable_ = vtable_for(); + if constexpr (fits_inline_v) + ::new (buffer_) Decay(std::forward(f)); + else + *reinterpret_cast(buffer_) = new Decay(std::forward(f)); + } + + SboCallable(SboCallable&& other) noexcept : vtable_(other.vtable_) + { + if (vtable_) + { + vtable_->move_to(buffer_, other.buffer_); + other.vtable_ = nullptr; + } + } + + auto operator=(SboCallable&& other) noexcept -> SboCallable& + { + if (this != &other) + { + if (vtable_) + vtable_->destroy(buffer_); + vtable_ = other.vtable_; + if (vtable_) + { + vtable_->move_to(buffer_, other.buffer_); + other.vtable_ = nullptr; + } + } + return *this; + } + + SboCallable(SboCallable const&) = delete; + auto operator=(SboCallable const&) -> 
SboCallable& = delete; + + ~SboCallable() + { + if (vtable_) + vtable_->destroy(buffer_); + } + + explicit operator bool() const noexcept { return vtable_ != nullptr; } + + void operator()() + { + auto* vt = vtable_; + vtable_ = nullptr; + vt->invoke(buffer_); + vt->destroy(buffer_); + } + + private: + VTable const* vtable_ = nullptr; + alignas(std::max_align_t) unsigned char buffer_[kBufferSize]{}; +}; + } // namespace detail /** @@ -467,7 +619,7 @@ class HighPerformancePool using return_type = std::invoke_result_t; auto task = std::make_shared>( - std::bind(std::forward(f), std::forward(args)...)); + detail::bind_args(std::forward(f), std::forward(args)...)); std::future result = task->get_future(); @@ -515,6 +667,57 @@ class HighPerformancePool return std::move(result.value()); } + /** + * @brief Fire-and-forget submission (no future, no packaged_task overhead). + */ + template + void post(F&& f, Args&&... args) + { + auto r = try_post(std::forward(f), std::forward(args)...); + if (!r.has_value()) + throw std::runtime_error("HighPerformancePool is shutting down"); + } + + /** + * @brief Fire-and-forget submission. Returns error on shutdown. + */ + template + auto try_post(F&& f, Args&&... 
args) -> expected + { + Task bound(detail::bind_args(std::forward(f), std::forward(args)...)); + + if (stop_.load(std::memory_order_acquire)) + return unexpected(std::make_error_code(std::errc::operation_canceled)); + + size_t const preferred_queue = next_victim_.fetch_add(1, std::memory_order_relaxed) % num_threads_; + + if (worker_queues_[preferred_queue]->push(std::move(bound))) + { + wakeup_condition_.notify_one(); + return {}; + } + + for (size_t attempts = 0; attempts < (std::min)(num_threads_, size_t(3)); ++attempts) + { + size_t const idx = (preferred_queue + attempts + 1) % num_threads_; + if (worker_queues_[idx]->push(std::move(bound))) + { + wakeup_condition_.notify_one(); + return {}; + } + } + + { + std::lock_guard lock(overflow_mutex_); + if (stop_.load(std::memory_order_relaxed)) + return unexpected(std::make_error_code(std::errc::operation_canceled)); + overflow_tasks_.emplace(std::move(bound)); + } + + wakeup_condition_.notify_all(); + return {}; + } + #if __cpp_lib_jthread >= 201911L /** * @brief Submit a cancellable task. If stop is already requested the task @@ -524,10 +727,11 @@ class HighPerformancePool auto submit(std::stop_token token, F&& f, Args&&... args) -> std::future> { - return submit([token = std::move(token), fn = std::bind(std::forward(f), std::forward(args)...)]() mutable { + return submit([token = std::move(token), + bound = detail::bind_args(std::forward(f), std::forward(args)...)]() mutable { if (token.stop_requested()) - return decltype(fn())(); - return fn(); + return std::invoke_result_t(); + return bound(); }); } @@ -538,10 +742,11 @@ class HighPerformancePool auto try_submit(std::stop_token token, F&& f, Args&&... 
args) -> expected>, std::error_code> { - return try_submit([token = std::move(token), fn = std::bind(std::forward(f), std::forward(args)...)]() mutable { + return try_submit([token = std::move(token), + bound = detail::bind_args(std::forward(f), std::forward(args)...)]() mutable { if (token.stop_requested()) - return decltype(fn())(); - return fn(); + return std::invoke_result_t(); + return bound(); }); } #endif @@ -993,7 +1198,7 @@ class ThreadPoolBase using return_type = std::invoke_result_t; auto task = std::make_shared>( - std::bind(std::forward(f), std::forward(args)...)); + detail::bind_args(std::forward(f), std::forward(args)...)); std::future result = task->get_future(); @@ -1020,6 +1225,33 @@ class ThreadPoolBase return std::move(result.value()); } + /** + * @brief Fire-and-forget submission (no future, no packaged_task overhead). + */ + template + void post(F&& f, Args&&... args) + { + auto r = try_post(std::forward(f), std::forward(args)...); + if (!r.has_value()) + throw std::runtime_error("Pool is shutting down"); + } + + /** + * @brief Fire-and-forget submission. Returns error on shutdown. + */ + template + auto try_post(F&& f, Args&&... args) -> expected + { + { + std::lock_guard lock(queue_mutex_); + if (stop_) + return unexpected(std::make_error_code(std::errc::operation_canceled)); + tasks_.emplace(detail::bind_args(std::forward(f), std::forward(args)...)); + } + condition_.notify_one(); + return {}; + } + #if __cpp_lib_jthread >= 201911L /** * @brief Submit a cancellable task. If stop is already requested the task @@ -1029,10 +1261,11 @@ class ThreadPoolBase auto submit(std::stop_token token, F&& f, Args&&... 
args) -> std::future> { - return submit([token = std::move(token), fn = std::bind(std::forward(f), std::forward(args)...)]() mutable { + return submit([token = std::move(token), + bound = detail::bind_args(std::forward(f), std::forward(args)...)]() mutable { if (token.stop_requested()) - return decltype(fn())(); - return fn(); + return std::invoke_result_t(); + return bound(); }); } @@ -1040,10 +1273,11 @@ class ThreadPoolBase auto try_submit(std::stop_token token, F&& f, Args&&... args) -> expected>, std::error_code> { - return try_submit([token = std::move(token), fn = std::bind(std::forward(f), std::forward(args)...)]() mutable { + return try_submit([token = std::move(token), + bound = detail::bind_args(std::forward(f), std::forward(args)...)]() mutable { if (token.stop_requested()) - return decltype(fn())(); - return fn(); + return std::invoke_result_t(); + return bound(); }); } #endif @@ -1379,6 +1613,217 @@ using ThreadPool = ThreadPoolBase; */ using FastThreadPool = ThreadPoolBase>; +// --------------------------------------------------------------------------- +// LightweightPoolT +// --------------------------------------------------------------------------- + +/** + * @brief Ultra-lightweight fire-and-forget thread pool. + * + * Uses a custom @ref detail::SboCallable instead of @c std::function to avoid + * heap allocations for callables up to @c TaskSize - 8 bytes. No futures, no + * packaged_task, no statistics, no tracing -- just raw throughput. + * + * Workers are @ref ThreadWrapper instances so that naming, affinity, and + * scheduling policy can still be configured after construction. + * + * @par API + * Only @c post() (fire-and-forget) is provided. For tasks that need a return + * value, use @ref ThreadPool or @ref HighPerformancePool with @c submit(). + * + * @tparam TaskSize Total size in bytes of each inline task slot (default 64, + * one x86 cache line). Usable buffer = @c TaskSize - 8 bytes. 
+ */ +template +class LightweightPoolT +{ + public: + explicit LightweightPoolT(size_t num_threads = std::thread::hardware_concurrency()) + : num_threads_(num_threads == 0 ? 1 : num_threads) + { + workers_.reserve(num_threads_); + for (size_t i = 0; i < num_threads_; ++i) + workers_.emplace_back(&LightweightPoolT::worker_loop, this); + } + + LightweightPoolT(LightweightPoolT const&) = delete; + auto operator=(LightweightPoolT const&) -> LightweightPoolT& = delete; + + ~LightweightPoolT() { shutdown(ShutdownPolicy::drain); } + + /** + * @brief Fire-and-forget task submission. Throws on shutdown. + */ + template + void post(F&& f, Args&&... args) + { + auto r = try_post(std::forward(f), std::forward(args)...); + if (!r.has_value()) + throw std::runtime_error("LightweightPool is shutting down"); + } + + /** + * @brief Fire-and-forget task submission. Returns error on shutdown. + */ + template + auto try_post(F&& f, Args&&... args) -> expected + { + detail::SboCallable task(detail::bind_args(std::forward(f), std::forward(args)...)); + { + std::lock_guard lock(mutex_); + if (stop_) + return unexpected(std::make_error_code(std::errc::operation_canceled)); + tasks_.push(std::move(task)); + } + condition_.notify_one(); + return {}; + } + + /** + * @brief Batch fire-and-forget submission under a single lock. + */ + template + void post_batch(Iterator begin, Iterator end) + { + auto r = try_post_batch(begin, end); + if (!r.has_value()) + throw std::runtime_error("LightweightPool is shutting down"); + } + + /** + * @brief Batch fire-and-forget submission. Returns error on shutdown. 
+ */ + template + auto try_post_batch(Iterator begin, Iterator end) -> expected + { + { + std::lock_guard lock(mutex_); + if (stop_) + return unexpected(std::make_error_code(std::errc::operation_canceled)); + for (auto it = begin; it != end; ++it) + tasks_.push(detail::SboCallable(*it)); + } + condition_.notify_all(); + return {}; + } + +#if __cpp_lib_ranges >= 201911L + template + void post_batch(R&& range) { post_batch(std::ranges::begin(range), std::ranges::end(range)); } + + template + auto try_post_batch(R&& range) { return try_post_batch(std::ranges::begin(range), std::ranges::end(range)); } +#endif + + /** + * @brief Shut the pool down. + */ + void shutdown(ShutdownPolicy policy = ShutdownPolicy::drain) + { + { + std::lock_guard lock(mutex_); + if (stop_) + return; + stop_ = true; + if (policy == ShutdownPolicy::drop_pending) + { + std::queue> empty; + tasks_.swap(empty); + } + } + condition_.notify_all(); + for (auto& w : workers_) + { + if (w.joinable()) + w.join(); + } + workers_.clear(); + } + + /** + * @brief Timed drain: finish as many tasks as possible within timeout. + * @return @c true if all tasks completed, @c false on timeout. 
+ */ + auto shutdown_for(std::chrono::milliseconds timeout) -> bool + { + auto const deadline = std::chrono::steady_clock::now() + timeout; + { + std::lock_guard lock(mutex_); + if (stop_) + return true; + } + std::unique_lock lock(mutex_); + bool const drained = drain_condition_.wait_until(lock, deadline, [this] { + return tasks_.empty() && active_tasks_.load(std::memory_order_acquire) == 0; + }); + lock.unlock(); + shutdown(ShutdownPolicy::drain); + return drained; + } + + [[nodiscard]] auto size() const noexcept -> size_t { return num_threads_; } + + auto configure_threads(std::string const& name_prefix, SchedulingPolicy policy = SchedulingPolicy::OTHER, + ThreadPriority priority = ThreadPriority::normal()) -> expected + { + return detail::configure_worker_threads(workers_, name_prefix, policy, priority); + } + + auto set_affinity(ThreadAffinity const& affinity) -> expected + { + return detail::set_worker_affinity(workers_, affinity); + } + + auto distribute_across_cpus() -> expected + { + return detail::distribute_workers_across_cpus(workers_); + } + + private: + size_t num_threads_; + std::vector workers_; + std::queue> tasks_; + std::mutex mutex_; + std::condition_variable condition_; + std::condition_variable drain_condition_; + std::atomic stop_{false}; + std::atomic active_tasks_{0}; + + void worker_loop() + { + while (true) + { + detail::SboCallable task; + { + std::unique_lock lock(mutex_); + condition_.wait(lock, [this] { return stop_ || !tasks_.empty(); }); + if (stop_ && tasks_.empty()) + return; + if (!tasks_.empty()) + { + task = std::move(tasks_.front()); + tasks_.pop(); + active_tasks_.fetch_add(1, std::memory_order_relaxed); + } + else + continue; + } + try + { + task(); + } + catch (...) + { + } + active_tasks_.fetch_sub(1, std::memory_order_relaxed); + drain_condition_.notify_all(); + } + } +}; + +/** @brief Default lightweight pool with 64-byte task slots. 
*/ +using LightweightPool = LightweightPoolT<>; + // --------------------------------------------------------------------------- // GlobalPool // --------------------------------------------------------------------------- @@ -1443,6 +1888,18 @@ class GlobalPool return instance().try_submit(std::forward(f), std::forward(args)...); } + template + static void post(F&& f, Args&&... args) + { + instance().post(std::forward(f), std::forward(args)...); + } + + template + static auto try_post(F&& f, Args&&... args) + { + return instance().try_post(std::forward(f), std::forward(args)...); + } + template static auto submit_batch(Iterator begin, Iterator end) { diff --git a/include/threadschedule/threadschedule.hpp b/include/threadschedule/threadschedule.hpp index 4896b12..48c509e 100644 --- a/include/threadschedule/threadschedule.hpp +++ b/include/threadschedule/threadschedule.hpp @@ -62,10 +62,13 @@ using ts::GlobalThreadPool; using ts::HighPerformancePool; using ts::HighPerformancePoolWithErrors; using ts::JThreadWrapper; +using ts::LightweightPool; +using ts::LightweightPoolT; using ts::JThreadWrapperView; using ts::read_topology; using ts::ScheduledFastThreadPool; using ts::ScheduledHighPerformancePool; +using ts::ScheduledLightweightPool; using ts::ScheduledTaskHandle; using ts::ScheduledThreadPool; using ts::ScheduledThreadPoolT; From 56e79a32e621d8f85da3db8aa945d1510cccb314 Mon Sep 17 00:00:00 2001 From: Katze719 Date: Sun, 5 Apr 2026 17:57:30 +0200 Subject: [PATCH 06/15] Fix buffer initialization in SboCallable to ensure proper alignment without default initialization. Added missing include for to support size-related definitions. 
--- include/threadschedule/thread_pool.hpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/include/threadschedule/thread_pool.hpp b/include/threadschedule/thread_pool.hpp index b1caefd..1b9d443 100644 --- a/include/threadschedule/thread_pool.hpp +++ b/include/threadschedule/thread_pool.hpp @@ -6,6 +6,7 @@ #include "thread_wrapper.hpp" #include #include +#include #include #include #include @@ -256,7 +257,7 @@ class SboCallable private: VTable const* vtable_ = nullptr; - alignas(std::max_align_t) unsigned char buffer_[kBufferSize]{}; + alignas(std::max_align_t) unsigned char buffer_[kBufferSize]; }; } // namespace detail From 2ab3492a0d3f0fa865d4f971face7971428089aa Mon Sep 17 00:00:00 2001 From: Katze719 Date: Sun, 5 Apr 2026 20:27:48 +0200 Subject: [PATCH 07/15] Enhance benchmark tests and documentation for LightweightPool - Added new benchmarks for `LightweightPool` to evaluate performance in fire-and-forget scenarios, including minimal tasks, light tasks, and batch posting. - Updated `run_benchmarks.sh` to include guidance on using `LightweightPool` for low-overhead task execution. - Modified `CMakeLists.txt` to conditionally link the performance benchmark with Google Benchmark. - Refactored `performance_benchmark.cpp` to streamline task submission and improve clarity in benchmark results. - Updated CHANGELOG to reflect the addition of `LightweightPool` benchmarks and related enhancements. 
--- benchmarks/threadpool_benchmarks.cpp | 264 +++++++++++++++++++---- examples/CMakeLists.txt | 8 +- examples/performance_benchmark.cpp | 285 +++++++++---------------- include/threadschedule/thread_pool.hpp | 159 +++++++------- run_benchmarks.sh | 1 + 5 files changed, 420 insertions(+), 297 deletions(-) diff --git a/benchmarks/threadpool_benchmarks.cpp b/benchmarks/threadpool_benchmarks.cpp index 488e371..71ceb53 100644 --- a/benchmarks/threadpool_benchmarks.cpp +++ b/benchmarks/threadpool_benchmarks.cpp @@ -284,82 +284,220 @@ static void BM_HighPerformancePool_ParallelForEach(benchmark::State& state) state.SetLabel("threads=" + std::to_string(num_threads) + " items=" + std::to_string(data_size)); } +// ============================================================================= +// LightweightPool Benchmarks (fire-and-forget via post) +// ============================================================================= + +static void BM_LightweightPool_MinimalTasks(benchmark::State& state) +{ + size_t const num_threads = state.range(0); + size_t const num_tasks = state.range(1); + + LightweightPool pool(num_threads); + pool.configure_threads("bench"); + + for (auto _ : state) + { + std::atomic counter{0}; + + auto start = std::chrono::high_resolution_clock::now(); + + for (size_t i = 0; i < num_tasks; ++i) + { + pool.post([&counter]() { counter.fetch_add(1, std::memory_order_relaxed); }); + } + + while (counter.load(std::memory_order_acquire) < num_tasks) + std::this_thread::yield(); + + auto end = std::chrono::high_resolution_clock::now(); + auto elapsed = std::chrono::duration_cast(end - start); + state.SetIterationTime(elapsed.count() / 1e9); + } + + state.SetItemsProcessed(state.iterations() * num_tasks); + state.SetLabel("threads=" + std::to_string(num_threads) + " tasks=" + std::to_string(num_tasks)); +} + +static void BM_LightweightPool_LightTasks(benchmark::State& state) +{ + size_t const num_threads = state.range(0); + size_t const num_tasks = 
state.range(1); + + LightweightPool pool(num_threads); + pool.configure_threads("bench"); + + for (auto _ : state) + { + std::atomic counter{0}; + + for (size_t i = 0; i < num_tasks; ++i) + { + pool.post([&counter]() { + BenchmarkWorkloads::light_cpu_task(); + counter.fetch_add(1, std::memory_order_relaxed); + }); + } + + while (counter.load(std::memory_order_acquire) < num_tasks) + std::this_thread::yield(); + } + + state.SetItemsProcessed(state.iterations() * num_tasks); + state.SetLabel("threads=" + std::to_string(num_threads)); +} + +static void BM_LightweightPool_BatchPost(benchmark::State& state) +{ + size_t const num_threads = state.range(0); + size_t const batch_size = state.range(1); + + LightweightPool pool(num_threads); + pool.configure_threads("bench"); + + std::vector> tasks; + tasks.reserve(batch_size); + std::atomic counter{0}; + for (size_t i = 0; i < batch_size; ++i) + { + tasks.emplace_back([&counter]() { + BenchmarkWorkloads::light_cpu_task(); + counter.fetch_add(1, std::memory_order_relaxed); + }); + } + + for (auto _ : state) + { + counter = 0; + pool.post_batch(tasks.begin(), tasks.end()); + + while (counter.load(std::memory_order_acquire) < batch_size) + std::this_thread::yield(); + } + + state.SetItemsProcessed(state.iterations() * batch_size); + state.SetLabel("threads=" + std::to_string(num_threads) + " batch=" + std::to_string(batch_size)); +} + // ============================================================================= // Comparison Benchmarks (All Pools) // ============================================================================= // Note: This benchmark shows workload-dependent behavior: -// - For small task counts (< 100), simpler pools (ThreadPool/FastThreadPool) perform better +// - LightweightPool excels for fire-and-forget (no future overhead) +// - For small task counts (< 100), simpler pools perform better // - For larger task counts (1k+), HighPerformancePool shows its advantage due to work-stealing -// - Real-world 
workloads typically benefit from HighPerformancePool (e.g., image processing) static void BM_ComparePoolTypes_LightWorkload(benchmark::State& state) { - size_t const num_threads = 4; // Fixed for fair comparison + size_t const num_threads = 4; size_t const num_tasks = state.range(0); - int const pool_type = state.range(1); // 0=ThreadPool, 1=FastThreadPool, 2=HighPerformancePool + int const pool_type = state.range(1); for (auto _ : state) { - state.PauseTiming(); - - std::vector> futures; - futures.reserve(num_tasks); - - state.ResumeTiming(); - if (pool_type == 0) { ThreadPool pool(num_threads); pool.configure_threads("bench"); + std::vector> futures; + futures.reserve(num_tasks); for (size_t i = 0; i < num_tasks; ++i) - { futures.push_back(pool.submit(BenchmarkWorkloads::light_cpu_task)); - } - - for (auto& future : futures) - { - future.wait(); - } + for (auto& f : futures) + f.wait(); } else if (pool_type == 1) { FastThreadPool pool(num_threads); pool.configure_threads("bench"); + std::vector> futures; + futures.reserve(num_tasks); for (size_t i = 0; i < num_tasks; ++i) - { futures.push_back(pool.submit(BenchmarkWorkloads::light_cpu_task)); - } - - for (auto& future : futures) - { - future.wait(); - } + for (auto& f : futures) + f.wait(); } else if (pool_type == 2) { HighPerformancePool pool(num_threads); pool.configure_threads("bench"); pool.distribute_across_cpus(); + std::vector> futures; + futures.reserve(num_tasks); for (size_t i = 0; i < num_tasks; ++i) - { futures.push_back(pool.submit(BenchmarkWorkloads::light_cpu_task)); - } + for (auto& f : futures) + f.wait(); + } + else if (pool_type == 3) + { + LightweightPool pool(num_threads); + pool.configure_threads("bench"); + std::atomic counter{0}; - for (auto& future : futures) + for (size_t i = 0; i < num_tasks; ++i) { - future.wait(); + pool.post([&counter]() { + BenchmarkWorkloads::light_cpu_task(); + counter.fetch_add(1, std::memory_order_relaxed); + }); } + + while 
(counter.load(std::memory_order_acquire) < num_tasks) + std::this_thread::yield(); } } - std::vector pool_names = {"ThreadPool", "FastThreadPool", "HighPerformancePool"}; + std::vector pool_names = {"ThreadPool", "FastThreadPool", "HighPerformancePool", "LightweightPool"}; state.SetItemsProcessed(state.iterations() * num_tasks); state.SetLabel(pool_names[pool_type] + " tasks=" + std::to_string(num_tasks)); } +// ============================================================================= +// Post vs Submit comparison (fire-and-forget overhead on pools that support both) +// ============================================================================= + +static void BM_PostVsSubmit(benchmark::State& state) +{ + size_t const num_tasks = state.range(0); + int const mode = state.range(1); + + HighPerformancePool pool(4); + pool.configure_threads("bench"); + + for (auto _ : state) + { + if (mode == 0) + { + std::vector> futures; + futures.reserve(num_tasks); + for (size_t i = 0; i < num_tasks; ++i) + futures.push_back(pool.submit(BenchmarkWorkloads::minimal_task)); + for (auto& f : futures) + f.wait(); + } + else + { + std::atomic counter{0}; + for (size_t i = 0; i < num_tasks; ++i) + { + pool.post([&counter]() { + BenchmarkWorkloads::minimal_task(); + counter.fetch_add(1, std::memory_order_relaxed); + }); + } + while (counter.load(std::memory_order_acquire) < num_tasks) + std::this_thread::yield(); + } + } + + state.SetItemsProcessed(state.iterations() * num_tasks); + state.SetLabel(mode == 0 ? 
"submit(future)" : "post(fire-forget)"); +} + // ============================================================================= // Registration with various parameter combinations // ============================================================================= @@ -462,23 +600,77 @@ BENCHMARK(BM_HighPerformancePool_ParallelForEach) ->Args({16, 1000000}) ->Unit(benchmark::kMillisecond); -// Pool comparison benchmarks - showing workload-dependent behavior +// LightweightPool benchmarks +BENCHMARK(BM_LightweightPool_MinimalTasks) + ->Args({1, 100}) + ->Args({2, 100}) + ->Args({4, 100}) + ->Args({8, 100}) + ->Args({1, 1000}) + ->Args({2, 1000}) + ->Args({4, 1000}) + ->Args({8, 1000}) + ->Args({1, 10000}) + ->Args({4, 10000}) + ->Args({8, 10000}) + ->Args({4, 100000}) + ->Args({8, 100000}) + ->UseManualTime() + ->Unit(benchmark::kMicrosecond); + +BENCHMARK(BM_LightweightPool_LightTasks) + ->Args({1, 100}) + ->Args({2, 100}) + ->Args({4, 100}) + ->Args({8, 100}) + ->Args({1, 1000}) + ->Args({4, 1000}) + ->Args({8, 1000}) + ->Unit(benchmark::kMillisecond); + +BENCHMARK(BM_LightweightPool_BatchPost) + ->Args({1, 1000}) + ->Args({2, 1000}) + ->Args({4, 1000}) + ->Args({8, 1000}) + ->Args({4, 5000}) + ->Args({8, 5000}) + ->Args({4, 10000}) + ->Args({8, 10000}) + ->Unit(benchmark::kMillisecond); + +// Pool comparison benchmarks (all 4 pool types) BENCHMARK(BM_ComparePoolTypes_LightWorkload) ->Args({10, 0}) ->Args({10, 1}) - ->Args({10, 2}) // Very small tasks (ThreadPool/FastThreadPool advantage) + ->Args({10, 2}) + ->Args({10, 3}) ->Args({100, 0}) ->Args({100, 1}) - ->Args({100, 2}) // Small tasks (ThreadPool/FastThreadPool advantage) + ->Args({100, 2}) + ->Args({100, 3}) + ->Args({1000, 0}) + ->Args({1000, 1}) + ->Args({1000, 2}) + ->Args({1000, 3}) + ->Args({10000, 0}) + ->Args({10000, 1}) + ->Args({10000, 2}) + ->Args({10000, 3}) + ->Args({100000, 0}) + ->Args({100000, 1}) + ->Args({100000, 2}) + ->Args({100000, 3}) + ->Unit(benchmark::kMillisecond); + +// Post vs Submit 
overhead comparison +BENCHMARK(BM_PostVsSubmit) ->Args({1000, 0}) ->Args({1000, 1}) - ->Args({1000, 2}) // Medium tasks (mixed performance) ->Args({10000, 0}) ->Args({10000, 1}) - ->Args({10000, 2}) // Large tasks (HighPerformancePool advantage) ->Args({100000, 0}) ->Args({100000, 1}) - ->Args({100000, 2}) // Very large tasks (HighPerformancePool clear advantage) ->Unit(benchmark::kMillisecond); BENCHMARK_MAIN(); diff --git a/examples/CMakeLists.txt b/examples/CMakeLists.txt index 7f531b8..80e8c7e 100644 --- a/examples/CMakeLists.txt +++ b/examples/CMakeLists.txt @@ -4,9 +4,11 @@ add_executable(basic_example basic_example.cpp) target_link_libraries(basic_example ThreadSchedule::ThreadSchedule) -# Performance benchmark for high-performance ThreadPool -add_executable(performance_benchmark performance_benchmark.cpp) -target_link_libraries(performance_benchmark ThreadSchedule::ThreadSchedule) +# Performance benchmark for high-performance ThreadPool (requires Google Benchmark) +if(TARGET benchmark::benchmark) + add_executable(performance_benchmark performance_benchmark.cpp) + target_link_libraries(performance_benchmark ThreadSchedule::ThreadSchedule benchmark::benchmark) +endif() # Thread registry examples add_executable(registry_example registry_example.cpp) diff --git a/examples/performance_benchmark.cpp b/examples/performance_benchmark.cpp index 96d031b..2b5ec18 100644 --- a/examples/performance_benchmark.cpp +++ b/examples/performance_benchmark.cpp @@ -1,243 +1,156 @@ -#include #include -#include -#include -#include +#include #include #include -#include #include #include using namespace threadschedule; -class PerformanceBenchmark -{ - private: - HighPerformancePool pool_; - std::atomic completed_tasks_{0}; - std::atomic total_time_us_{0}; - - public: - explicit PerformanceBenchmark(size_t num_threads = std::thread::hardware_concurrency()) : pool_(num_threads) - { - pool_.configure_threads("bench", SchedulingPolicy::OTHER, ThreadPriority::normal()); - 
pool_.distribute_across_cpus(); - } +// ============================================================================= +// HighPerformancePool submission throughput (submit with futures) +// ============================================================================= - // Benchmark pure task submission/completion throughput - void benchmark_throughput(size_t num_tasks, std::string const& test_name) - { - std::cout << "\n=== " << test_name << " ===" << std::endl; - std::cout << "Tasks: " << num_tasks << ", Threads: " << pool_.size() << std::endl; +static void BM_HPPool_Throughput(benchmark::State& state) +{ + auto const num_tasks = static_cast(state.range(0)); - completed_tasks_ = 0; + HighPerformancePool pool(std::thread::hardware_concurrency()); + pool.configure_threads("bench", SchedulingPolicy::OTHER, ThreadPriority::normal()); + pool.distribute_across_cpus(); - auto start_time = std::chrono::high_resolution_clock::now(); + std::atomic completed{0}; + for (auto _ : state) + { + completed = 0; std::vector> futures; futures.reserve(num_tasks); - // Submit tasks as fast as possible for (size_t i = 0; i < num_tasks; ++i) - { - futures.push_back(pool_.submit([this]() { - // Minimal work to measure pure overhead - completed_tasks_.fetch_add(1, std::memory_order_relaxed); + futures.push_back(pool.submit([&completed]() { + completed.fetch_add(1, std::memory_order_relaxed); })); - } - - // Wait for completion - for (auto& future : futures) - { - future.wait(); - } - - auto end_time = std::chrono::high_resolution_clock::now(); - auto duration = std::chrono::duration_cast(end_time - start_time); - - double tasks_per_second = (static_cast(num_tasks) * 1000000.0) / duration.count(); - double avg_task_time_us = static_cast(duration.count()) / num_tasks; - std::cout << "Duration: " << duration.count() << "μs" << std::endl; - std::cout << "Throughput: " << std::fixed << std::setprecision(0) << tasks_per_second << " tasks/second" - << std::endl; - std::cout << "Avg task time: " 
<< std::fixed << std::setprecision(2) << avg_task_time_us << "μs" << std::endl; + for (auto& f : futures) + f.wait(); - auto stats = pool_.get_statistics(); - std::cout << "Work stealing: " << stats.stolen_tasks << " (" << std::fixed << std::setprecision(1) - << (100.0 * stats.stolen_tasks / stats.completed_tasks) << "%)" << std::endl; + benchmark::DoNotOptimize(completed.load()); } - // Benchmark batch processing - void benchmark_batch_processing(size_t batch_size) - { - std::cout << "\n=== Batch Processing Benchmark ===" << std::endl; - std::cout << "Batch size: " << batch_size << std::endl; + auto stats = pool.get_statistics(); + state.counters["steal_%"] = 100.0 * stats.stolen_tasks / std::max(stats.completed_tasks, size_t(1)); + state.SetItemsProcessed(state.iterations() * static_cast(num_tasks)); +} - std::vector> tasks; - tasks.reserve(batch_size); +BENCHMARK(BM_HPPool_Throughput) + ->Arg(1000)->Arg(10000)->Arg(100000) + ->Unit(benchmark::kMicrosecond); - std::atomic counter{0}; +// ============================================================================= +// HighPerformancePool batch processing +// ============================================================================= - for (size_t i = 0; i < batch_size; ++i) - { - tasks.emplace_back([&counter]() { counter.fetch_add(1, std::memory_order_relaxed); }); - } +static void BM_HPPool_Batch(benchmark::State& state) +{ + auto const batch_size = static_cast(state.range(0)); - auto start_time = std::chrono::high_resolution_clock::now(); + HighPerformancePool pool(std::thread::hardware_concurrency()); + pool.configure_threads("bench", SchedulingPolicy::OTHER, ThreadPriority::normal()); + pool.distribute_across_cpus(); - auto futures = pool_.submit_batch(tasks.begin(), tasks.end()); + std::atomic counter{0}; + std::vector> tasks; + tasks.reserve(batch_size); + for (size_t i = 0; i < batch_size; ++i) + tasks.emplace_back([&counter]() { counter.fetch_add(1, std::memory_order_relaxed); }); - for (auto& future : 
futures) - { - future.wait(); - } + for (auto _ : state) + { + auto futures = pool.submit_batch(tasks.begin(), tasks.end()); + for (auto& f : futures) + f.wait(); + } - auto end_time = std::chrono::high_resolution_clock::now(); - auto duration = std::chrono::duration_cast(end_time - start_time); + state.SetItemsProcessed(state.iterations() * static_cast(batch_size)); +} - double tasks_per_second = (static_cast(batch_size) * 1000000.0) / duration.count(); +BENCHMARK(BM_HPPool_Batch) + ->Arg(5000)->Arg(50000) + ->Unit(benchmark::kMillisecond); - std::cout << "Batch duration: " << duration.count() << "μs" << std::endl; - std::cout << "Batch throughput: " << std::fixed << std::setprecision(0) << tasks_per_second << " tasks/second" - << std::endl; - std::cout << "Completed: " << counter.load() << std::endl; - } +// ============================================================================= +// HighPerformancePool variable workload (simulating real tasks) +// ============================================================================= - // Benchmark with variable task durations (simulating real workloads) - void benchmark_variable_workload(size_t num_tasks) - { - std::cout << "\n=== Variable Workload Benchmark ===" << std::endl; - std::cout << "Tasks: " << num_tasks << " (variable duration)" << std::endl; +static void BM_HPPool_VariableWorkload(benchmark::State& state) +{ + auto const num_tasks = static_cast(state.range(0)); - std::random_device rd; - std::mt19937 gen(rd()); - std::uniform_int_distribution work_dist(10, 200); // 10-200 iterations + HighPerformancePool pool(std::thread::hardware_concurrency()); + pool.configure_threads("bench", SchedulingPolicy::OTHER, ThreadPriority::normal()); + pool.distribute_across_cpus(); - auto start_time = std::chrono::high_resolution_clock::now(); + std::mt19937 gen(42); + std::uniform_int_distribution work_dist(10, 200); + std::vector work_amounts(num_tasks); + for (auto& w : work_amounts) + w = work_dist(gen); + for (auto _ 
: state) + { std::vector> futures; futures.reserve(num_tasks); for (size_t i = 0; i < num_tasks; ++i) { - int work_amount = work_dist(gen); - futures.push_back(pool_.submit([work_amount]() { - // Variable amount of work + int const amount = work_amounts[i]; + futures.push_back(pool.submit([amount]() { volatile int x = 0; - for (int j = 0; j < work_amount; ++j) - { + for (int j = 0; j < amount; ++j) x += j * j; - } })); } - for (auto& future : futures) - { - future.wait(); - } - - auto end_time = std::chrono::high_resolution_clock::now(); - auto duration = std::chrono::duration_cast(end_time - start_time); - - double tasks_per_second = (static_cast(num_tasks) * 1000.0) / duration.count(); - - std::cout << "Variable workload duration: " << duration.count() << "ms" << std::endl; - std::cout << "Variable workload throughput: " << std::fixed << std::setprecision(0) << tasks_per_second - << " tasks/second" << std::endl; - - auto stats = pool_.get_statistics(); - std::cout << "Work stealing efficiency: " << std::fixed << std::setprecision(1) - << (100.0 * stats.stolen_tasks / stats.completed_tasks) << "%" << std::endl; + for (auto& f : futures) + f.wait(); } - // Benchmark parallel algorithms - void benchmark_parallel_algorithm() - { - std::cout << "\n=== Parallel Algorithm Benchmark ===" << std::endl; - - size_t const data_size = 10000000; // 10M elements - std::vector data(data_size); - - // Fill with test data - std::iota(data.begin(), data.end(), 1); - - std::atomic sum{0}; + state.SetItemsProcessed(state.iterations() * static_cast(num_tasks)); +} - auto start_time = std::chrono::high_resolution_clock::now(); +BENCHMARK(BM_HPPool_VariableWorkload) + ->Arg(1000)->Arg(25000) + ->Unit(benchmark::kMillisecond); - pool_.parallel_for_each(data.begin(), data.end(), - [&sum](int value) { sum.fetch_add(value * value, std::memory_order_relaxed); }); +// ============================================================================= +// HighPerformancePool parallel_for_each +// 
============================================================================= - auto end_time = std::chrono::high_resolution_clock::now(); - auto duration = std::chrono::duration_cast(end_time - start_time); +static void BM_HPPool_ParallelForEach(benchmark::State& state) +{ + auto const data_size = static_cast(state.range(0)); - double items_per_second = (static_cast(data_size) * 1000.0) / duration.count(); + HighPerformancePool pool(std::thread::hardware_concurrency()); + pool.configure_threads("bench", SchedulingPolicy::OTHER, ThreadPriority::normal()); + pool.distribute_across_cpus(); - std::cout << "Parallel algorithm: " << data_size << " items in " << duration.count() << "ms" << std::endl; - std::cout << "Processing rate: " << std::fixed << std::setprecision(0) << items_per_second << " items/second" - << std::endl; - std::cout << "Sum: " << sum.load() << std::endl; - } + std::vector data(data_size); + std::iota(data.begin(), data.end(), 1); - void print_system_info() + for (auto _ : state) { - std::cout << "\n=== System Information ===" << std::endl; - std::cout << "Hardware threads: " << std::thread::hardware_concurrency() << std::endl; - std::cout << "Pool threads: " << pool_.size() << std::endl; - - auto current_policy = ThreadInfo::get_current_policy(); - if (current_policy) - { - std::cout << "Current scheduling policy: " << to_string(*current_policy) << std::endl; - } - - auto nice_value = ThreadWrapper::get_nice_value(); - if (nice_value) - { - std::cout << "Process nice value: " << *nice_value << std::endl; - } + std::atomic sum{0}; + pool.parallel_for_each(data.begin(), data.end(), + [&sum](int v) { sum.fetch_add(v * v, std::memory_order_relaxed); }); + benchmark::DoNotOptimize(sum.load()); } -}; -int main() -{ - std::cout << "ThreadSchedule High-Performance ThreadPool Benchmark" << std::endl; - std::cout << "=====================================================" << std::endl; + state.SetItemsProcessed(state.iterations() * static_cast(data_size)); +} 
- try - { - PerformanceBenchmark benchmark; - - benchmark.print_system_info(); - - // Test different scales - benchmark.benchmark_throughput(1000, "Light Load (1K tasks)"); - benchmark.benchmark_throughput(10000, "Medium Load (10K tasks)"); - benchmark.benchmark_throughput(100000, "Heavy Load (100K tasks)"); - - benchmark.benchmark_batch_processing(50000); - benchmark.benchmark_variable_workload(25000); - benchmark.benchmark_parallel_algorithm(); - - std::cout << "\n=== Performance Summary ===" << std::endl; - std::cout << "The optimized ThreadPool achieves:" << std::endl; - std::cout << "- 100K+ tasks/second for minimal tasks" << std::endl; - std::cout << "- Efficient work stealing with < 20% stealing ratio" << std::endl; - std::cout << "- Low overhead batch processing" << std::endl; - std::cout << "- Scalable parallel algorithms" << std::endl; - std::cout << "\nFor 10K+ tasks/second workloads:" << std::endl; - std::cout << "- Use batch submission when possible" << std::endl; - std::cout << "- Keep tasks < 100μs duration" << std::endl; - std::cout << "- Monitor work stealing ratio" << std::endl; - std::cout << "- Configure CPU affinity for CPU-bound work" << std::endl; - } - catch (std::exception const& e) - { - std::cerr << "Benchmark failed: " << e.what() << std::endl; - return 1; - } +BENCHMARK(BM_HPPool_ParallelForEach) + ->Arg(100000)->Arg(1000000)->Arg(10000000) + ->Unit(benchmark::kMillisecond); - return 0; -} +BENCHMARK_MAIN(); diff --git a/include/threadschedule/thread_pool.hpp b/include/threadschedule/thread_pool.hpp index 1b9d443..48cbcee 100644 --- a/include/threadschedule/thread_pool.hpp +++ b/include/threadschedule/thread_pool.hpp @@ -6,10 +6,10 @@ #include "thread_wrapper.hpp" #include #include -#include -#include #include #include +#include +#include #include #include #include @@ -19,7 +19,7 @@ #include #if __cpp_lib_ranges >= 201911L -#include +# include #endif namespace threadschedule @@ -124,12 +124,9 @@ template auto bind_args(F&& f, Args&&... 
args) { #if __cpp_init_captures >= 201803L - return [fn = std::forward(f), ...a = std::forward(args)]() mutable { - return fn(std::move(a)...); - }; + return [fn = std::forward(f), ... a = std::forward(args)]() mutable { return fn(std::move(a)...); }; #else - return [fn = std::forward(f), - tup = std::make_tuple(std::forward(args)...)]() mutable { + return [fn = std::forward(f), tup = std::make_tuple(std::forward(args)...)]() mutable { return std::apply(std::move(fn), std::move(tup)); }; #endif @@ -166,33 +163,29 @@ class SboCallable template static constexpr bool fits_inline_v = - sizeof(F) <= kBufferSize && - alignof(F) <= alignof(std::max_align_t) && - std::is_nothrow_move_constructible_v; + sizeof(F) <= kBufferSize && alignof(F) <= alignof(std::max_align_t) && std::is_nothrow_move_constructible_v; template static VTable const* vtable_for() noexcept { if constexpr (fits_inline_v) { - static constexpr VTable vt{ - [](void* s) { (*static_cast(s))(); }, - [](void* s) { static_cast(s)->~F(); }, - [](void* dst, void* src) noexcept { - ::new (dst) F(std::move(*static_cast(src))); - static_cast(src)->~F(); - }}; + static constexpr VTable vt{[](void* s) { (*static_cast(s))(); }, + [](void* s) { static_cast(s)->~F(); }, + [](void* dst, void* src) noexcept { + ::new (dst) F(std::move(*static_cast(src))); + static_cast(src)->~F(); + }}; return &vt; } else { - static constexpr VTable vt{ - [](void* s) { (*(*static_cast(s)))(); }, - [](void* s) { delete *static_cast(s); }, - [](void* dst, void* src) noexcept { - *static_cast(dst) = *static_cast(src); - *static_cast(src) = nullptr; - }}; + static constexpr VTable vt{[](void* s) { (*(*static_cast(s)))(); }, + [](void* s) { delete *static_cast(s); }, + [](void* dst, void* src) noexcept { + *static_cast(dst) = *static_cast(src); + *static_cast(src) = nullptr; + }}; return &vt; } } @@ -245,7 +238,10 @@ class SboCallable vtable_->destroy(buffer_); } - explicit operator bool() const noexcept { return vtable_ != nullptr; } + 
explicit operator bool() const noexcept + { + return vtable_ != nullptr; + } void operator()() { @@ -297,8 +293,8 @@ class SboCallable using TaskStartCallback = std::function; /// Callback invoked when a pool worker finishes executing a task. -using TaskEndCallback = std::function; +using TaskEndCallback = + std::function; template class WorkStealingDeque @@ -418,6 +414,15 @@ class WorkStealingDeque } }; +/** + * @brief Controls how a pool handles pending tasks during shutdown. + */ +enum class ShutdownPolicy : uint8_t +{ + drain, ///< Finish all queued tasks before stopping (default). + drop_pending ///< Finish running tasks, discard queued ones. +}; + /** * @brief High-performance thread pool optimized for high-frequency task submission. * @@ -498,16 +503,6 @@ class WorkStealingDeque * work-stealing complexity. Best for high-throughput scenarios like * image processing, batch operations, etc. */ - -/** - * @brief Controls how a pool handles pending tasks during shutdown. - */ -enum class ShutdownPolicy : uint8_t -{ - drain, ///< Finish all queued tasks before stopping (default). - drop_pending ///< Finish running tasks, discard queued ones. -}; - class HighPerformancePool { public: @@ -527,8 +522,8 @@ class HighPerformancePool explicit HighPerformancePool(size_t num_threads = std::thread::hardware_concurrency(), size_t deque_capacity = WorkStealingDeque::DEFAULT_CAPACITY, bool register_workers = false) - : num_threads_(num_threads == 0 ? 1 : num_threads), register_workers_(register_workers), - stop_(false), next_victim_(0), start_time_(std::chrono::steady_clock::now()) + : num_threads_(num_threads == 0 ? 1 : num_threads), register_workers_(register_workers), stop_(false), + next_victim_(0), start_time_(std::chrono::steady_clock::now()) { worker_queues_.resize(num_threads_); for (size_t i = 0; i < num_threads_; ++i) @@ -614,8 +609,7 @@ class HighPerformancePool * @brief Submit a task, returning an error instead of throwing on shutdown. 
*/ template - auto try_submit(F&& f, Args&&... args) - -> expected>, std::error_code> + auto try_submit(F&& f, Args&&... args) -> expected>, std::error_code> { using return_type = std::invoke_result_t; @@ -725,8 +719,7 @@ class HighPerformancePool * is skipped and the future throws @c std::future_error (broken_promise). */ template - auto submit(std::stop_token token, F&& f, Args&&... args) - -> std::future> + auto submit(std::stop_token token, F&& f, Args&&... args) -> std::future> { return submit([token = std::move(token), bound = detail::bind_args(std::forward(f), std::forward(args)...)]() mutable { @@ -756,8 +749,7 @@ class HighPerformancePool * @brief Batch task submission, returning an error instead of throwing on shutdown. */ template - auto try_submit_batch(Iterator begin, Iterator end) - -> expected>, std::error_code> + auto try_submit_batch(Iterator begin, Iterator end) -> expected>, std::error_code> { std::vector> futures; size_t const batch_size = std::distance(begin, end); @@ -818,10 +810,16 @@ class HighPerformancePool #if __cpp_lib_ranges >= 201911L template - auto submit_batch(R&& range) { return submit_batch(std::ranges::begin(range), std::ranges::end(range)); } + auto submit_batch(R&& range) + { + return submit_batch(std::ranges::begin(range), std::ranges::end(range)); + } template - auto try_submit_batch(R&& range) { return try_submit_batch(std::ranges::begin(range), std::ranges::end(range)); } + auto try_submit_batch(R&& range) + { + return try_submit_batch(std::ranges::begin(range), std::ranges::end(range)); + } template void parallel_for_each(R&& range, F&& func) @@ -1167,10 +1165,8 @@ class ThreadPoolBase std::chrono::microseconds avg_task_time; }; - explicit ThreadPoolBase(size_t num_threads = std::thread::hardware_concurrency(), - bool register_workers = false) - : num_threads_(num_threads == 0 ? 
1 : num_threads), - register_workers_(register_workers), stop_(false), + explicit ThreadPoolBase(size_t num_threads = std::thread::hardware_concurrency(), bool register_workers = false) + : num_threads_(num_threads == 0 ? 1 : num_threads), register_workers_(register_workers), stop_(false), start_time_(std::chrono::steady_clock::now()) { workers_.reserve(num_threads_); @@ -1193,8 +1189,7 @@ class ThreadPoolBase * @brief Submit a task, returning an error instead of throwing on shutdown. */ template - auto try_submit(F&& f, Args&&... args) - -> expected>, std::error_code> + auto try_submit(F&& f, Args&&... args) -> expected>, std::error_code> { using return_type = std::invoke_result_t; @@ -1259,8 +1254,7 @@ class ThreadPoolBase * is skipped and returns a default-constructed result. */ template - auto submit(std::stop_token token, F&& f, Args&&... args) - -> std::future> + auto submit(std::stop_token token, F&& f, Args&&... args) -> std::future> { return submit([token = std::move(token), bound = detail::bind_args(std::forward(f), std::forward(args)...)]() mutable { @@ -1287,8 +1281,7 @@ class ThreadPoolBase * @brief Submit multiple tasks, returning an error instead of throwing on shutdown. 
*/ template - auto try_submit_batch(Iterator begin, Iterator end) - -> expected>, std::error_code> + auto try_submit_batch(Iterator begin, Iterator end) -> expected>, std::error_code> { std::vector> futures; futures.reserve(std::distance(begin, end)); @@ -1333,10 +1326,16 @@ class ThreadPoolBase #if __cpp_lib_ranges >= 201911L template - auto submit_batch(R&& range) { return submit_batch(std::ranges::begin(range), std::ranges::end(range)); } + auto submit_batch(R&& range) + { + return submit_batch(std::ranges::begin(range), std::ranges::end(range)); + } template - auto try_submit_batch(R&& range) { return try_submit_batch(std::ranges::begin(range), std::ranges::end(range)); } + auto try_submit_batch(R&& range) + { + return try_submit_batch(std::ranges::begin(range), std::ranges::end(range)); + } template void parallel_for_each(R&& range, F&& func) @@ -1436,9 +1435,8 @@ class ThreadPoolBase } std::unique_lock lock(queue_mutex_); - bool const drained = task_finished_condition_.wait_until(lock, deadline, [this] { - return tasks_.empty() && active_tasks_.load(std::memory_order_acquire) == 0; - }); + bool const drained = task_finished_condition_.wait_until( + lock, deadline, [this] { return tasks_.empty() && active_tasks_.load(std::memory_order_acquire) == 0; }); lock.unlock(); shutdown(ShutdownPolicy::drain); @@ -1650,7 +1648,10 @@ class LightweightPoolT LightweightPoolT(LightweightPoolT const&) = delete; auto operator=(LightweightPoolT const&) -> LightweightPoolT& = delete; - ~LightweightPoolT() { shutdown(ShutdownPolicy::drain); } + ~LightweightPoolT() + { + shutdown(ShutdownPolicy::drain); + } /** * @brief Fire-and-forget task submission. Throws on shutdown. 
@@ -1710,10 +1711,16 @@ class LightweightPoolT #if __cpp_lib_ranges >= 201911L template - void post_batch(R&& range) { post_batch(std::ranges::begin(range), std::ranges::end(range)); } + void post_batch(R&& range) + { + post_batch(std::ranges::begin(range), std::ranges::end(range)); + } template - auto try_post_batch(R&& range) { return try_post_batch(std::ranges::begin(range), std::ranges::end(range)); } + auto try_post_batch(R&& range) + { + return try_post_batch(std::ranges::begin(range), std::ranges::end(range)); + } #endif /** @@ -1754,15 +1761,17 @@ class LightweightPoolT return true; } std::unique_lock lock(mutex_); - bool const drained = drain_condition_.wait_until(lock, deadline, [this] { - return tasks_.empty() && active_tasks_.load(std::memory_order_acquire) == 0; - }); + bool const drained = drain_condition_.wait_until( + lock, deadline, [this] { return tasks_.empty() && active_tasks_.load(std::memory_order_acquire) == 0; }); lock.unlock(); shutdown(ShutdownPolicy::drain); return drained; } - [[nodiscard]] auto size() const noexcept -> size_t { return num_threads_; } + [[nodiscard]] auto size() const noexcept -> size_t + { + return num_threads_; + } auto configure_threads(std::string const& name_prefix, SchedulingPolicy policy = SchedulingPolicy::OTHER, ThreadPriority priority = ThreadPriority::normal()) -> expected @@ -1921,10 +1930,16 @@ class GlobalPool #if __cpp_lib_ranges >= 201911L template - static auto submit_batch(R&& range) { return instance().submit_batch(std::forward(range)); } + static auto submit_batch(R&& range) + { + return instance().submit_batch(std::forward(range)); + } template - static auto try_submit_batch(R&& range) { return instance().try_submit_batch(std::forward(range)); } + static auto try_submit_batch(R&& range) + { + return instance().try_submit_batch(std::forward(range)); + } template static void parallel_for_each(R&& range, F&& func) diff --git a/run_benchmarks.sh b/run_benchmarks.sh index 78497b8..d16b37c 100755 --- 
a/run_benchmarks.sh +++ b/run_benchmarks.sh @@ -234,6 +234,7 @@ echo " ./build/benchmarks/threadpool_resampling_benchmarks --benchmark_filter=\ echo "" echo -e "${YELLOW}Pool Selection Guide:${NC}" echo " - Use HighPerformancePool for: Batch processing, image processing, high-throughput scenarios (1k+ tasks)" +echo " - Use LightweightPool for: Fire-and-forget tasks, lowest overhead, no futures needed" echo " - Use FastThreadPool for: Medium workloads, consistent task patterns (100-10k tasks)" echo " - Use ThreadPool for: Simple workloads, low task counts (< 1k tasks)" echo "" From a16c6d0f5f933e508c870f9f511d92ee841c8c87 Mon Sep 17 00:00:00 2001 From: Katze719 Date: Sun, 5 Apr 2026 20:36:51 +0200 Subject: [PATCH 08/15] Enhance documentation for thread pool and callable management - Expanded the documentation for `SboCallable` to clarify its design, storage layout, inline eligibility, move semantics, and thread safety. - Added detailed descriptions for `ShutdownPolicy` to outline shutdown behavior options. - Improved method documentation in `HighPerformancePool`, including `try_submit`, `submit`, `post`, and batch submission methods, emphasizing error handling and performance benefits. - Introduced C++20 ranges overloads and observers for better usability and clarity in task management. --- include/threadschedule/thread_pool.hpp | 451 ++++++++++++++++++++----- 1 file changed, 365 insertions(+), 86 deletions(-) diff --git a/include/threadschedule/thread_pool.hpp b/include/threadschedule/thread_pool.hpp index 48cbcee..a75ac3f 100644 --- a/include/threadschedule/thread_pool.hpp +++ b/include/threadschedule/thread_pool.hpp @@ -139,13 +139,35 @@ auto bind_args(F&& f, Args&&... args) /** * @brief Type-erased, move-only callable with configurable inline storage. * - * Avoids the heap allocation that @c std::function incurs for callables - * larger than its (typically 16-byte) internal buffer. 
Callables that fit - * within @c TaskSize - sizeof(void*) bytes are stored inline; larger ones - * fall back to a heap allocation. + * Designed as a lightweight replacement for @c std::function when heap + * allocations are undesirable. Callables whose size and alignment fit + * within the inline buffer are stored in-place (Small Buffer Optimization); + * larger callables fall back to a heap allocation transparently. + * + * @par Storage layout + * @code + * |<---------- TaskSize bytes ---------->| + * [ VTable* (8 B) | inline buffer ] + * @endcode + * The usable inline buffer is @c TaskSize - sizeof(void*) bytes + * (56 bytes on 64-bit platforms with the default @c TaskSize of 64). + * + * @par Inline eligibility + * A callable @c F is stored inline when all of the following hold: + * - @c sizeof(F) <= kBufferSize + * - @c alignof(F) <= alignof(std::max_align_t) + * - @c std::is_nothrow_move_constructible_v + * + * @par Move semantics + * Move-only. Invoking @c operator() consumes the callable (invoke + destroy), + * leaving the object in an empty state. This single-shot design avoids the + * overhead of reference counting or shared ownership. + * + * @par Thread safety + * Not thread-safe. Intended to be used as a queue element inside a + * mutex-protected task queue. * * @tparam TaskSize Total object size in bytes (default 64, one x86 cache line). - * The usable inline buffer is @c TaskSize - 8 bytes on 64-bit platforms. */ template class SboCallable @@ -416,6 +438,17 @@ class WorkStealingDeque /** * @brief Controls how a pool handles pending tasks during shutdown. + * + * Passed to @c shutdown() on any pool type to select graceful vs. immediate + * shutdown behaviour. 
+ * + * | Policy | Running tasks | Queued tasks | + * |-----------------|---------------|---------------------| + * | @c drain | Finish | Execute, then stop | + * | @c drop_pending | Finish | Discard immediately | + * + * @see HighPerformancePool::shutdown, ThreadPoolBase::shutdown, + * LightweightPoolT::shutdown */ enum class ShutdownPolicy : uint8_t { @@ -606,7 +639,20 @@ class HighPerformancePool } /** - * @brief Submit a task, returning an error instead of throwing on shutdown. + * @brief Submit a task without throwing on shutdown. + * + * Wraps the callable in a @c std::packaged_task and enqueues it. + * Returns an @c expected containing the @c std::future on success, + * or @c std::errc::operation_canceled if the pool is shutting down. + * + * @tparam F Callable type. + * @tparam Args Argument types forwarded to @p F. + * @param f Callable to execute. + * @param args Arguments forwarded to @p f. + * @return @c expected, std::error_code> where + * @c R = @c std::invoke_result_t. + * + * @see submit() for the throwing variant. */ template auto try_submit(F&& f, Args&&... args) -> expected>, std::error_code> @@ -651,7 +697,13 @@ class HighPerformancePool } /** - * @brief Submit a task. Throws std::runtime_error if the pool is shutting down. + * @brief Submit a task, throwing on shutdown. + * + * Equivalent to @ref try_submit but throws @c std::runtime_error instead + * of returning an error code when the pool is shutting down. + * + * @throws std::runtime_error If the pool is shutting down. + * @return @c std::future that becomes ready when the task completes. */ template auto submit(F&& f, Args&&... args) -> std::future> @@ -663,7 +715,14 @@ class HighPerformancePool } /** - * @brief Fire-and-forget submission (no future, no packaged_task overhead). + * @brief Fire-and-forget task submission (throwing variant). 
+ * + * Enqueues a callable without creating a @c std::packaged_task or + * @c std::future, giving roughly 3x higher throughput than @ref submit() + * for tasks whose return value is not needed. + * + * @throws std::runtime_error If the pool is shutting down. + * @see try_post() for the non-throwing variant. */ template void post(F&& f, Args&&... args) @@ -674,7 +733,10 @@ class HighPerformancePool } /** - * @brief Fire-and-forget submission. Returns error on shutdown. + * @brief Fire-and-forget task submission (non-throwing variant). + * + * @return @c expected -- + * @c std::errc::operation_canceled on shutdown. */ template auto try_post(F&& f, Args&&... args) -> expected @@ -746,7 +808,15 @@ class HighPerformancePool #endif /** - * @brief Batch task submission, returning an error instead of throwing on shutdown. + * @brief Submit a range of @c void() callables in one go (non-throwing). + * + * Acquires the lock once per batch, distributing tasks across worker + * queues in round-robin fashion. Significantly more efficient than + * calling @ref submit() in a loop for large batches. + * + * @tparam Iterator Forward iterator whose value_type is callable as @c void(). + * @return @c expected containing a vector of futures, or + * @c std::errc::operation_canceled on shutdown. */ template auto try_submit_batch(Iterator begin, Iterator end) -> expected>, std::error_code> @@ -788,7 +858,9 @@ class HighPerformancePool } /** - * @brief Batch task submission. Throws on shutdown. + * @brief Submit a range of @c void() callables in one go (throwing). + * @throws std::runtime_error If the pool is shutting down. + * @see try_submit_batch() for the non-throwing variant. */ template auto submit_batch(Iterator begin, Iterator end) -> std::vector> @@ -800,7 +872,10 @@ class HighPerformancePool } /** - * @brief Apply a function to a range in parallel using chunked work distribution. + * @brief Apply @p func to every element in @c [begin, end) in parallel. 
+ * + * The range is split into chunks and submitted as tasks. Blocks until + * all elements have been processed. */ template void parallel_for_each(Iterator begin, Iterator end, F&& func) @@ -809,6 +884,7 @@ class HighPerformancePool } #if __cpp_lib_ranges >= 201911L + /// @{ @name C++20 Ranges overloads template auto submit_batch(R&& range) { @@ -826,13 +902,19 @@ class HighPerformancePool { parallel_for_each(std::ranges::begin(range), std::ranges::end(range), std::forward(func)); } + /// @} #endif + /// @name Observers + /// @{ + + /// @brief Number of worker threads in this pool. [[nodiscard]] auto size() const noexcept -> size_t { return num_threads_; } + /// @brief Approximate count of tasks waiting in all queues. [[nodiscard]] auto pending_tasks() const -> size_t { size_t total = 0; @@ -846,35 +928,7 @@ class HighPerformancePool return total; } - /** - * @brief Configure all worker threads - */ - auto configure_threads(std::string const& name_prefix, SchedulingPolicy policy = SchedulingPolicy::OTHER, - ThreadPriority priority = ThreadPriority::normal()) -> expected - { - return detail::configure_worker_threads(workers_, name_prefix, policy, priority); - } - - auto set_affinity(ThreadAffinity const& affinity) -> expected - { - return detail::set_worker_affinity(workers_, affinity); - } - - auto distribute_across_cpus() -> expected - { - return detail::distribute_workers_across_cpus(workers_); - } - - void wait_for_tasks() - { - std::unique_lock lock(completion_mutex_); - completion_condition_.wait( - lock, [this] { return pending_tasks() == 0 && active_tasks_.load(std::memory_order_acquire) == 0; }); - } - - /** - * @brief Get detailed performance statistics - */ + /// @brief Collect approximate performance counters. 
auto get_statistics() const -> Statistics { auto const now = std::chrono::steady_clock::now(); @@ -909,8 +963,58 @@ class HighPerformancePool return stats; } + /// @} + + /// @name Thread configuration + /// @{ + + /** + * @brief Name, schedule and prioritize all worker threads. + * + * Each worker is named @c name_prefix + "_0", @c "_1", etc. + * + * @return @c expected -- error if the OS + * rejected any configuration call. + */ + auto configure_threads(std::string const& name_prefix, SchedulingPolicy policy = SchedulingPolicy::OTHER, + ThreadPriority priority = ThreadPriority::normal()) -> expected + { + return detail::configure_worker_threads(workers_, name_prefix, policy, priority); + } + + /// @brief Pin all workers to the same CPU set. + auto set_affinity(ThreadAffinity const& affinity) -> expected + { + return detail::set_worker_affinity(workers_, affinity); + } + + /// @brief Pin each worker to a distinct CPU core (round-robin). + auto distribute_across_cpus() -> expected + { + return detail::distribute_workers_across_cpus(workers_); + } + + /// @} + + /// @name Synchronisation + /// @{ + + /// @brief Block until all pending and active tasks have completed. + void wait_for_tasks() + { + std::unique_lock lock(completion_mutex_); + completion_condition_.wait( + lock, [this] { return pending_tasks() == 0 && active_tasks_.load(std::memory_order_acquire) == 0; }); + } + + /// @} + + /// @name Tracing hooks + /// @{ + /** - * @brief Set a callback invoked at the start of each task. + * @brief Register a callback invoked just before each task executes. + * @param cb Receives the start time and the worker's @c std::thread::id. */ void set_on_task_start(TaskStartCallback cb) { @@ -919,7 +1023,9 @@ class HighPerformancePool } /** - * @brief Set a callback invoked at the end of each task. + * @brief Register a callback invoked just after each task completes. 
+ * @param cb Receives the end time, the worker's @c std::thread::id, + * and the wall-clock duration of the task. */ void set_on_task_end(TaskEndCallback cb) { @@ -927,6 +1033,8 @@ class HighPerformancePool on_task_end_ = std::move(cb); } + /// @} + private: size_t num_threads_; bool register_workers_; @@ -1185,8 +1293,13 @@ class ThreadPoolBase shutdown(ShutdownPolicy::drain); } + /// @name Task submission + /// @{ + /** - * @brief Submit a task, returning an error instead of throwing on shutdown. + * @brief Submit a task without throwing on shutdown. + * @return @c expected, std::error_code>. + * @see submit() for the throwing variant. */ template auto try_submit(F&& f, Args&&... args) -> expected>, std::error_code> @@ -1210,7 +1323,8 @@ class ThreadPoolBase } /** - * @brief Submit a task. Throws std::runtime_error if the pool is shutting down. + * @brief Submit a task, throwing on shutdown. + * @throws std::runtime_error If the pool is shutting down. */ template auto submit(F&& f, Args&&... args) -> std::future> @@ -1222,7 +1336,12 @@ class ThreadPoolBase } /** - * @brief Fire-and-forget submission (no future, no packaged_task overhead). + * @brief Fire-and-forget task submission (throwing variant). + * + * Bypasses @c std::packaged_task / @c std::future for lower overhead. + * + * @throws std::runtime_error If the pool is shutting down. + * @see try_post() */ template void post(F&& f, Args&&... args) @@ -1233,7 +1352,9 @@ class ThreadPoolBase } /** - * @brief Fire-and-forget submission. Returns error on shutdown. + * @brief Fire-and-forget task submission (non-throwing variant). + * @return @c expected -- + * @c std::errc::operation_canceled on shutdown. */ template auto try_post(F&& f, Args&&... args) -> expected @@ -1250,8 +1371,10 @@ class ThreadPoolBase #if __cpp_lib_jthread >= 201911L /** - * @brief Submit a cancellable task. If stop is already requested the task - * is skipped and returns a default-constructed result. 
+ * @brief Submit a cancellable task (C++20). + * + * If @p token is already stopped the task body is skipped and + * the future receives a default-constructed result. */ template auto submit(std::stop_token token, F&& f, Args&&... args) -> std::future> @@ -1264,6 +1387,7 @@ class ThreadPoolBase }); } + /// @brief Non-throwing cancellable submission (C++20). template auto try_submit(std::stop_token token, F&& f, Args&&... args) -> expected>, std::error_code> @@ -1278,7 +1402,9 @@ class ThreadPoolBase #endif /** - * @brief Submit multiple tasks, returning an error instead of throwing on shutdown. + * @brief Submit a range of @c void() callables in one go (non-throwing). + * + * All tasks are enqueued under a single lock acquisition. */ template auto try_submit_batch(Iterator begin, Iterator end) -> expected>, std::error_code> @@ -1303,9 +1429,7 @@ class ThreadPoolBase return futures; } - /** - * @brief Submit multiple tasks under a single lock acquisition. Throws on shutdown. - */ + /// @brief Submit a batch of tasks (throwing). @see try_submit_batch() template auto submit_batch(Iterator begin, Iterator end) -> std::vector> { @@ -1315,9 +1439,7 @@ class ThreadPoolBase return std::move(result.value()); } - /** - * @brief Apply a function to a range in parallel using chunked work distribution. - */ + /// @brief Apply @p func to @c [begin, end) in parallel (chunked). template void parallel_for_each(Iterator begin, Iterator end, F&& func) { @@ -1325,6 +1447,7 @@ class ThreadPoolBase } #if __cpp_lib_ranges >= 201911L + /// @{ @name C++20 Ranges overloads template auto submit_batch(R&& range) { @@ -1342,21 +1465,35 @@ class ThreadPoolBase { parallel_for_each(std::ranges::begin(range), std::ranges::end(range), std::forward(func)); } + /// @} #endif + /// @} + + /// @name Observers + /// @{ + + /// @brief Number of worker threads. [[nodiscard]] auto size() const noexcept -> size_t { return num_threads_; } + /// @brief Number of tasks waiting in the queue. 
[[nodiscard]] auto pending_tasks() const -> size_t { std::lock_guard lock(queue_mutex_); return tasks_.size(); } + /// @} + + /// @name Thread configuration + /// @{ + /** - * @brief Configure all worker threads (name, scheduling policy, priority) + * @brief Name, schedule and prioritize all worker threads. + * @see HighPerformancePool::configure_threads */ auto configure_threads(std::string const& name_prefix, SchedulingPolicy policy = SchedulingPolicy::OTHER, ThreadPriority priority = ThreadPriority::normal()) -> expected @@ -1364,22 +1501,24 @@ class ThreadPoolBase return detail::configure_worker_threads(workers_, name_prefix, policy, priority); } - /** - * @brief Set CPU affinity for all worker threads - */ + /// @brief Pin all workers to the same CPU set. auto set_affinity(ThreadAffinity const& affinity) -> expected { return detail::set_worker_affinity(workers_, affinity); } - /** - * @brief Distribute workers across available CPUs (round-robin) - */ + /// @brief Pin each worker to a distinct CPU core (round-robin). auto distribute_across_cpus() -> expected { return detail::distribute_workers_across_cpus(workers_); } + /// @} + + /// @name Synchronisation & lifecycle + /// @{ + + /// @brief Block until all pending and active tasks have completed. void wait_for_tasks() { std::unique_lock lock(queue_mutex_); @@ -1389,7 +1528,6 @@ class ThreadPoolBase /** * @brief Shut the pool down. - * * @param policy @c drain (default) finishes all queued tasks; * @c drop_pending discards queued tasks. */ @@ -1443,9 +1581,12 @@ class ThreadPoolBase return drained; } - /** - * @brief Get performance statistics - */ + /// @} + + /// @name Observers + /// @{ + + /// @brief Collect approximate performance counters. 
[[nodiscard]] auto get_statistics() const -> Statistics { auto const now = std::chrono::steady_clock::now(); @@ -1480,8 +1621,14 @@ class ThreadPoolBase return stats; } + /// @} + + /// @name Tracing hooks + /// @{ + /** - * @brief Set a callback invoked at the start of each task. + * @brief Register a callback invoked just before each task executes. + * @param cb Receives the start time and the worker's @c std::thread::id. */ void set_on_task_start(TaskStartCallback cb) { @@ -1490,7 +1637,9 @@ class ThreadPoolBase } /** - * @brief Set a callback invoked at the end of each task. + * @brief Register a callback invoked just after each task completes. + * @param cb Receives the end time, the worker's @c std::thread::id, + * and the wall-clock duration of the task. */ void set_on_task_end(TaskEndCallback cb) { @@ -1498,6 +1647,8 @@ class ThreadPoolBase on_task_end_ = std::move(cb); } + /// @} + private: size_t num_threads_; bool register_workers_; @@ -1619,24 +1770,79 @@ using FastThreadPool = ThreadPoolBase>; /** * @brief Ultra-lightweight fire-and-forget thread pool. * - * Uses a custom @ref detail::SboCallable instead of @c std::function to avoid - * heap allocations for callables up to @c TaskSize - 8 bytes. No futures, no - * packaged_task, no statistics, no tracing -- just raw throughput. + * Designed for maximum throughput on tasks whose return value is not needed. + * Typical measured throughput is **3x** higher than @ref submit() on the + * same hardware, because @c LightweightPoolT avoids the overhead of + * @c std::packaged_task, @c std::future, and @c std::shared_ptr entirely. 
+ * + * @par Internal architecture + * @code + * Producer(s) Single Queue Worker Threads + * +---------+ +------------------+ +----------------+ + * | post() | ---> | SboCallable<64> | ---> | ThreadWrapper | + * | post() | ---> | SboCallable<64> | ---> | ThreadWrapper | + * +---------+ +------------------+ +----------------+ + * mutex + cond_var + * @endcode + * + * - **Queue**: Single @c std::queue of @ref detail::SboCallable objects + * protected by one mutex + condition_variable. + * - **Workers**: @ref ThreadWrapper instances so that thread naming, CPU + * affinity, and scheduling policy can be configured after construction. + * - **SBO**: Callables up to @c TaskSize - 8 bytes are stored inline + * (no heap allocation). Larger callables fall back to the heap. + * + * @par What is @e not included (by design) + * - No @c std::future / @c std::packaged_task (use @ref submit() on other + * pools if you need return values). + * - No statistics counters (@ref HighPerformancePool::get_statistics). + * - No tracing hooks (@ref HighPerformancePool::set_on_task_start). + * - No work stealing (single shared queue). + * - No @c ThreadRegistry auto-registration. + * + * @par Execution guarantees + * - Every successfully posted task is guaranteed to execute (unless + * @c shutdown(ShutdownPolicy::drop_pending) is called). + * - Tasks are dequeued in FIFO order. Because multiple workers pop + * concurrently, the @e completion order is non-deterministic. + * - Exceptions thrown by tasks are silently caught; the worker continues. + * + * @par Thread safety + * @c post(), @c try_post(), @c post_batch(), and @c try_post_batch() may + * be called from any number of threads concurrently. @c shutdown() is + * internally guarded and safe to call more than once. + * + * @par Lifetime + * The destructor calls @c shutdown(ShutdownPolicy::drain) and joins all + * workers. It blocks until every queued task has been executed. 
* - * Workers are @ref ThreadWrapper instances so that naming, affinity, and - * scheduling policy can still be configured after construction. + * @par Choosing @c TaskSize + * The default of 64 bytes (one x86 cache line) works well for lambdas + * capturing up to ~7 pointers. If your tasks capture more state, increase + * @c TaskSize to avoid the heap fallback: + * @code + * LightweightPoolT<128> pool(4); // 120 bytes of inline storage + * @endcode + * + * @par Copyability / movability + * Not copyable, not movable. * - * @par API - * Only @c post() (fire-and-forget) is provided. For tasks that need a return - * value, use @ref ThreadPool or @ref HighPerformancePool with @c submit(). + * @tparam TaskSize Total size in bytes of each @ref detail::SboCallable + * slot (default 64). Usable inline buffer = @c TaskSize - 8 bytes + * on 64-bit platforms. * - * @tparam TaskSize Total size in bytes of each inline task slot (default 64, - * one x86 cache line). Usable buffer = @c TaskSize - 8 bytes. + * @see LightweightPool (alias for @c LightweightPoolT<64>), + * ScheduledLightweightPool (scheduled variant). */ template class LightweightPoolT { public: + /** + * @brief Construct a lightweight pool with @p num_threads workers. + * @param num_threads Number of worker threads (clamped to at least 1). + * Defaults to @c std::thread::hardware_concurrency(). + */ explicit LightweightPoolT(size_t num_threads = std::thread::hardware_concurrency()) : num_threads_(num_threads == 0 ? 1 : num_threads) { @@ -1653,8 +1859,19 @@ class LightweightPoolT shutdown(ShutdownPolicy::drain); } + /// @name Task submission + /// @{ + /** - * @brief Fire-and-forget task submission. Throws on shutdown. + * @brief Post a fire-and-forget task (throwing variant). + * + * The callable and its arguments are bound into a + * @ref detail::SboCallable and pushed into the shared queue. + * + * @tparam F Callable type. + * @tparam Args Argument types forwarded to @p F. 
+ * @throws std::runtime_error If the pool is shutting down. + * @see try_post() for the non-throwing variant. */ template void post(F&& f, Args&&... args) @@ -1665,7 +1882,10 @@ class LightweightPoolT } /** - * @brief Fire-and-forget task submission. Returns error on shutdown. + * @brief Post a fire-and-forget task (non-throwing variant). + * + * @return @c expected -- + * @c std::errc::operation_canceled on shutdown. */ template auto try_post(F&& f, Args&&... args) -> expected @@ -1682,7 +1902,13 @@ class LightweightPoolT } /** - * @brief Batch fire-and-forget submission under a single lock. + * @brief Post a range of callables under a single lock acquisition. + * + * More efficient than calling @ref post() in a loop because the mutex + * is acquired only once and all workers are woken via @c notify_all(). + * + * @tparam Iterator Forward iterator whose value_type is callable as @c void(). + * @throws std::runtime_error If the pool is shutting down. */ template void post_batch(Iterator begin, Iterator end) @@ -1693,7 +1919,8 @@ class LightweightPoolT } /** - * @brief Batch fire-and-forget submission. Returns error on shutdown. + * @brief Batch post (non-throwing). + * @return @c expected. */ template auto try_post_batch(Iterator begin, Iterator end) -> expected @@ -1710,6 +1937,7 @@ class LightweightPoolT } #if __cpp_lib_ranges >= 201911L + /// @{ @name C++20 Ranges overloads template void post_batch(R&& range) { @@ -1721,10 +1949,23 @@ class LightweightPoolT { return try_post_batch(std::ranges::begin(range), std::ranges::end(range)); } + /// @} #endif + /// @} + + /// @name Lifecycle + /// @{ + /** * @brief Shut the pool down. + * + * @param policy @c drain (default) -- workers finish all queued tasks + * before exiting. @c drop_pending -- the queue is cleared + * and only the currently executing tasks are allowed to + * finish. + * + * Safe to call more than once (subsequent calls are no-ops). 
*/ void shutdown(ShutdownPolicy policy = ShutdownPolicy::drain) { @@ -1749,8 +1990,13 @@ class LightweightPoolT } /** - * @brief Timed drain: finish as many tasks as possible within timeout. - * @return @c true if all tasks completed, @c false on timeout. + * @brief Attempt a timed drain. + * + * Waits up to @p timeout for all tasks to complete, then performs a + * full @c shutdown(drain). + * + * @return @c true if all tasks completed within the deadline, + * @c false if the timeout expired (pool is still shut down). */ auto shutdown_for(std::chrono::milliseconds timeout) -> bool { @@ -1768,27 +2014,47 @@ class LightweightPoolT return drained; } + /// @} + + /// @name Observers + /// @{ + + /// @brief Number of worker threads. [[nodiscard]] auto size() const noexcept -> size_t { return num_threads_; } + /// @} + + /// @name Thread configuration + /// @{ + + /** + * @brief Name, schedule and prioritize all worker threads. + * + * Workers are named @c name_prefix + "_0", @c "_1", etc. + */ auto configure_threads(std::string const& name_prefix, SchedulingPolicy policy = SchedulingPolicy::OTHER, ThreadPriority priority = ThreadPriority::normal()) -> expected { return detail::configure_worker_threads(workers_, name_prefix, policy, priority); } + /// @brief Pin all workers to the same CPU set. auto set_affinity(ThreadAffinity const& affinity) -> expected { return detail::set_worker_affinity(workers_, affinity); } + /// @brief Pin each worker to a distinct CPU core (round-robin). auto distribute_across_cpus() -> expected { return detail::distribute_workers_across_cpus(workers_); } + /// @} + private: size_t num_threads_; std::vector workers_; @@ -1831,7 +2097,13 @@ class LightweightPoolT } }; -/** @brief Default lightweight pool with 64-byte task slots. */ +/** + * @brief Default lightweight pool with 64-byte task slots (56 bytes usable). + * + * Sufficient for lambdas capturing up to ~7 pointers on 64-bit platforms. 
+ * + * @see LightweightPoolT + */ using LightweightPool = LightweightPoolT<>; // --------------------------------------------------------------------------- @@ -1880,12 +2152,17 @@ class GlobalPool std::call_once(init_flag_(), [num_threads] { thread_count_() = num_threads; }); } + /// @brief Access the singleton pool instance (created on first call). static auto instance() -> PoolType& { static PoolType pool(thread_count_()); return pool; } + /// @name Forwarding wrappers + /// All methods below simply forward to @c instance().method(...). + /// @{ + template static auto submit(F&& f, Args&&... args) { @@ -1948,6 +2225,8 @@ class GlobalPool } #endif + /// @} + private: GlobalPool() = default; From a1bdcf1af5437fbeafa7adb33d308c9847981f35 Mon Sep 17 00:00:00 2001 From: Katze719 Date: Sun, 5 Apr 2026 20:52:21 +0200 Subject: [PATCH 09/15] Update documentation for v2.0 migration and enhancements - Added a comprehensive migration guide in `docs/MIGRATION_V2.md` detailing breaking changes, upgrade steps, and optional improvements for transitioning from v1.x to v2.0. - Expanded the `CHANGELOG.md` to include a full list of changes and new features in v2.0, emphasizing lower-overhead submission and improved ergonomics. - Updated `README.md` to highlight new features in v2.0 and link to the migration guide for user convenience. - Enhanced documentation in `README-DOCS.md` to include references to the migration guide and other relevant resources. --- CHANGELOG.md | 4 ++ README.md | 50 +++++++++++++--- docs/MIGRATION_V2.md | 139 +++++++++++++++++++++++++++++++++++++++++++ docs/README-DOCS.md | 1 + 4 files changed, 187 insertions(+), 7 deletions(-) create mode 100644 docs/MIGRATION_V2.md diff --git a/CHANGELOG.md b/CHANGELOG.md index e3f3de2..555b3f3 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -137,6 +137,10 @@ ### Migration Guide +Full step-by-step guide: **[docs/MIGRATION_V2.md](docs/MIGRATION_V2.md)**. 
+ +Quick reference: + ```cpp // v1: bool return bool ok = pool.configure_threads("worker"); diff --git a/README.md b/README.md index 02f395a..694f692 100644 --- a/README.md +++ b/README.md @@ -36,8 +36,8 @@ or with optional **shared runtime** for multi-DSO applications. - **Chaos Testing**: RAII controller to perturb affinity/priority for validation - **C++20 Coroutines**: `task`, `generator`, and `sync_wait` out of the box -- no boilerplate promise types needed -- **High-Performance Pools**: Work-stealing thread pool optimized for 10k+ - tasks/second +- **High-Performance Pools**: Work-stealing pool, `post()` / `try_post()`, and + optional `LightweightPool` for fire-and-forget workloads with minimal overhead - **Scheduled Tasks**: Run tasks at specific times, after delays, or periodically - **Error Handling**: Comprehensive exception handling with error callbacks and @@ -46,8 +46,33 @@ or with optional **shared runtime** for multi-DSO applications. - **RAII & Exception Safety**: Automatic resource management - **Multiple Integration Methods**: CMake, CPM, Conan, FetchContent +## What's new in v2.0 + +Version 2.0 focuses on **lower-overhead submission**, **more control over shutdown and tuning**, and **better ergonomics** for modern C++ (ranges, coroutines, `std::stop_token`). Highlights: + +| Area | What changed | +| ---- | ------------ | +| **Lightweight pool** | `LightweightPoolT` / `LightweightPool` -- fire-and-forget only, configurable SBO buffer (default 64 B), no futures or stats. Workers are still `ThreadWrapper` (name, affinity, policy). Ideal for maximum throughput when you do not need a return value. | +| **`post()` / `try_post()`** | On `HighPerformancePool`, `ThreadPool` / `FastThreadPool`, and `GlobalPool` -- same queue path as `submit()` but skips `packaged_task` / `future` overhead. | +| **Non-throwing submit** | `try_submit()` / `try_submit_batch()` return `expected` instead of throwing on shutdown. 
| +| **Scheduled dispatch** | `ScheduledThreadPoolT` dispatches with `post()` internally. Alias `ScheduledLightweightPool` uses `LightweightPool` as the backend. | +| **Shutdown** | `ShutdownPolicy::drain` (default) vs `drop_pending`; `shutdown_for(timeout)` for a timed drain. | +| **Parallel loops** | Chunked `parallel_for_each` on single-queue pools (same helper as the work-stealing pool). | +| **Tuning** | `PollingWait` for `FastThreadPool`, configurable work-stealing deque capacity on `HighPerformancePool`, `GlobalPool::init(n)` before first use. | +| **C++20** | Ranges overloads for batch submit and `parallel_for_each`; `submit`/`try_submit` with `std::stop_token` (cooperative skip). | +| **Futures** | `when_all`, `when_any`, `when_all_settled` in `futures.hpp`. | +| **Coroutines** | `schedule_on{pool}`, `pool_executor`, `run_on(pool, coro_fn)` for pool-aware `task`. | +| **Observability** | Optional auto-registration of pool workers in the thread registry; per-task `set_on_task_start` / `set_on_task_end` hooks. | +| **Errors** | `ErrorHandler` callbacks get stable IDs; `remove_callback(id)` / `has_callback(id)`. | + +See [CHANGELOG.md](CHANGELOG.md) for the full list, including breaking changes when upgrading from v1.x. 
+ +**Upgrading from v1.x:** [Migration guide (v2.0)](docs/MIGRATION_V2.md) + ## Documentation +- **[Migrating to v2.0](docs/MIGRATION_V2.md)** - Breaking changes, renames, and + recommended follow-ups from v1.x - **[Integration Guide](docs/INTEGRATION.md)** - CMake, Conan, FetchContent, system installation - **[Thread Registry Guide](docs/REGISTRY.md)** - Process-wide thread control @@ -196,6 +221,12 @@ int main() { auto future = pool.submit([]() { return 42; }); std::cout << "Result: " << future.get() << std::endl; + + // Fire-and-forget (no future): post() on any pool, or LightweightPool + pool.post([]() { /* work */ }); + LightweightPool lite(4); + lite.configure_threads("lite"); + lite.post([]() { /* minimal overhead */ }); // Scheduled tasks (uses ThreadPool by default) ScheduledThreadPool scheduler(4); @@ -208,6 +239,8 @@ int main() { auto handle_hp = scheduler_hp.schedule_periodic(std::chrono::milliseconds(100), []() { std::cout << "Frequent task!" << std::endl; }); + + // v2: ScheduledLightweightPool -- same API, LightweightPool backend (post-based dispatch) // Error handling HighPerformancePoolWithErrors pool_safe(4); @@ -449,11 +482,14 @@ Zero-overhead helpers to operate on existing threads without taking ownership. 
### Thread Pools -| Class | Use Case | Performance | -| --------------------- | --------------------------------------- | ---------------- | -| `ThreadPool` | General-purpose, simple API | < 1k tasks/sec | -| `HighPerformancePool` | Work-stealing, optimized for throughput | 10k+ tasks/sec | -| `FastThreadPool` | Single-queue, minimal overhead | 1k-10k tasks/sec | +| Class | Use Case | Notes | +| ----------------------- | --------------------------------------------- | ----- | +| `ThreadPool` | Single shared queue, blocks while idle | `submit`, `try_submit`, `post`, batches, `parallel_for_each` | +| `FastThreadPool` | Same as `ThreadPool` with polling wait policy | Tunable via `PollingWait` | +| `HighPerformancePool` | Work-stealing + overflow queue | Highest throughput for large batches; tunable deque capacity | +| `LightweightPool` | Fire-and-forget only, SBO tasks | No futures; use `post` / `post_batch`. Alias of `LightweightPoolT<64>` | + +All of the above support `shutdown(ShutdownPolicy)` and `shutdown_for(timeout)` where applicable. Use **`post()`** when you do not need a `std::future` (lower overhead than `submit()`). ### Configuration diff --git a/docs/MIGRATION_V2.md b/docs/MIGRATION_V2.md new file mode 100644 index 0000000..92a3b13 --- /dev/null +++ b/docs/MIGRATION_V2.md @@ -0,0 +1,139 @@ +# Migrating to ThreadSchedule v2.0 + +This guide helps you move from v1.x to **v2.0.0**. It lists **breaking changes** first, then **behavioral changes** you should be aware of, and finally **optional upgrades** that are not required but often worthwhile. + +For the authoritative list of every change, see [CHANGELOG.md](../CHANGELOG.md). + +## 1. Upgrade steps + +1. **Pin the version** in CMake / Conan / FetchContent to a v2.0.0 tag (or `main` once released). +2. **Rebuild** with the same `CMAKE_CXX_STANDARD` as before (v2 still supports C++17 as the baseline). +3. 
**Fix compile errors** using the sections below (most projects only touch `submit_range`, `configure_threads` storage type, or forward declarations). +4. **Run tests** -- especially anything that assumed strict per-element scheduling for `parallel_for_each` on `ThreadPool` / `FastThreadPool`. + +## 2. Breaking changes (must fix) + +### 2.1 `submit_range()` removed + +`ThreadPool::submit_range` and `GlobalThreadPool::submit_range` are removed. Use **`submit_batch`** with the same iterators. + +```cpp +// v1 +auto futures = pool.submit_range(tasks.begin(), tasks.end()); + +// v2 +auto futures = pool.submit_batch(tasks.begin(), tasks.end()); +``` + +`submit_batch` acquires the queue lock once for the whole range and matches the API of `FastThreadPool` and `HighPerformancePool`. + +### 2.2 `configure_threads` / `set_affinity` / `distribute_across_cpus` return type + +On **`ThreadPool`** and **`FastThreadPool`**, these functions now return **`expected`** (same as `HighPerformancePool` already did). + +```cpp +// v1: storing in bool (no longer valid) +bool ok = pool.configure_threads("worker"); + +// v2: use auto or expected +auto r = pool.configure_threads("worker"); +if (!r) { + std::cerr << r.error().message() << '\n'; +} + +// Conditions still work: expected has operator bool +if (pool.configure_threads("worker")) { /* success */ } +``` + +### 2.3 `ThreadPool` and `FastThreadPool` are type aliases + +They are now: + +- `ThreadPool` = `ThreadPoolBase` +- `FastThreadPool` = `ThreadPoolBase>` + +**Runtime behavior is unchanged.** You only need to act if you: + +- **Forward-declared** a concrete `class ThreadPool;` -- forward-declare the alias or include the header instead. +- **Specialized** a template on `ThreadPool` as a unique class type -- switch to `ThreadPoolBase` (or a SFINAE-friendly trait). + +### 2.4 `ThreadPool::Statistics` extended + +`Statistics` on the single-queue pools now includes **`tasks_per_second`** and **`avg_task_time`**, like the other pools. 
If you use **designated initializers** or **memset**-style initialization that assumed a smaller struct, update the initializer list. + +### 2.5 Error pool and global pool type names (aliases only) + +These are now aliases; **the public API is unchanged**: + +- `HighPerformancePoolWithErrors`, `ThreadPoolWithErrors`, `FastThreadPoolWithErrors` -> `PoolWithErrors` +- `GlobalThreadPool`, `GlobalHighPerformancePool` -> `GlobalPool` + +Only unusual code (e.g. explicit template specialization on the old type name) may need the new spelling. + +### 2.6 `ErrorHandler::add_callback` return type + +`add_callback` now returns **`size_t`** (stable callback id for `remove_callback` / `has_callback`). Code that ignored the return value is unaffected. Code that assumed **`void`** must be updated. + +```cpp +// v2 +size_t id = handler.add_callback([](TaskError const& e) { /* ... */ }); +handler.remove_callback(id); +``` + +## 3. API changes that are backward compatible + +### 3.1 `shutdown()` + +`shutdown()` now takes an optional **`ShutdownPolicy`** (default **`drain`**, matching old behavior). Old call sites without arguments behave as before. + +```cpp +pool.shutdown(); // still: drain all work +pool.shutdown(ShutdownPolicy::drop_pending); // new: drop queued tasks +pool.shutdown_for(std::chrono::seconds(5)); // new: timed drain +``` + +### 3.2 Destructors + +Destructors still shut down the pool; they use **`drain`** by default. No change required unless you want **`drop_pending`** explicitly before destruction. + +## 4. Behavioral changes (no rename, but semantics differ) + +### 4.1 `parallel_for_each` on `ThreadPool` / `FastThreadPool` + +Implementation is now **chunked** (same strategy as `HighPerformancePool`): the range is split into a small number of tasks instead of one task per element. + +- **Pros:** Much less submission overhead on large ranges. +- **Cons:** Finer-grained progress / cancellation per element is no longer one-to-one with one pool task. 
+ +If you relied on **one future per element**, switch to an explicit loop with `submit`, or chunk manually. + +### 4.2 Scheduled pools dispatch with `post()` + +`ScheduledThreadPoolT` dispatches due tasks with **`post()`** instead of **`submit()`**, so **no `std::future` is created per dispatch**. Your task bodies are unchanged; only internal overhead is lower. + +## 5. Optional improvements after migrating + +These are **not** required for a successful build but match v2 design well: + +| Goal | Approach | +| ---- | -------- | +| Less overhead than `submit()` | Use **`post()`** / **`try_post()`** when you do not need a return value or `std::future`. | +| Dedicated fire-and-forget pool | Use **`LightweightPool`** / **`LightweightPoolT`** (SBO task buffer, no futures). | +| Non-throwing submit | Use **`try_submit()`** / **`try_submit_batch()`** and check **`expected`**. | +| Tune fast pool polling | Use **`ThreadPoolBase>`** or keep **`FastThreadPool`** (10 ms default). | +| Tune HP deque size | **`HighPerformancePool(threads, deque_capacity)`**. | +| Fix global pool size early | **`GlobalPool<...>::init(n)`** before first **`instance()`**. | +| Workers in registry | Pass **`register_workers = true`** to pool constructors. | + +## 6. Header and module notes + +- New headers pulled in by the umbrella header include **`futures.hpp`** (combinators) and coroutine helpers on **`task.hpp`** as documented in [COROUTINES.md](COROUTINES.md). +- Include **`threadschedule/futures.hpp`** directly if you only need combinators. + +## 7. 
Further reading + +- [README.md](../README.md) -- "What's new in v2.0" summary table +- [CHANGELOG.md](../CHANGELOG.md) -- full v2.0.0 notes +- [INTEGRATION.md](INTEGRATION.md) -- CMake and package managers +- [ERROR_HANDLING.md](ERROR_HANDLING.md) -- pools with errors and callbacks +- [SCHEDULED_TASKS.md](SCHEDULED_TASKS.md) -- scheduled pools and aliases diff --git a/docs/README-DOCS.md b/docs/README-DOCS.md index e02e983..00d6e00 100644 --- a/docs/README-DOCS.md +++ b/docs/README-DOCS.md @@ -19,6 +19,7 @@ Notes: - The documentation includes headers under `include/` and uses the repository `README.md` as the landing page. Additional guides: +- [Migrating to v2.0](MIGRATION_V2.md) - [Integration Guide](INTEGRATION.md) - [Thread Registry](REGISTRY.md) - [Scheduled Tasks](SCHEDULED_TASKS.md) From b7ba58e84e50708b5dcf0f08fd5930e4e1845d0d Mon Sep 17 00:00:00 2001 From: Katze719 Date: Mon, 6 Apr 2026 17:43:29 +0200 Subject: [PATCH 10/15] Update CHANGELOG.md for improved clarity and formatting - Refined the presentation of quality-of-life features, ensuring consistent formatting and clearer descriptions. - Enhanced the documentation of new types and internal improvements, emphasizing usability and performance benefits. - Consolidated related changes for better readability and understanding of enhancements in the thread pool and error handling functionalities. --- CHANGELOG.md | 113 ++++++++++++++++++++++++++------------------------- 1 file changed, 57 insertions(+), 56 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 555b3f3..724cd11 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -37,16 +37,16 @@ ### Quality-of-Life Features -- **`ErrorHandler::remove_callback(id)` / `has_callback(id)`** -- callbacks - are now stored in a `std::map` with stable IDs. Individual callbacks can be +- **`ErrorHandler::remove_callback(id)` / `has_callback(id)`** -- callbacks are + now stored in a `std::map` with stable IDs. 
Individual callbacks can be removed without clearing all of them. - **`try_submit()` / `try_submit_batch()`** -- non-throwing submission for all pool types, returning `expected, std::error_code>` instead of throwing on shutdown. -- **Chunked `parallel_for_each`** -- `ThreadPoolBase` now uses the same - chunked work distribution as `HighPerformancePool` via a shared +- **Chunked `parallel_for_each`** -- `ThreadPoolBase` now uses the same chunked + work distribution as `HighPerformancePool` via a shared `detail::parallel_for_each_chunked` helper (one task per element is gone). - **`PollingWait`** -- tunable polling interval (default 10 ms). @@ -58,12 +58,12 @@ - **`GlobalPool::init(n)`** -- pre-configure thread count before first use (std::call_once semantics). -- **C++20 ranges overloads** -- `submit_batch(range)`, `try_submit_batch(range)`, - `parallel_for_each(range, func)` on all pool types and GlobalPool. Guarded - by `__cpp_lib_ranges`. +- **C++20 ranges overloads** -- `submit_batch(range)`, + `try_submit_batch(range)`, `parallel_for_each(range, func)` on all pool types + and GlobalPool. Guarded by `__cpp_lib_ranges`. -- **Auto-register pool workers** -- opt-in `register_workers` flag on both - pool constructors. Workers register/unregister automatically via +- **Auto-register pool workers** -- opt-in `register_workers` flag on both pool + constructors. Workers register/unregister automatically via `AutoRegisterCurrentThread` RAII guard. - **Per-task tracing hooks** -- `set_on_task_start(callback)` and @@ -81,9 +81,9 @@ `ShutdownPolicy::drop_pending`. `shutdown(policy)` replaces the old no-argument `shutdown()`. `shutdown_for(timeout)` provides timed drain. 
-- **Coroutine scheduler integration** -- `schedule_on{pool}` awaitable to hop - to a pool thread, `executor_base` / `pool_executor` type-erased - executor for pool-aware tasks, `run_on(pool, coro_fn)` convenience returning +- **Coroutine scheduler integration** -- `schedule_on{pool}` awaitable to hop to + a pool thread, `executor_base` / `pool_executor` type-erased executor + for pool-aware tasks, `run_on(pool, coro_fn)` convenience returning `std::future`. - **`LightweightPoolT`** -- ultra-lightweight fire-and-forget pool @@ -98,21 +98,24 @@ `submit()` but skips `packaged_task`/`shared_ptr`/`future` overhead. - **`ScheduledThreadPoolT` now uses `post()`** internally instead of `submit()`, - eliminating wasted `future` allocations for every scheduled task dispatch. - New alias: `ScheduledLightweightPool = ScheduledThreadPoolT`. + eliminating wasted `future` allocations for every scheduled task dispatch. New + alias: `ScheduledLightweightPool = ScheduledThreadPoolT`. ### New Types - `ThreadPoolBase` - parameterized single-queue thread pool. -- `IndefiniteWait` / `PollingWait` - wait policy types for `ThreadPoolBase`. +- `IndefiniteWait` / `PollingWait` - wait policy types for + `ThreadPoolBase`. - `PoolWithErrors` - generic error-handling pool wrapper. - `GlobalPool` - generic singleton pool accessor. - `ShutdownPolicy` - enum controlling shutdown behavior (drain / drop_pending). - `TaskStartCallback` / `TaskEndCallback` - tracing callback types. - `executor_base` / `pool_executor` - type-erased executor for coroutines. - `schedule_on` - awaitable for hopping to a pool thread. -- `futures.hpp` - future combinators (`when_all`, `when_any`, `when_all_settled`). -- `LightweightPoolT` / `LightweightPool` - fire-and-forget pool with SBO. +- `futures.hpp` - future combinators (`when_all`, `when_any`, + `when_all_settled`). +- `LightweightPoolT` / `LightweightPool` - fire-and-forget pool with + SBO. - `detail::SboCallable` - type-erased callable with inline storage. 
- `ScheduledLightweightPool` - scheduled pool backed by `LightweightPool`. @@ -135,31 +138,6 @@ - **`ScheduledThreadPoolT`**: `schedule_at()` and `schedule_periodic_after()` now share a private `insert_task()` helper. -### Migration Guide - -Full step-by-step guide: **[docs/MIGRATION_V2.md](docs/MIGRATION_V2.md)**. - -Quick reference: - -```cpp -// v1: bool return -bool ok = pool.configure_threads("worker"); - -// v2: expected return (operator bool still works in conditions) -auto result = pool.configure_threads("worker"); -if (!result.has_value()) { - std::cerr << result.error().message() << std::endl; -} - -// v1: submit_range -auto futures = pool.submit_range(tasks.begin(), tasks.end()); - -// v2: submit_batch (same signature, more efficient) -auto futures = pool.submit_batch(tasks.begin(), tasks.end()); -``` - -### Internal improvements (v2.0.0 continued) - - **Pool worker configuration deduplicated**: `configure_threads()`, `set_affinity()`, `distribute_across_cpus()` in `HighPerformancePool` and `ThreadPoolBase` now delegate to shared `detail::configure_worker_threads`, @@ -172,13 +150,13 @@ auto futures = pool.submit_batch(tasks.begin(), tasks.end()); `detail::read_name`, `detail::read_affinity` in `scheduler_policy.hpp`. - **`FutureWithErrorHandler` specialization removed**: The primary - template now handles both `T` and `void` via `if constexpr`, eliminating - ~70 lines of duplicated code. No API change. + template now handles both `T` and `void` via `if constexpr`, eliminating ~70 + lines of duplicated code. No API change. -- **`CompositeThreadRegistry` facade deduplicated**: The 12 query facade - methods (filter, map, for_each, find_if, any, all, none, take, skip, count, - empty, apply) are now inherited from `detail::QueryFacadeMixin` - CRTP base. No API change. 
+- **`CompositeThreadRegistry` facade deduplicated**: The 12 query facade methods + (filter, map, for_each, find_if, any, all, none, take, skip, count, empty, + apply) are now inherited from `detail::QueryFacadeMixin` CRTP base. + No API change. - **`ThreadRegistry` inherits `detail::QueryFacadeMixin`**: The 12 facade methods (filter, map, for_each, find_if, any, all, none, take, skip, count, @@ -186,26 +164,49 @@ auto futures = pool.submit_batch(tasks.begin(), tasks.end()); `CompositeThreadRegistry`, eliminating the duplicate implementations. - **POSIX scheduling helpers consolidated**: `apply_priority` and - `apply_scheduling_policy` for both `pthread_t` and `pid_t` now share a - common `detail::apply_sched_params` template, eliminating duplicated param - validation and error handling. + `apply_scheduling_policy` for both `pthread_t` and `pid_t` now share a common + `detail::apply_sched_params` template, eliminating duplicated param validation + and error handling. -- **`ThreadRegistry::register_current_thread` consolidated**: Both overloads - now delegate to a private `try_register(RegisteredThreadInfo)` method, - removing the duplicated lock/emplace/callback logic. +- **`ThreadRegistry::register_current_thread` consolidated**: Both overloads now + delegate to a private `try_register(RegisteredThreadInfo)` method, removing + the duplicated lock/emplace/callback logic. - **`PoolWithErrors` submit methods consolidated**: `submit()` and `submit_with_description()` now delegate to a private `submit_impl` with optional description parameter. -- **`TaskError::capture()` factory**: New static factory method centralizes - the repeated exception/thread_id/timestamp capture pattern. Used by +- **`TaskError::capture()` factory**: New static factory method centralizes the + repeated exception/thread_id/timestamp capture pattern. Used by `ErrorHandledTask` and `PoolWithErrors`. 
- **`ThreadControlBlock` native handle accessor**: Private `native_handle()` method replaces four identical `#ifdef _WIN32` dispatch blocks in the set_affinity/set_priority/set_scheduling_policy/set_name methods. +### Migration Guide + +Full step-by-step guide: **[docs/MIGRATION_V2.md](docs/MIGRATION_V2.md)**. + +Quick reference: + +```cpp +// v1: bool return +bool ok = pool.configure_threads("worker"); + +// v2: expected return (operator bool still works in conditions) +auto result = pool.configure_threads("worker"); +if (!result.has_value()) { + std::cerr << result.error().message() << std::endl; +} + +// v1: submit_range +auto futures = pool.submit_range(tasks.begin(), tasks.end()); + +// v2: submit_batch (same signature, more efficient) +auto futures = pool.submit_batch(tasks.begin(), tasks.end()); +``` + ## v1.4.1 - Fix: `*WrapperReg` types (`ThreadWrapperReg`, `JThreadWrapperReg`, From 87f2b2e0e3f7dfba9a03c728e8b8f426adc43583 Mon Sep 17 00:00:00 2001 From: Katze719 Date: Mon, 6 Apr 2026 23:17:00 +0200 Subject: [PATCH 11/15] Update documentation and CHANGELOG for v2.0.0 release - Finalized the CHANGELOG to mark the release of v2.0.0, including breaking changes and new features. - Updated Doxyfile to enable warnings for undocumented elements, enhancing code documentation quality. - Improved links in README.md and migration guide for better accessibility to CHANGELOG and upgrade instructions. - Clarified documentation in futures.hpp and scheduled_pool.hpp to ensure consistent use of code formatting for better readability. 
--- CHANGELOG.md | 2 +- Doxyfile | 4 +- README.md | 103 +++++++++--------- docs/MIGRATION_V2.md | 91 ++++++++++------ include/threadschedule/futures.hpp | 4 +- include/threadschedule/scheduled_pool.hpp | 14 +-- include/threadschedule/thread_pool.hpp | 24 ++-- .../thread_pool_with_errors.hpp | 4 +- include/threadschedule/threadschedule.hpp | 2 +- 9 files changed, 146 insertions(+), 102 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 724cd11..ce81069 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,6 +1,6 @@ # Changelog -## v2.0.0 (unreleased) +## v2.0.0 ### Breaking Changes diff --git a/Doxyfile b/Doxyfile index e8c6b2d..117dcc0 100644 --- a/Doxyfile +++ b/Doxyfile @@ -28,8 +28,8 @@ EXTRACT_LOCAL_CLASSES = YES EXTRACT_ANON_NSPACES = YES QUIET = YES -WARN_IF_UNDOCUMENTED = NO -WARN_AS_ERROR = NO +WARN_IF_UNDOCUMENTED = YES +WARN_AS_ERROR = YES GENERATE_HTML = YES HTML_OUTPUT = html diff --git a/README.md b/README.md index 694f692..08d7f8a 100644 --- a/README.md +++ b/README.md @@ -48,24 +48,27 @@ or with optional **shared runtime** for multi-DSO applications. ## What's new in v2.0 -Version 2.0 focuses on **lower-overhead submission**, **more control over shutdown and tuning**, and **better ergonomics** for modern C++ (ranges, coroutines, `std::stop_token`). Highlights: - -| Area | What changed | -| ---- | ------------ | -| **Lightweight pool** | `LightweightPoolT` / `LightweightPool` -- fire-and-forget only, configurable SBO buffer (default 64 B), no futures or stats. Workers are still `ThreadWrapper` (name, affinity, policy). Ideal for maximum throughput when you do not need a return value. | -| **`post()` / `try_post()`** | On `HighPerformancePool`, `ThreadPool` / `FastThreadPool`, and `GlobalPool` -- same queue path as `submit()` but skips `packaged_task` / `future` overhead. | -| **Non-throwing submit** | `try_submit()` / `try_submit_batch()` return `expected` instead of throwing on shutdown. 
| -| **Scheduled dispatch** | `ScheduledThreadPoolT` dispatches with `post()` internally. Alias `ScheduledLightweightPool` uses `LightweightPool` as the backend. | -| **Shutdown** | `ShutdownPolicy::drain` (default) vs `drop_pending`; `shutdown_for(timeout)` for a timed drain. | -| **Parallel loops** | Chunked `parallel_for_each` on single-queue pools (same helper as the work-stealing pool). | -| **Tuning** | `PollingWait` for `FastThreadPool`, configurable work-stealing deque capacity on `HighPerformancePool`, `GlobalPool::init(n)` before first use. | -| **C++20** | Ranges overloads for batch submit and `parallel_for_each`; `submit`/`try_submit` with `std::stop_token` (cooperative skip). | -| **Futures** | `when_all`, `when_any`, `when_all_settled` in `futures.hpp`. | -| **Coroutines** | `schedule_on{pool}`, `pool_executor`, `run_on(pool, coro_fn)` for pool-aware `task`. | -| **Observability** | Optional auto-registration of pool workers in the thread registry; per-task `set_on_task_start` / `set_on_task_end` hooks. | -| **Errors** | `ErrorHandler` callbacks get stable IDs; `remove_callback(id)` / `has_callback(id)`. | - -See [CHANGELOG.md](CHANGELOG.md) for the full list, including breaking changes when upgrading from v1.x. +Version 2.0 focuses on **lower-overhead submission**, **more control over +shutdown and tuning**, and **better ergonomics** for modern C++ (ranges, +coroutines, `std::stop_token`). Highlights: + +| Area | What changed | +| --------------------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| **Lightweight pool** | `LightweightPoolT` / `LightweightPool` -- fire-and-forget only, configurable SBO buffer (default 64 B), no futures or stats. Workers are still `ThreadWrapper` (name, affinity, policy). 
Ideal for maximum throughput when you do not need a return value. | +| **`post()` / `try_post()`** | On `HighPerformancePool`, `ThreadPool` / `FastThreadPool`, and `GlobalPool` -- same queue path as `submit()` but skips `packaged_task` / `future` overhead. | +| **Non-throwing submit** | `try_submit()` / `try_submit_batch()` return `expected` instead of throwing on shutdown. | +| **Scheduled dispatch** | `ScheduledThreadPoolT` dispatches with `post()` internally. Alias `ScheduledLightweightPool` uses `LightweightPool` as the backend. | +| **Shutdown** | `ShutdownPolicy::drain` (default) vs `drop_pending`; `shutdown_for(timeout)` for a timed drain. | +| **Parallel loops** | Chunked `parallel_for_each` on single-queue pools (same helper as the work-stealing pool). | +| **Tuning** | `PollingWait` for `FastThreadPool`, configurable work-stealing deque capacity on `HighPerformancePool`, `GlobalPool::init(n)` before first use. | +| **C++20** | Ranges overloads for batch submit and `parallel_for_each`; `submit`/`try_submit` with `std::stop_token` (cooperative skip). | +| **Futures** | `when_all`, `when_any`, `when_all_settled` in `futures.hpp`. | +| **Coroutines** | `schedule_on{pool}`, `pool_executor`, `run_on(pool, coro_fn)` for pool-aware `task`. | +| **Observability** | Optional auto-registration of pool workers in the thread registry; per-task `set_on_task_start` / `set_on_task_end` hooks. | +| **Errors** | `ErrorHandler` callbacks get stable IDs; `remove_callback(id)` / `has_callback(id)`. | + +See [CHANGELOG.md](CHANGELOG.md) for the full list, including breaking changes +when upgrading from v1.x. **Upgrading from v1.x:** [Migration guide (v2.0)](docs/MIGRATION_V2.md) @@ -98,28 +101,28 @@ ThreadSchedule is designed to work on any platform with a C++17 (or newer) compiler and standard threading support. 
The library is **continuously tested** on: -| Platform | Compiler | C++17 | C++20 | C++23 | C++26 | -| ------------------- | ----------------- | :---: | :---: | :---: | :---: | -| **Linux (x86_64)** | | | | | | -| Ubuntu 22.04 | GCC 11 | ✅ | ✅ | ✅ | - | -| Ubuntu 22.04 | GCC 12 | - | ✅ | - | - | -| Ubuntu 22.04 | Clang 14 | ✅ | ✅ | ✅ | - | -| Ubuntu 22.04 | Clang 15 | - | ✅ | ✅ | - | -| Ubuntu 24.04 | GCC 13 | ✅ | ✅ | ✅ | - | -| Ubuntu 24.04 | GCC 14 | ✅ | ✅ | ✅ | ✅ | -| Ubuntu 24.04 | GCC 15 | - | ✅ | ✅ | ✅ | -| Ubuntu 24.04 | Clang 16 | ✅ | ✅ | - | - | -| Ubuntu 24.04 | Clang 18 | ✅ | ✅ | - | - | -| Ubuntu 24.04 | Clang 19 | - | ✅ | ✅ | ✅ | -| Ubuntu 24.04 | Clang 21 | - | ✅ | ✅ | ✅ | -| **Linux (ARM64)** | | | | | | -| Ubuntu 24.04 ARM64 | GCC 13 (system) | ✅ | ✅ | ✅ | - | -| Ubuntu 24.04 ARM64 | GCC 14 | - | ✅ | ✅ | ✅ | -| **Windows** | | | | | | -| Windows Server 2022 | MSVC 2022 | ✅ | ✅ | ✅ | - | -| Windows Server 2022 | MinGW-w64 (GCC 15)| ✅ | ✅ | ✅ | - | -| Windows Server 2025 | MSVC 2022 | ✅ | ✅ | ✅ | - | -| Windows Server 2025 | MinGW-w64 (GCC 15)| ✅ | ✅ | ✅ | - | +| Platform | Compiler | C++17 | C++20 | C++23 | C++26 | +| ------------------- | ------------------ | :---: | :---: | :---: | :---: | +| **Linux (x86_64)** | | | | | | +| Ubuntu 22.04 | GCC 11 | ✅ | ✅ | ✅ | - | +| Ubuntu 22.04 | GCC 12 | - | ✅ | - | - | +| Ubuntu 22.04 | Clang 14 | ✅ | ✅ | ✅ | - | +| Ubuntu 22.04 | Clang 15 | - | ✅ | ✅ | - | +| Ubuntu 24.04 | GCC 13 | ✅ | ✅ | ✅ | - | +| Ubuntu 24.04 | GCC 14 | ✅ | ✅ | ✅ | ✅ | +| Ubuntu 24.04 | GCC 15 | - | ✅ | ✅ | ✅ | +| Ubuntu 24.04 | Clang 16 | ✅ | ✅ | - | - | +| Ubuntu 24.04 | Clang 18 | ✅ | ✅ | - | - | +| Ubuntu 24.04 | Clang 19 | - | ✅ | ✅ | ✅ | +| Ubuntu 24.04 | Clang 21 | - | ✅ | ✅ | ✅ | +| **Linux (ARM64)** | | | | | | +| Ubuntu 24.04 ARM64 | GCC 13 (system) | ✅ | ✅ | ✅ | - | +| Ubuntu 24.04 ARM64 | GCC 14 | - | ✅ | ✅ | ✅ | +| **Windows** | | | | | | +| Windows Server 2022 | MSVC 2022 | ✅ | ✅ | ✅ | - | +| Windows Server 2022 | MinGW-w64 
(GCC 15) | ✅ | ✅ | ✅ | - | +| Windows Server 2025 | MSVC 2022 | ✅ | ✅ | ✅ | - | +| Windows Server 2025 | MinGW-w64 (GCC 15) | ✅ | ✅ | ✅ | - | **Additional platforms:** ThreadSchedule should work on other platforms (macOS, FreeBSD, other Linux distributions) with standard C++17+ compilers, but these @@ -135,8 +138,8 @@ are not regularly tested in CI. > > **GCC 15**: Installed via `ppa:ubuntu-toolchain-r/test` on Ubuntu 24.04. > -> **Clang 21**: Installed via the official LLVM apt repository -> (`apt.llvm.org`) on Ubuntu 24.04. +> **Clang 21**: Installed via the official LLVM apt repository (`apt.llvm.org`) +> on Ubuntu 24.04. > > **Windows ARM64**: Not currently covered by GitHub-hosted runners, requires > self-hosted runner for testing. @@ -482,14 +485,16 @@ Zero-overhead helpers to operate on existing threads without taking ownership. ### Thread Pools -| Class | Use Case | Notes | -| ----------------------- | --------------------------------------------- | ----- | -| `ThreadPool` | Single shared queue, blocks while idle | `submit`, `try_submit`, `post`, batches, `parallel_for_each` | -| `FastThreadPool` | Same as `ThreadPool` with polling wait policy | Tunable via `PollingWait` | -| `HighPerformancePool` | Work-stealing + overflow queue | Highest throughput for large batches; tunable deque capacity | -| `LightweightPool` | Fire-and-forget only, SBO tasks | No futures; use `post` / `post_batch`. 
Alias of `LightweightPoolT<64>` | +| Class | Use Case | Notes | +| --------------------- | --------------------------------------------- | ---------------------------------------------------------------------- | +| `ThreadPool` | Single shared queue, blocks while idle | `submit`, `try_submit`, `post`, batches, `parallel_for_each` | +| `FastThreadPool` | Same as `ThreadPool` with polling wait policy | Tunable via `PollingWait` | +| `HighPerformancePool` | Work-stealing + overflow queue | Highest throughput for large batches; tunable deque capacity | +| `LightweightPool` | Fire-and-forget only, SBO tasks | No futures; use `post` / `post_batch`. Alias of `LightweightPoolT<64>` | -All of the above support `shutdown(ShutdownPolicy)` and `shutdown_for(timeout)` where applicable. Use **`post()`** when you do not need a `std::future` (lower overhead than `submit()`). +All of the above support `shutdown(ShutdownPolicy)` and `shutdown_for(timeout)` +where applicable. Use **`post()`** when you do not need a `std::future` (lower +overhead than `submit()`). ### Configuration diff --git a/docs/MIGRATION_V2.md b/docs/MIGRATION_V2.md index 92a3b13..5029db6 100644 --- a/docs/MIGRATION_V2.md +++ b/docs/MIGRATION_V2.md @@ -1,21 +1,28 @@ # Migrating to ThreadSchedule v2.0 -This guide helps you move from v1.x to **v2.0.0**. It lists **breaking changes** first, then **behavioral changes** you should be aware of, and finally **optional upgrades** that are not required but often worthwhile. +This guide helps you move from v1.x to **v2.0.0**. It lists **breaking changes** +first, then **behavioral changes** you should be aware of, and finally +**optional upgrades** that are not required but often worthwhile. For the authoritative list of every change, see [CHANGELOG.md](../CHANGELOG.md). ## 1. Upgrade steps -1. **Pin the version** in CMake / Conan / FetchContent to a v2.0.0 tag (or `main` once released). -2. 
**Rebuild** with the same `CMAKE_CXX_STANDARD` as before (v2 still supports C++17 as the baseline). -3. **Fix compile errors** using the sections below (most projects only touch `submit_range`, `configure_threads` storage type, or forward declarations). -4. **Run tests** -- especially anything that assumed strict per-element scheduling for `parallel_for_each` on `ThreadPool` / `FastThreadPool`. +1. **Pin the version** in CMake / Conan / FetchContent to the **v2.0.0** tag (or + a later v2.x tag). +2. **Rebuild** with the same `CMAKE_CXX_STANDARD` as before (v2 still supports + C++17 as the baseline). +3. **Fix compile errors** using the sections below (most projects only touch + `submit_range`, `configure_threads` storage type, or forward declarations). +4. **Run tests** -- especially anything that assumed strict per-element + scheduling for `parallel_for_each` on `ThreadPool` / `FastThreadPool`. ## 2. Breaking changes (must fix) ### 2.1 `submit_range()` removed -`ThreadPool::submit_range` and `GlobalThreadPool::submit_range` are removed. Use **`submit_batch`** with the same iterators. +`ThreadPool::submit_range` and `GlobalThreadPool::submit_range` are removed. Use +**`submit_batch`** with the same iterators. ```cpp // v1 @@ -25,11 +32,14 @@ auto futures = pool.submit_range(tasks.begin(), tasks.end()); auto futures = pool.submit_batch(tasks.begin(), tasks.end()); ``` -`submit_batch` acquires the queue lock once for the whole range and matches the API of `FastThreadPool` and `HighPerformancePool`. +`submit_batch` acquires the queue lock once for the whole range and matches the +API of `FastThreadPool` and `HighPerformancePool`. ### 2.2 `configure_threads` / `set_affinity` / `distribute_across_cpus` return type -On **`ThreadPool`** and **`FastThreadPool`**, these functions now return **`expected`** (same as `HighPerformancePool` already did). 
+On **`ThreadPool`** and **`FastThreadPool`**, these functions now return +**`expected`** (same as `HighPerformancePool` already +did). ```cpp // v1: storing in bool (no longer valid) @@ -54,25 +64,34 @@ They are now: **Runtime behavior is unchanged.** You only need to act if you: -- **Forward-declared** a concrete `class ThreadPool;` -- forward-declare the alias or include the header instead. -- **Specialized** a template on `ThreadPool` as a unique class type -- switch to `ThreadPoolBase` (or a SFINAE-friendly trait). +- **Forward-declared** a concrete `class ThreadPool;` -- forward-declare the + alias or include the header instead. +- **Specialized** a template on `ThreadPool` as a unique class type -- switch to + `ThreadPoolBase` (or a SFINAE-friendly trait). ### 2.4 `ThreadPool::Statistics` extended -`Statistics` on the single-queue pools now includes **`tasks_per_second`** and **`avg_task_time`**, like the other pools. If you use **designated initializers** or **memset**-style initialization that assumed a smaller struct, update the initializer list. +`Statistics` on the single-queue pools now includes **`tasks_per_second`** and +**`avg_task_time`**, like the other pools. If you use **designated +initializers** or **memset**-style initialization that assumed a smaller struct, +update the initializer list. ### 2.5 Error pool and global pool type names (aliases only) These are now aliases; **the public API is unchanged**: -- `HighPerformancePoolWithErrors`, `ThreadPoolWithErrors`, `FastThreadPoolWithErrors` -> `PoolWithErrors` +- `HighPerformancePoolWithErrors`, `ThreadPoolWithErrors`, + `FastThreadPoolWithErrors` -> `PoolWithErrors` - `GlobalThreadPool`, `GlobalHighPerformancePool` -> `GlobalPool` -Only unusual code (e.g. explicit template specialization on the old type name) may need the new spelling. +Only unusual code (e.g. explicit template specialization on the old type name) +may need the new spelling. 
### 2.6 `ErrorHandler::add_callback` return type -`add_callback` now returns **`size_t`** (stable callback id for `remove_callback` / `has_callback`). Code that ignored the return value is unaffected. Code that assumed **`void`** must be updated. +`add_callback` now returns **`size_t`** (stable callback id for +`remove_callback` / `has_callback`). Code that ignored the return value is +unaffected. Code that assumed **`void`** must be updated. ```cpp // v2 @@ -84,7 +103,8 @@ handler.remove_callback(id); ### 3.1 `shutdown()` -`shutdown()` now takes an optional **`ShutdownPolicy`** (default **`drain`**, matching old behavior). Old call sites without arguments behave as before. +`shutdown()` now takes an optional **`ShutdownPolicy`** (default **`drain`**, +matching old behavior). Old call sites without arguments behave as before. ```cpp pool.shutdown(); // still: drain all work @@ -94,41 +114,50 @@ pool.shutdown_for(std::chrono::seconds(5)); // new: timed drain ### 3.2 Destructors -Destructors still shut down the pool; they use **`drain`** by default. No change required unless you want **`drop_pending`** explicitly before destruction. +Destructors still shut down the pool; they use **`drain`** by default. No change +required unless you want **`drop_pending`** explicitly before destruction. ## 4. Behavioral changes (no rename, but semantics differ) ### 4.1 `parallel_for_each` on `ThreadPool` / `FastThreadPool` -Implementation is now **chunked** (same strategy as `HighPerformancePool`): the range is split into a small number of tasks instead of one task per element. +Implementation is now **chunked** (same strategy as `HighPerformancePool`): the +range is split into a small number of tasks instead of one task per element. - **Pros:** Much less submission overhead on large ranges. -- **Cons:** Finer-grained progress / cancellation per element is no longer one-to-one with one pool task. 
+- **Cons:** Finer-grained progress / cancellation per element is no longer + one-to-one with one pool task. -If you relied on **one future per element**, switch to an explicit loop with `submit`, or chunk manually. +If you relied on **one future per element**, switch to an explicit loop with +`submit`, or chunk manually. ### 4.2 Scheduled pools dispatch with `post()` -`ScheduledThreadPoolT` dispatches due tasks with **`post()`** instead of **`submit()`**, so **no `std::future` is created per dispatch**. Your task bodies are unchanged; only internal overhead is lower. +`ScheduledThreadPoolT` dispatches due tasks with **`post()`** instead of +**`submit()`**, so **no `std::future` is created per dispatch**. Your task +bodies are unchanged; only internal overhead is lower. ## 5. Optional improvements after migrating These are **not** required for a successful build but match v2 design well: -| Goal | Approach | -| ---- | -------- | -| Less overhead than `submit()` | Use **`post()`** / **`try_post()`** when you do not need a return value or `std::future`. | -| Dedicated fire-and-forget pool | Use **`LightweightPool`** / **`LightweightPoolT`** (SBO task buffer, no futures). | -| Non-throwing submit | Use **`try_submit()`** / **`try_submit_batch()`** and check **`expected`**. | -| Tune fast pool polling | Use **`ThreadPoolBase>`** or keep **`FastThreadPool`** (10 ms default). | -| Tune HP deque size | **`HighPerformancePool(threads, deque_capacity)`**. | -| Fix global pool size early | **`GlobalPool<...>::init(n)`** before first **`instance()`**. | -| Workers in registry | Pass **`register_workers = true`** to pool constructors. | +| Goal | Approach | +| ------------------------------ | ----------------------------------------------------------------------------------------- | +| Less overhead than `submit()` | Use **`post()`** / **`try_post()`** when you do not need a return value or `std::future`. 
| +| Dedicated fire-and-forget pool | Use **`LightweightPool`** / **`LightweightPoolT`** (SBO task buffer, no futures). | +| Non-throwing submit | Use **`try_submit()`** / **`try_submit_batch()`** and check **`expected`**. | +| Tune fast pool polling | Use **`ThreadPoolBase>`** or keep **`FastThreadPool`** (10 ms default). | +| Tune HP deque size | **`HighPerformancePool(threads, deque_capacity)`**. | +| Fix global pool size early | **`GlobalPool<...>::init(n)`** before first **`instance()`**. | +| Workers in registry | Pass **`register_workers = true`** to pool constructors. | ## 6. Header and module notes -- New headers pulled in by the umbrella header include **`futures.hpp`** (combinators) and coroutine helpers on **`task.hpp`** as documented in [COROUTINES.md](COROUTINES.md). -- Include **`threadschedule/futures.hpp`** directly if you only need combinators. +- New headers pulled in by the umbrella header include **`futures.hpp`** + (combinators) and coroutine helpers on **`task.hpp`** as documented in + [COROUTINES.md](COROUTINES.md). +- Include **`threadschedule/futures.hpp`** directly if you only need + combinators. ## 7. Further reading diff --git a/include/threadschedule/futures.hpp b/include/threadschedule/futures.hpp index b637bfc..999fe63 100644 --- a/include/threadschedule/futures.hpp +++ b/include/threadschedule/futures.hpp @@ -2,8 +2,8 @@ /** * @file futures.hpp - * @brief Combinators for @c std::future: @ref when_all, @ref when_any, - * @ref when_all_settled. + * @brief Combinators for @c std::future: @c when_all, @c when_any, + * @c when_all_settled. * * These utilities simplify waiting on multiple futures produced by thread * pool submissions. 
diff --git a/include/threadschedule/scheduled_pool.hpp b/include/threadschedule/scheduled_pool.hpp index 748ac8e..be29f07 100644 --- a/include/threadschedule/scheduled_pool.hpp +++ b/include/threadschedule/scheduled_pool.hpp @@ -63,7 +63,7 @@ class ScheduledTaskHandle * @brief Thread pool augmented with delayed and periodic task scheduling. * * Non-copyable, non-movable. Combines a dedicated scheduler thread with - * an underlying PoolType (default: @ref ThreadPool) that does the actual work. + * an underlying PoolType (default: @c ThreadPool) that does the actual work. * * @par How task execution works * The pool owns a single scheduler thread that runs an internal loop @@ -74,9 +74,9 @@ class ScheduledTaskHandle * 1. Removes it from the multimap. * 2. Checks if the task has been cancelled (via the atomic flag). If * cancelled, the task is discarded. - * 3. Submits the task to the underlying PoolType via pool_.submit(). + * 3. Posts the task to the underlying PoolType via pool_.post(). * From this point on, the task follows the execution rules of the - * underlying pool (see @ref ThreadPool, @ref FastThreadPool, or + * underlying pool (see @c ThreadPool, @c FastThreadPool, or * @ref HighPerformancePool documentation). * 4. For periodic tasks, the scheduler immediately re-inserts the task * into the multimap with next_run += interval. This means the next @@ -97,7 +97,7 @@ class ScheduledTaskHandle * execute. The scheduler thread exits immediately on shutdown, so * future-scheduled tasks are lost. * - Cancellation is cooperative: calling handle.cancel() sets an atomic - * flag. The scheduler checks this flag before submitting the task to + * flag. The scheduler checks this flag before posting the task to * the pool. Additionally, the pool-side wrapper checks the flag again * right before calling the task. However, a task that is already * running will NOT be interrupted by cancel(). 
@@ -381,13 +381,13 @@ class ScheduledThreadPoolT } }; -/** @brief @ref ScheduledThreadPoolT using the default @ref ThreadPool backend. */ +/** @brief @ref ScheduledThreadPoolT using the default @c ThreadPool backend. */ using ScheduledThreadPool = ScheduledThreadPoolT<ThreadPool>; /** @brief @ref ScheduledThreadPoolT using @ref HighPerformancePool as backend. */ using ScheduledHighPerformancePool = ScheduledThreadPoolT<HighPerformancePool>; -/** @brief @ref ScheduledThreadPoolT using @ref FastThreadPool as backend. */ +/** @brief @ref ScheduledThreadPoolT using @c FastThreadPool as backend. */ using ScheduledFastThreadPool = ScheduledThreadPoolT<FastThreadPool>; -/** @brief @ref ScheduledThreadPoolT using @ref LightweightPool as backend (minimal overhead). */ +/** @brief @ref ScheduledThreadPoolT using @c LightweightPool as backend (minimal overhead). */ using ScheduledLightweightPool = ScheduledThreadPoolT<LightweightPool>; } // namespace threadschedule diff --git a/include/threadschedule/thread_pool.hpp b/include/threadschedule/thread_pool.hpp index a75ac3f..f510790 100644 --- a/include/threadschedule/thread_pool.hpp +++ b/include/threadschedule/thread_pool.hpp @@ -718,7 +718,7 @@ class HighPerformancePool * @brief Fire-and-forget task submission (throwing variant). * * Enqueues a callable without creating a @c std::packaged_task or - * @c std::future, giving roughly 3x higher throughput than @ref submit() + * @c std::future, giving roughly 3x higher throughput than \c submit() * for tasks whose return value is not needed. * * @throws std::runtime_error If the pool is shutting down. @@ -812,7 +812,7 @@ class HighPerformancePool * * Acquires the lock once per batch, distributing tasks across worker * queues in round-robin fashion. Significantly more efficient than - * calling @ref submit() in a loop for large batches. + * calling @c submit() in a loop for large batches. * * @tparam Iterator Forward iterator whose value_type is callable as @c void(). 
* @return @c expected containing a vector of futures, or @@ -1743,6 +1743,7 @@ class ThreadPoolBase }; /** + * @typedef ThreadPool * @brief General-purpose thread pool with indefinite blocking wait. * * Workers block on condition_variable::wait() when idle - zero CPU @@ -1754,6 +1755,7 @@ class ThreadPoolBase using ThreadPool = ThreadPoolBase; /** + * @typedef FastThreadPool * @brief Thread pool with 10 ms polling wait for lower wake-up latency. * * Workers poll with condition_variable::wait_for(10 ms), trading a small @@ -1771,7 +1773,8 @@ using FastThreadPool = ThreadPoolBase>; * @brief Ultra-lightweight fire-and-forget thread pool. * * Designed for maximum throughput on tasks whose return value is not needed. - * Typical measured throughput is **3x** higher than @ref submit() on the + * Typical measured throughput is **3x** higher than @c submit() on e.g. + * @ref HighPerformancePool on the * same hardware, because @c LightweightPoolT avoids the overhead of * @c std::packaged_task, @c std::future, and @c std::shared_ptr entirely. * @@ -1793,7 +1796,7 @@ using FastThreadPool = ThreadPoolBase>; * (no heap allocation). Larger callables fall back to the heap. * * @par What is @e not included (by design) - * - No @c std::future / @c std::packaged_task (use @ref submit() on other + * - No @c std::future / @c std::packaged_task (use @c submit() on other * pools if you need return values). * - No statistics counters (@ref HighPerformancePool::get_statistics). * - No tracing hooks (@ref HighPerformancePool::set_on_task_start). @@ -2098,6 +2101,7 @@ class LightweightPoolT }; /** + * @typedef LightweightPool * @brief Default lightweight pool with 64-byte task slots (56 bytes usable). * * Sufficient for lambdas capturing up to ~7 pointers on 64-bit platforms. @@ -2243,15 +2247,21 @@ class GlobalPool } }; -/** @brief Singleton @ref ThreadPool accessor. */ +/** + * @typedef GlobalThreadPool + * @brief Singleton accessor for the process-wide @c ThreadPool instance. 
+ */ using GlobalThreadPool = GlobalPool<ThreadPool>; -/** @brief Singleton @ref HighPerformancePool accessor. */ +/** + * @typedef GlobalHighPerformancePool + * @brief Singleton accessor for the process-wide @ref HighPerformancePool instance. + */ using GlobalHighPerformancePool = GlobalPool<HighPerformancePool>; /** * @brief Convenience wrapper that applies a callable to every element of a - * container in parallel using the @ref GlobalThreadPool singleton. + * container in parallel using the @c GlobalThreadPool singleton. * * Equivalent to: * @code diff --git a/include/threadschedule/thread_pool_with_errors.hpp b/include/threadschedule/thread_pool_with_errors.hpp index 1454544..5f74047 100644 --- a/include/threadschedule/thread_pool_with_errors.hpp +++ b/include/threadschedule/thread_pool_with_errors.hpp @@ -184,10 +184,10 @@ class PoolWithErrors /** @brief @ref HighPerformancePool with integrated error handling. */ using HighPerformancePoolWithErrors = PoolWithErrors<HighPerformancePool>; -/** @brief @ref FastThreadPool with integrated error handling. */ +/** @brief @c FastThreadPool with integrated error handling. */ using FastThreadPoolWithErrors = PoolWithErrors<FastThreadPool>; -/** @brief @ref ThreadPool with integrated error handling. */ +/** @brief @c ThreadPool with integrated error handling. */ using ThreadPoolWithErrors = PoolWithErrors<ThreadPool>; } // namespace threadschedule diff --git a/include/threadschedule/threadschedule.hpp b/include/threadschedule/threadschedule.hpp index 48c509e..0db0ac7 100644 --- a/include/threadschedule/threadschedule.hpp +++ b/include/threadschedule/threadschedule.hpp @@ -21,7 +21,7 @@ * @brief Modern C++17/20/23/26 Thread Scheduling Library * * A comprehensive header-only library for advanced thread management - * on Linux systems, providing C++ wrappers for pthreads, std::thread, + * on Linux and Windows, providing C++ wrappers for pthreads, std::thread, * and std::jthread with extended functionality. 
* * Features: From 527d2a0b912336442515d8755ead32e0b1ab883c Mon Sep 17 00:00:00 2001 From: Katze719 Date: Mon, 6 Apr 2026 23:20:41 +0200 Subject: [PATCH 12/15] Update documentation links and Doxyfile settings for improved accessibility and extraction - Changed Doxyfile to enable extraction of all documentation elements, enhancing the quality of generated documentation. - Updated links in README.md and migration guide to use HTML anchor tags for better accessibility to CHANGELOG and upgrade instructions. --- Doxyfile | 2 +- README.md | 2 +- docs/MIGRATION_V2.md | 4 ++-- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/Doxyfile b/Doxyfile index 117dcc0..b7998e3 100644 --- a/Doxyfile +++ b/Doxyfile @@ -21,7 +21,7 @@ FILE_PATTERNS = *.hpp *.md RECURSIVE = YES EXCLUDE_PATTERNS = */build/* */.git/* */install/* -EXTRACT_ALL = NO +EXTRACT_ALL = YES EXTRACT_PRIVATE = NO EXTRACT_STATIC = NO EXTRACT_LOCAL_CLASSES = YES diff --git a/README.md b/README.md index 08d7f8a..6365b4b 100644 --- a/README.md +++ b/README.md @@ -67,7 +67,7 @@ coroutines, `std::stop_token`). Highlights: | **Observability** | Optional auto-registration of pool workers in the thread registry; per-task `set_on_task_start` / `set_on_task_end` hooks. | | **Errors** | `ErrorHandler` callbacks get stable IDs; `remove_callback(id)` / `has_callback(id)`. | -See [CHANGELOG.md](CHANGELOG.md) for the full list, including breaking changes +See <a href="CHANGELOG.md">CHANGELOG.md</a> for the full list, including breaking changes when upgrading from v1.x. **Upgrading from v1.x:** [Migration guide (v2.0)](docs/MIGRATION_V2.md) diff --git a/docs/MIGRATION_V2.md b/docs/MIGRATION_V2.md index 5029db6..fdc6ed4 100644 --- a/docs/MIGRATION_V2.md +++ b/docs/MIGRATION_V2.md @@ -4,7 +4,7 @@ This guide helps you move from v1.x to **v2.0.0**. It lists **breaking changes** first, then **behavioral changes** you should be aware of, and finally **optional upgrades** that are not required but often worthwhile. 
-For the authoritative list of every change, see [CHANGELOG.md](../CHANGELOG.md). +For the authoritative list of every change, see <a href="../CHANGELOG.md">CHANGELOG.md</a>. ## 1. Upgrade steps @@ -162,7 +162,7 @@ These are **not** required for a successful build but match v2 design well: ## 7. Further reading - [README.md](../README.md) -- "What's new in v2.0" summary table -- [CHANGELOG.md](../CHANGELOG.md) -- full v2.0.0 notes +- <a href="../CHANGELOG.md">CHANGELOG.md</a> -- full v2.0.0 notes - [INTEGRATION.md](INTEGRATION.md) -- CMake and package managers - [ERROR_HANDLING.md](ERROR_HANDLING.md) -- pools with errors and callbacks - [SCHEDULED_TASKS.md](SCHEDULED_TASKS.md) -- scheduled pools and aliases From 6557689fb3b4e21b56a51d718e2d97c39c5489d1 Mon Sep 17 00:00:00 2001 From: Katze719 Date: Mon, 6 Apr 2026 23:38:13 +0200 Subject: [PATCH 13/15] Enhance coroutine documentation with new features and examples - Added `schedule_on{pool}` and `run_on(pool, fn)` to the coroutine documentation, detailing their usage and behavior with thread pools. - Updated descriptions for `task`, `sync_wait`, and `generator` for consistency and clarity. - Improved overall formatting and readability of the documentation to better guide users in utilizing coroutine features. --- docs/COROUTINES.md | 81 ++++++++++++++++++++++++++++----- include/threadschedule/task.hpp | 74 ++++++++++++++++++++---------- 2 files changed, 120 insertions(+), 35 deletions(-) diff --git a/docs/COROUTINES.md b/docs/COROUTINES.md index 3a2c964..37c06c9 100644 --- a/docs/COROUTINES.md +++ b/docs/COROUTINES.md @@ -11,10 +11,14 @@ write asynchronous-looking code without building your own promise types. 
## Features -- **`task<T>`** -- Lazy single-value coroutine that starts only when `co_await`ed -- **`task<void>`** -- Void specialisation for side-effect-only coroutines -- **`sync_wait(task)`** -- Blocking bridge to run a task from synchronous code -- **`generator<T>`** -- Lazy sequence coroutine producing values via `co_yield` +- **`task<T>`** - Lazy single-value coroutine that starts only when `co_await`ed +- **`task<void>`** - Void specialisation for side-effect-only coroutines +- **`sync_wait(task)`** - Blocking bridge to run a task from synchronous code +- **`generator<T>`** - Lazy sequence coroutine producing values via `co_yield` +- **`schedule_on{pool}`** - `co_await`able hop onto a thread-pool worker (any pool + with `submit(Callable)`) +- **`run_on(pool, fn)`** - Run a callable that returns `task<T>` on the pool and get a + `std::future<T>` for the result - Automatic `std::generator` alias when C++23 `__cpp_lib_generator` is available @@ -37,7 +41,7 @@ int main() { } ``` -## `task<T>` -- Lazy Single-Value Coroutine +## `task<T>` - Lazy Single-Value Coroutine A `task<T>` represents a computation that will produce exactly one value (or throw). It is **lazy**: the coroutine body does not execute until someone @@ -131,7 +135,7 @@ task<std::unique_ptr<int>> make_ptr() { auto ptr = sync_wait(make_ptr()); // std::unique_ptr<int> ``` -## `sync_wait` -- Blocking Bridge +## `sync_wait` - Blocking Bridge `sync_wait` runs a task on the calling thread and blocks until it completes. This is the primary way to consume a `task` from non-coroutine code (e.g. @@ -149,9 +153,9 @@ int main() { > **Note:** `sync_wait` resumes the entire coroutine chain on the calling > thread. It is intended for top-level entry points. Avoid calling `sync_wait` -> from inside a coroutine -- use `co_await` instead. +> from inside a coroutine - use `co_await` instead. -## `generator<T>` -- Lazy Sequence Coroutine +## `generator<T>` - Lazy Sequence Coroutine A `generator<T>` produces a (potentially infinite) sequence of values on-demand via `co_yield`. 
It is compatible with range-based `for` loops. @@ -171,7 +175,7 @@ for (int v : iota(0, 5)) { ### Infinite sequences -Generators can represent infinite sequences -- just `break` out of the loop +Generators can represent infinite sequences - just `break` out of the loop when you're done. The generator's destructor cleans up the coroutine frame: ```cpp @@ -230,13 +234,64 @@ try { When your compiler provides `std::generator` (detected via `__cpp_lib_generator >= 202207L`), `threadschedule::generator` is -automatically aliased to `std::generator`. No code changes needed -- the +automatically aliased to `std::generator`. No code changes needed - the API is compatible. ## Combining Coroutines with Thread Pools -While the coroutine primitives are standalone, they compose naturally with the -library's thread pools: +### `schedule_on{pool}` - resume on a pool worker + +`schedule_on` is an awaitable: **`co_await schedule_on{pool}`** submits the current +coroutine frame to the pool; when a worker runs it, execution continues **on that +thread**. Any pool type works as long as it provides **`submit(Callable)`** (for +example `HighPerformancePool`, `ThreadPool`, `FastThreadPool`, or the global +singletons). + +```cpp +#include +using namespace threadschedule; + +task on_pool(HighPerformancePool& pool) { + co_await schedule_on{pool}; + // this line runs on a pool worker thread + expensive_work(); + co_return; +} + +int main() { + HighPerformancePool pool(4); + sync_wait(on_pool(pool)); +} +``` + +Step-by-step behaviour, nested `schedule_on`, and comparison with `co_await` on +another `task` are documented in Doxygen on **`schedule_on`** and **`run_on`** in +[`include/threadschedule/task.hpp`](../include/threadschedule/task.hpp) (build +with `THREADSCHEDULE_BUILD_DOCS=ON` and open the HTML API reference). 
+ +### `run_on(pool, fn)` - `task` from synchronous code via `std::future` + +**`run_on`** takes a **callable that returns `task`**, invokes it on a **pool +worker**, runs **`sync_wait`** on that task inside the worker, and returns a +**`std::future`** to the caller. Handy when the entry point is not a coroutine +but you want the body expressed as **`task`**. + +```cpp +HighPerformancePool pool(4); + +auto future = run_on(pool, []() -> task { + co_return expensive_work(); // runs on pool; co_await works inside +}); + +int result = future.get(); +``` + +The callable is executed on the pool; **`co_await`** inside the returned task +continues on that worker unless you transfer elsewhere with **`schedule_on`**. + +### Plain `submit` + `future` (no `run_on`) + +You can still bridge ordinary callables and futures without `run_on`: ```cpp #include @@ -261,6 +316,8 @@ int main() { | `task` | `task.hpp` | Lazy void coroutine | | `sync_wait(task)` | `task.hpp` | Blocking bridge, returns `T` | | `sync_wait(task)` | `task.hpp` | Blocking bridge, void overload | +| `schedule_on` | `task.hpp` | Awaitable: continue coroutine on `pool` | +| `run_on(pool, fn)` | `task.hpp` | Run `fn()` (`task`) on pool; returns `std::future` | | `generator` | `generator.hpp` | Lazy multi-value sequence coroutine | All types live in `namespace threadschedule` (alias `ts`). diff --git a/include/threadschedule/task.hpp b/include/threadschedule/task.hpp index cbeccbd..ab30d18 100644 --- a/include/threadschedule/task.hpp +++ b/include/threadschedule/task.hpp @@ -2,11 +2,11 @@ /** * @file task.hpp - * @brief Lazy single-value coroutine (`task`) and blocking bridge (`sync_wait`). + * @brief Coroutine @c task, @c sync_wait, and pool helpers @c schedule_on / @c run_on. * - * A `task` represents a lazy coroutine that produces exactly one value - * (or throws). It does not begin execution until it is `co_await`ed by - * another coroutine or passed to `sync_wait()`. 
+ * @c task is lazy until @c co_await or @c sync_wait. For how work moves onto a + * thread pool and what nested @c schedule_on does, see struct @c schedule_on and + * function template @c run_on below (C++20 only). * * Requires C++20 coroutine support. */ @@ -114,9 +114,8 @@ struct final_awaiter * - **Continuation:** `continuation_` is set by the task's awaiter just * before resuming the task. `final_awaiter` uses it to return control * to the parent coroutine. - * - **Executor:** If `executor_` is set (e.g. via `schedule_on`), the - * continuation is dispatched through the executor instead of using - * symmetric transfer. + * - **Executor:** If @c executor_ is set on the promise, the continuation is + * dispatched through that executor instead of using symmetric transfer. */ template class task_promise_base @@ -624,18 +623,44 @@ inline void sync_wait(task t) // --------------------------------------------------------------------------- /** - * @brief Awaitable that transfers execution to a thread pool. - * - * Use `co_await schedule_on{pool}` inside any coroutine to continue - * execution on one of the pool's worker threads. - * - * @tparam Pool A thread pool type providing @c submit(Callable). + * @brief Awaitable that continues the current coroutine on a thread pool worker. + * + * @tparam Pool Pool type providing @c submit(Callable) (for example + * HighPerformancePool, ThreadPool, FastThreadPool). + * + * @par Mechanism + * The coroutine stays a single @c task frame. Nothing is split into separate + * compiled "halves". @c co_await schedule_on{pool} does the following: + * -# The coroutine suspends at the @c co_await. + * -# @c await_suspend enqueues a job on @p pool that calls @c resume() on this + * coroutine handle. + * -# A worker runs that job; @c resume() continues the coroutine on that + * thread, starting with the line after the @c co_await. Everything after + * that point runs on that worker until another explicit transfer. 
+ * + * Code before the first @c co_await schedule_on{pool} runs on whatever thread + * was already running the coroutine (for example @c main under @c sync_wait, or + * a pool thread if you were already there). Code after runs on whichever worker + * picked up the @c submit. + * + * @par Nested @c schedule_on + * Each @c co_await schedule_on queues another @c submit(resume). With two + * different pools, you first continue on a worker of the first pool, then on a + * worker of the second. Nesting @c schedule_on on the same pool still uses + * @c pool.submit each time: you may run on another worker of that pool. There + * is no guarantee it is the same OS thread as before. + * + * @par Versus @c co_await on another @c task + * Awaiting another @c task usually does not post to a pool; when the child + * finishes, the parent is typically resumed on the same thread (symmetric + * transfer / direct resume). Only @c co_await schedule_on (or similar) + * explicitly pushes the continuation onto the pool queue. * * @par Example * @code * task work(HighPerformancePool& pool) { * co_await schedule_on{pool}; - * // now running on a pool thread + * // runs on a pool worker from here until the next transfer * } * @endcode */ @@ -659,15 +684,18 @@ struct schedule_on // --------------------------------------------------------------------------- /** - * @brief Submit a coroutine-returning callable to a pool and return a - * @c std::future for its result. - * - * The callable is invoked on a pool worker thread. Inside the callable, - * you can use `co_await` freely -- all continuations run on the calling - * pool unless explicitly transferred elsewhere. - * - * @tparam Pool A thread pool type providing @c submit(Callable). - * @tparam F A callable returning @c task. + * @brief Run a callable that returns @c task on a pool worker; return + * @c std::future for the result. + * + * @tparam Pool Pool type providing @c submit(Callable). 
+ * @tparam F Callable with signature returning @c task (for some @c T). + * + * @par Behaviour + * The callable is invoked on a worker thread. That worker calls @c sync_wait + * on the @c task returned by the callable, so the coroutine body runs there. + * Nested @c co_await on other @c task objects typically keeps resuming on that + * same worker unless you @c co_await schedule_on to hand off again. The + * @c std::future is fulfilled when @c sync_wait completes inside the worker. * * @par Example * @code From 5ff5f1cf26b0e86f27dcf09eb1930f9687036214 Mon Sep 17 00:00:00 2001 From: Katze719 Date: Tue, 7 Apr 2026 21:36:30 +0200 Subject: [PATCH 14/15] Update documentation for LightweightPool integration in ScheduledTaskHandle - Added references to `LightweightPool` in the documentation for `ScheduledTaskHandle`, ensuring users are aware of all available pool types. - Improved clarity and consistency in the documentation by including `LightweightPool` in the list of underlying pool types and convenience aliases. --- include/threadschedule/scheduled_pool.hpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/include/threadschedule/scheduled_pool.hpp b/include/threadschedule/scheduled_pool.hpp index be29f07..873f647 100644 --- a/include/threadschedule/scheduled_pool.hpp +++ b/include/threadschedule/scheduled_pool.hpp @@ -76,8 +76,8 @@ class ScheduledTaskHandle * cancelled, the task is discarded. * 3. Posts the task to the underlying PoolType via pool_.post(). * From this point on, the task follows the execution rules of the - * underlying pool (see @c ThreadPool, @c FastThreadPool, or - * @ref HighPerformancePool documentation). + * underlying pool (see @c ThreadPool, @c FastThreadPool, + * @ref HighPerformancePool, or @c LightweightPool documentation). * 4. For periodic tasks, the scheduler immediately re-inserts the task * into the multimap with next_run += interval. 
This means the next * execution is timed from the scheduled time, not from when the @@ -126,7 +126,7 @@ class ScheduledTaskHandle * (default: ThreadPool). * * @see ScheduledThreadPool, ScheduledHighPerformancePool, - * ScheduledFastThreadPool (convenience aliases) + * ScheduledFastThreadPool, ScheduledLightweightPool (convenience aliases) */ template class ScheduledThreadPoolT From e3e6efd64401e1d24e6186a2c07a0e2162fd8097 Mon Sep 17 00:00:00 2001 From: Katze719 Date: Tue, 7 Apr 2026 21:44:43 +0200 Subject: [PATCH 15/15] Update documentation for error handling and task scheduling features - Revised the documentation in `ERROR_HANDLING.md` to enhance clarity and remove redundant checkmarks for features. - Improved the `SCHEDULED_TASKS.md` documentation by adding details about the new `LightweightPool` and its use cases for fire-and-forget tasks. - Updated various sections across documentation files to ensure consistent formatting and clearer descriptions, particularly regarding error handling and task scheduling functionalities. 
--- CHANGELOG.md | 12 ++--- README.md | 52 +++++++++---------- docs/ERROR_HANDLING.md | 10 ++-- docs/INTEGRATION.md | 10 ++-- docs/REGISTRY.md | 24 ++++----- docs/SCHEDULED_TASKS.md | 20 ++++--- docs/TOPOLOGY_NUMA.md | 2 +- include/threadschedule/chaos.hpp | 2 +- include/threadschedule/error_handler.hpp | 7 ++- include/threadschedule/expected.hpp | 16 +++--- include/threadschedule/futures.hpp | 2 +- include/threadschedule/pthread_wrapper.hpp | 13 +++-- include/threadschedule/registered_threads.hpp | 7 ++- include/threadschedule/scheduled_pool.hpp | 7 ++- include/threadschedule/scheduler_policy.hpp | 5 ++ include/threadschedule/task.hpp | 4 +- include/threadschedule/thread_pool.hpp | 21 +++++--- .../thread_pool_with_errors.hpp | 5 ++ include/threadschedule/thread_registry.hpp | 41 ++++++++------- include/threadschedule/thread_wrapper.hpp | 5 ++ include/threadschedule/topology.hpp | 2 +- 21 files changed, 160 insertions(+), 107 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index ce81069..7cc1cb0 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -217,7 +217,7 @@ auto futures = pool.submit_batch(tasks.begin(), tasks.end()); pointers work as callables (e.g. `JThreadWrapperReg("n", "c", &MyClass::run, this)`). - Fix: `JThreadWrapperReg` now correctly forwards `std::stop_token` to callables - that accept it, while also supporting callables without `stop_token` — the + that accept it, while also supporting callables without `stop_token` - the previous `auto&&...` wrapper always claimed to accept a token, causing a compile error when the user's callable did not. @@ -237,7 +237,7 @@ auto futures = pool.submit_batch(tasks.begin(), tasks.end()); required scheduling API. - Added: `FastThreadPool::set_affinity()` and `FastThreadPool::wait_for_tasks()` for API parity with `ThreadPool` and `HighPerformancePool`. 
-- Added: Missing forwarding methods in `WithErrors` wrappers — +- Added: Missing forwarding methods in `WithErrors` wrappers - `HighPerformancePoolWithErrors::set_affinity()`, `FastThreadPoolWithErrors::set_affinity()` and `FastThreadPoolWithErrors::wait_for_tasks()`. @@ -250,12 +250,12 @@ auto futures = pool.submit_batch(tasks.begin(), tasks.end()); all pool classes, and `ScheduledTaskHandle`. - Removed: Unused `thread_local std::random_device` in `HighPerformancePool::worker_function`. -- Added: C++20 coroutine primitive `task` (`task.hpp`) — a lazy single-value +- Added: C++20 coroutine primitive `task` (`task.hpp`) - a lazy single-value coroutine that starts execution only when `co_await`ed. Includes full `task` specialisation and exception propagation. -- Added: `sync_wait(task)` / `sync_wait(task)` — blocking bridge that +- Added: `sync_wait(task)` / `sync_wait(task)` - blocking bridge that runs a task on the calling thread and returns its result. -- Added: C++20 coroutine primitive `generator` (`generator.hpp`) — a lazy +- Added: C++20 coroutine primitive `generator` (`generator.hpp`) - a lazy multi-value coroutine producing elements via `co_yield`. Supports range-based for loops (`begin()` / `end()` with `std::default_sentinel_t`). Automatically aliases `std::generator` when C++23 `__cpp_lib_generator` is available. @@ -277,7 +277,7 @@ auto futures = pool.submit_batch(tasks.begin(), tasks.end()); - Build/Style: Update `.clang-format` (`IndentPPDirectives: AfterHash`) for clearer preprocessor indentation. -- Core: Improve `expected.hpp` header detection — check `` or +- Core: Improve `expected.hpp` header detection - check `` or `` presence before including ``. - Refactor: Simplify and clarify conditional compilation in `expected.hpp` for maintainability. diff --git a/README.md b/README.md index 6365b4b..fa5d0eb 100644 --- a/README.md +++ b/README.md @@ -35,7 +35,7 @@ or with optional **shared runtime** for multi-DSO applications. 
- **NUMA-aware Topology Helpers**: Easy affinity builders across nodes - **Chaos Testing**: RAII controller to perturb affinity/priority for validation - **C++20 Coroutines**: `task`, `generator`, and `sync_wait` out of the - box -- no boilerplate promise types needed + box - no boilerplate promise types needed - **High-Performance Pools**: Work-stealing pool, `post()` / `try_post()`, and optional `LightweightPool` for fire-and-forget workloads with minimal overhead - **Scheduled Tasks**: Run tasks at specific times, after delays, or @@ -54,12 +54,12 @@ coroutines, `std::stop_token`). Highlights: | Area | What changed | | --------------------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | -| **Lightweight pool** | `LightweightPoolT` / `LightweightPool` -- fire-and-forget only, configurable SBO buffer (default 64 B), no futures or stats. Workers are still `ThreadWrapper` (name, affinity, policy). Ideal for maximum throughput when you do not need a return value. | -| **`post()` / `try_post()`** | On `HighPerformancePool`, `ThreadPool` / `FastThreadPool`, and `GlobalPool` -- same queue path as `submit()` but skips `packaged_task` / `future` overhead. | -| **Non-throwing submit** | `try_submit()` / `try_submit_batch()` return `expected` instead of throwing on shutdown. | +| **Lightweight pool** | `LightweightPoolT` / `LightweightPool` - fire-and-forget only, configurable SBO buffer (default 64 B), no futures or stats. Workers are still `ThreadWrapper` (name, affinity, policy). Ideal for maximum throughput when you do not need a return value. | +| **`post()` / `try_post()`** | On `HighPerformancePool`, `ThreadPool` / `FastThreadPool`, and `GlobalPool` - same queue path as `submit()` but skips `packaged_task` / `future` overhead. 
| +| **Non-throwing submit** | `try_submit()` returns `expected<std::future<T>, error_code>`; `try_submit_batch()` returns `expected<std::vector<std::future<T>>, error_code>` instead of throwing on shutdown. | | **Scheduled dispatch** | `ScheduledThreadPoolT` dispatches with `post()` internally. Alias `ScheduledLightweightPool` uses `LightweightPool` as the backend. | | **Shutdown** | `ShutdownPolicy::drain` (default) vs `drop_pending`; `shutdown_for(timeout)` for a timed drain. | -| **Parallel loops** | Chunked `parallel_for_each` on single-queue pools (same helper as the work-stealing pool). | +| **Parallel loops** | Chunked `parallel_for_each` on all pool types (shared helper across single-queue and work-stealing pools). | | **Tuning** | `PollingWait` for `FastThreadPool`, configurable work-stealing deque capacity on `HighPerformancePool`, `GlobalPool::init(n)` before first use. | | **C++20** | Ranges overloads for batch submit and `parallel_for_each`; `submit`/`try_submit` with `std::stop_token` (cooperative skip). | | **Futures** | `when_all`, `when_any`, `when_all_settled` in `futures.hpp`. 
| @@ -104,32 +104,32 @@ on: | Platform | Compiler | C++17 | C++20 | C++23 | C++26 | | ------------------- | ------------------ | :---: | :---: | :---: | :---: | | **Linux (x86_64)** | | | | | | -| Ubuntu 22.04 | GCC 11 | ✅ | ✅ | ✅ | - | -| Ubuntu 22.04 | GCC 12 | - | ✅ | - | - | -| Ubuntu 22.04 | Clang 14 | ✅ | ✅ | ✅ | - | -| Ubuntu 22.04 | Clang 15 | - | ✅ | ✅ | - | -| Ubuntu 24.04 | GCC 13 | ✅ | ✅ | ✅ | - | -| Ubuntu 24.04 | GCC 14 | ✅ | ✅ | ✅ | ✅ | -| Ubuntu 24.04 | GCC 15 | - | ✅ | ✅ | ✅ | -| Ubuntu 24.04 | Clang 16 | ✅ | ✅ | - | - | -| Ubuntu 24.04 | Clang 18 | ✅ | ✅ | - | - | -| Ubuntu 24.04 | Clang 19 | - | ✅ | ✅ | ✅ | -| Ubuntu 24.04 | Clang 21 | - | ✅ | ✅ | ✅ | +| Ubuntu 22.04 | GCC 11 | yes | yes | yes | - | +| Ubuntu 22.04 | GCC 12 | - | yes | - | - | +| Ubuntu 22.04 | Clang 14 | yes | yes | yes | - | +| Ubuntu 22.04 | Clang 15 | - | yes | yes | - | +| Ubuntu 24.04 | GCC 13 | yes | yes | yes | - | +| Ubuntu 24.04 | GCC 14 | yes | yes | yes | yes | +| Ubuntu 24.04 | GCC 15 | - | yes | yes | yes | +| Ubuntu 24.04 | Clang 16 | yes | yes | - | - | +| Ubuntu 24.04 | Clang 18 | yes | yes | - | - | +| Ubuntu 24.04 | Clang 19 | - | yes | yes | yes | +| Ubuntu 24.04 | Clang 21 | - | yes | yes | yes | | **Linux (ARM64)** | | | | | | -| Ubuntu 24.04 ARM64 | GCC 13 (system) | ✅ | ✅ | ✅ | - | -| Ubuntu 24.04 ARM64 | GCC 14 | - | ✅ | ✅ | ✅ | +| Ubuntu 24.04 ARM64 | GCC 13 (system) | yes | yes | yes | - | +| Ubuntu 24.04 ARM64 | GCC 14 | - | yes | yes | yes | | **Windows** | | | | | | -| Windows Server 2022 | MSVC 2022 | ✅ | ✅ | ✅ | - | -| Windows Server 2022 | MinGW-w64 (GCC 15) | ✅ | ✅ | ✅ | - | -| Windows Server 2025 | MSVC 2022 | ✅ | ✅ | ✅ | - | -| Windows Server 2025 | MinGW-w64 (GCC 15) | ✅ | ✅ | ✅ | - | +| Windows Server 2022 | MSVC 2022 | yes | yes | yes | - | +| Windows Server 2022 | MinGW-w64 (GCC 15) | yes | yes | yes | - | +| Windows Server 2025 | MSVC 2022 | yes | yes | yes | - | +| Windows Server 2025 | MinGW-w64 (GCC 15) | yes | yes | yes | - | 
**Additional platforms:** ThreadSchedule should work on other platforms (macOS, FreeBSD, other Linux distributions) with standard C++17+ compilers, but these are not regularly tested in CI. > **C++23**: GCC 12's libstdc++ lacks monadic `std::expected` operations -> (`and_then`, `transform`, …). Clang 16/18 on Ubuntu 24.04 use GCC 14's +> (`and_then`, `transform`, ...). Clang 16/18 on Ubuntu 24.04 use GCC 14's > libstdc++ headers which expose `std::expected` incorrectly to those Clang > versions. These combinations are therefore only tested up to C++20. > @@ -243,7 +243,7 @@ int main() { std::cout << "Frequent task!" << std::endl; }); - // v2: ScheduledLightweightPool -- same API, LightweightPool backend (post-based dispatch) + // v2: ScheduledLightweightPool - same API, LightweightPool backend (post-based dispatch) // Error handling HighPerformancePoolWithErrors pool_safe(4); @@ -398,7 +398,7 @@ auto value = pool.submit([]{ return 42; }); // standard future-based API remains ### Coroutines (C++20) -Lazy coroutine primitives -- no boilerplate promise types required. +Lazy coroutine primitives - no boilerplate promise types required. 
```cpp #include @@ -410,7 +410,7 @@ task compute(int x) { } task pipeline() { - int a = co_await compute(21); // lazy -- starts here + int a = co_await compute(21); // lazy - starts here co_return a; // 42 } diff --git a/docs/ERROR_HANDLING.md b/docs/ERROR_HANDLING.md index 9967b53..c3c12e9 100644 --- a/docs/ERROR_HANDLING.md +++ b/docs/ERROR_HANDLING.md @@ -4,11 +4,11 @@ ThreadSchedule provides comprehensive error handling for asynchronous tasks with ## Features -- ✅ **Global error callbacks** - Handle all exceptions in one place -- ✅ **Per-future error callbacks** - Handle specific task errors -- ✅ **Error context** - Get detailed information about errors (task description, thread ID, timestamp) -- ✅ **Thread-safe** - Error handlers work correctly across threads -- ✅ **Non-intrusive** - Original thread pools remain unchanged +- **Global error callbacks** - Handle all exceptions in one place +- **Per-future error callbacks** - Handle specific task errors +- **Error context** - Get detailed information about errors (task description, thread ID, timestamp) +- **Thread-safe** - Error handlers work correctly across threads +- **Non-intrusive** - Original thread pools remain unchanged ## Quick Start diff --git a/docs/INTEGRATION.md b/docs/INTEGRATION.md index 8a4b410..360f276 100644 --- a/docs/INTEGRATION.md +++ b/docs/INTEGRATION.md @@ -84,11 +84,11 @@ int main() { ``` **Why CPM?** -- ✅ Automatic caching - downloads dependencies once -- ✅ Version pinning - reproducible builds -- ✅ No git submodules needed -- ✅ Works seamlessly with CI/CD -- ✅ Compatible with all CMake features +- Automatic caching - downloads dependencies once +- Version pinning - reproducible builds +- No git submodules needed +- Works seamlessly with CI/CD +- Compatible with all CMake features ### Method 2: CMake FetchContent diff --git a/docs/REGISTRY.md b/docs/REGISTRY.md index a3aa7c9..ca2fe27 100644 --- a/docs/REGISTRY.md +++ b/docs/REGISTRY.md @@ -234,11 +234,11 @@ graph TD - **Runtime Mode**: 
Shared runtime created at startup, provides global registry instance - Core entrypoints: - - `threadschedule::registry()` – default global registry - - `threadschedule::set_external_registry(...)` – app-injected global registry - - `threadschedule::CompositeThreadRegistry` – merge multiple registries (views) - - `threadschedule::AutoRegisterCurrentThread` – RAII auto-registration - - `threadschedule::ThreadWrapperReg` – opt-in wrapper that auto-registers + - `threadschedule::registry()` - default global registry + - `threadschedule::set_external_registry(...)` - app-injected global registry + - `threadschedule::CompositeThreadRegistry` - merge multiple registries (views) + - `threadschedule::AutoRegisterCurrentThread` - RAII auto-registration + - `threadschedule::ThreadWrapperReg` - opt-in wrapper that auto-registers **Important:** The registry **requires control blocks** for all control operations (`set_affinity`, `set_priority`, `set_scheduling_policy`, `set_name`). Threads registered without control blocks can be queried but not controlled. Use `ThreadWrapperReg` or `AutoRegisterCurrentThread` to automatically create and register control blocks. @@ -316,8 +316,8 @@ target_link_libraries(your_dso PRIVATE ThreadSchedule::ThreadSchedule ThreadSche ``` - Exported APIs (same as header-only), provided by the runtime: - - `threadschedule::registry()` – returns the single process-wide registry instance - - `threadschedule::set_external_registry(ThreadRegistry*)` – optionally redirect runtime to an app-owned instance + - `threadschedule::registry()` - returns the single process-wide registry instance + - `threadschedule::set_external_registry(ThreadRegistry*)` - optionally redirect runtime to an app-owned instance Notes: - With `THREADSCHEDULE_RUNTIME=ON`, the header declares these functions and the `.so/.dll` provides the definitions. 
@@ -529,7 +529,7 @@ void foreign_thread() { } ``` -#### 5) Runtime (shared) example – app + two DSOs +#### 5) Runtime (shared) example - app + two DSOs This repository includes a minimal working example under `examples/runtime_shared/` that demonstrates using `THREADSCHEDULE_RUNTIME`: @@ -581,7 +581,7 @@ cmake -B build -DTHREADSCHEDULE_RUNTIME=ON -DTHREADSCHEDULE_BUILD_EXAMPLES=ON cmake --build build --target runtime_main ``` -Run `runtime_main` – it will list threads from both DSOs via the single shared registry. +Run `runtime_main` - it will list threads from both DSOs via the single shared registry. ### Platform notes @@ -593,9 +593,9 @@ Run `runtime_main` – it will list threads from both DSOs via the single shared ### Error handling All control functions return `expected`. Typical errors include: -- `std::errc::no_such_process` – Thread not found in registry or no control block available -- `std::errc::operation_not_permitted` – Insufficient privileges -- `std::errc::invalid_argument` – Invalid parameters +- `std::errc::no_such_process` - Thread not found in registry or no control block available +- `std::errc::operation_not_permitted` - Insufficient privileges +- `std::errc::invalid_argument` - Invalid parameters ### Duplicate registrations diff --git a/docs/SCHEDULED_TASKS.md b/docs/SCHEDULED_TASKS.md index 48b4761..05384ab 100644 --- a/docs/SCHEDULED_TASKS.md +++ b/docs/SCHEDULED_TASKS.md @@ -4,11 +4,11 @@ ThreadSchedule provides a powerful scheduling system for running tasks at specif ## Features -- ✅ **One-time scheduled tasks** - Run a task after a delay or at a specific time -- ✅ **Periodic tasks** - Run tasks repeatedly at fixed intervals -- ✅ **Cancellable tasks** - Cancel scheduled tasks before they execute -- ✅ **Flexible execution** - Choose from ThreadPool (default), HighPerformancePool, or FastThreadPool -- ✅ **Thread-safe** - Safe to use from multiple threads +- **One-time scheduled tasks** - Run a task after a delay or at a specific time +- 
**Periodic tasks** - Run tasks repeatedly at fixed intervals +- **Cancellable tasks** - Cancel scheduled tasks before they execute +- **Flexible execution** - Choose from ThreadPool (default), HighPerformancePool, FastThreadPool, or LightweightPool +- **Thread-safe** - Safe to use from multiple threads ## Quick Start @@ -33,7 +33,7 @@ int main() { ### Pool Types -ThreadSchedule provides three variants of the scheduled pool: +ThreadSchedule provides four built-in variants of the scheduled pool: ```cpp // Default: Uses ThreadPool (< 1k tasks/sec, simple and efficient) @@ -45,6 +45,9 @@ ScheduledHighPerformancePool scheduler_hp(4); // Fast: Uses FastThreadPool (1k-10k tasks/sec, single queue) ScheduledFastThreadPool scheduler_fast(4); +// Lightweight: Uses LightweightPool (fire-and-forget, no futures/stats) +ScheduledLightweightPool scheduler_lw(4); + // Custom: Use any pool type ScheduledThreadPoolT scheduler_custom(4); ``` @@ -271,6 +274,11 @@ auto backup = scheduler.schedule_periodic(std::chrono::hours(24), []() { - **Pros**: Single queue, balanced performance - **Best for**: Batch processing, moderate workloads +### LightweightPool +- **Use when**: Fire-and-forget scheduled work, no return values needed +- **Pros**: Minimal overhead, no future/packaged_task allocation per dispatch +- **Best for**: Periodic logging, telemetry, cleanup tasks + **Example:** ```cpp // For infrequent timers (default) diff --git a/docs/TOPOLOGY_NUMA.md b/docs/TOPOLOGY_NUMA.md index 4eb99c2..55c407d 100644 --- a/docs/TOPOLOGY_NUMA.md +++ b/docs/TOPOLOGY_NUMA.md @@ -28,7 +28,7 @@ ThreadWrapper t([]{ /* work */ }); ThreadPool pool(8); auto affs = distribute_affinities_by_numa(pool.size()); for (size_t i = 0; i < pool.size(); ++i) { - // In simple ThreadPool: use set_affinity returning bool + // set_affinity returns expected (void)pool.set_affinity(affs[i]); } ``` diff --git a/include/threadschedule/chaos.hpp b/include/threadschedule/chaos.hpp index 1be7538..78883b3 100644 --- 
a/include/threadschedule/chaos.hpp +++ b/include/threadschedule/chaos.hpp @@ -66,7 +66,7 @@ struct ChaosConfig * synchronized, so multiple controllers or concurrent registrations are * safe. * - * @warning Intended for testing and validation only -- not for production + * @warning Intended for testing and validation only - not for production * use. Perturbations may cause spurious priority inversions and * cache-thrashing. * diff --git a/include/threadschedule/error_handler.hpp b/include/threadschedule/error_handler.hpp index ed8b576..920957f 100644 --- a/include/threadschedule/error_handler.hpp +++ b/include/threadschedule/error_handler.hpp @@ -1,5 +1,10 @@ #pragma once +/** + * @file error_handler.hpp + * @brief Error handling primitives: TaskError, ErrorHandler, and ErrorHandledTask. + */ + #include #include #include @@ -122,7 +127,7 @@ using ErrorCallback = std::function; * * @par Callback execution * - Callbacks are invoked in the order they were registered (FIFO). - * - Callbacks run **under the lock** -- keep them short and non-blocking to + * - Callbacks run **under the lock** - keep them short and non-blocking to * avoid contention with other threads that may call handle_error() or * add_callback() concurrently. 
* - If a callback itself throws, the exception is silently swallowed so that diff --git a/include/threadschedule/expected.hpp b/include/threadschedule/expected.hpp index 551187c..bf1dfec 100644 --- a/include/threadschedule/expected.hpp +++ b/include/threadschedule/expected.hpp @@ -18,10 +18,10 @@ * @par Monadic operations * Both the primary template and the @c void specialization support the four * monadic combinators from P0323R12: - * - @c and_then -- chain an operation that returns an @c expected - * - @c or_else -- recover from an error, returning an @c expected - * - @c transform -- map the contained value - * - @c transform_error -- map the contained error + * - @c and_then - chain an operation that returns an @c expected + * - @c or_else - recover from an error, returning an @c expected + * - @c transform - map the contained value + * - @c transform_error - map the contained error */ #include @@ -205,10 +205,10 @@ class unexpected * * @par Monadic operations * The following combinators are provided (matching the C++23 specification): - * - @c and_then(f) -- if has_value(), invoke @p f with the value and return the result - * - @c or_else(f) -- if in error state, invoke @p f with the error and return the result - * - @c transform(f) -- if has_value(), apply @p f to the value and wrap the result - * - @c transform_error(f) -- if in error state, apply @p f to the error and wrap the result + * - @c and_then(f) - if has_value(), invoke @p f with the value and return the result + * - @c or_else(f) - if in error state, invoke @p f with the error and return the result + * - @c transform(f) - if has_value(), apply @p f to the value and wrap the result + * - @c transform_error(f) - if in error state, apply @p f to the error and wrap the result */ template class expected diff --git a/include/threadschedule/futures.hpp b/include/threadschedule/futures.hpp index 999fe63..19c8c9e 100644 --- a/include/threadschedule/futures.hpp +++ b/include/threadschedule/futures.hpp @@ 
-144,7 +144,7 @@ inline auto when_all_settled(std::vector>& futures) * Polls all futures round-robin with a 1 ms timeout until one is ready, * then returns its index and value. * - * @note The remaining futures are left in their current state -- the caller + * @note The remaining futures are left in their current state - the caller * is responsible for managing their lifetime. * * @tparam T The value type of each future. diff --git a/include/threadschedule/pthread_wrapper.hpp b/include/threadschedule/pthread_wrapper.hpp index db7ee4b..51816de 100644 --- a/include/threadschedule/pthread_wrapper.hpp +++ b/include/threadschedule/pthread_wrapper.hpp @@ -1,5 +1,10 @@ #pragma once +/** + * @file pthread_wrapper.hpp + * @brief RAII wrapper around POSIX threads (Linux only). + */ + #include "concepts.hpp" #include "expected.hpp" #include "scheduler_policy.hpp" @@ -26,7 +31,7 @@ namespace threadschedule /** * @brief RAII wrapper around POSIX threads with a modern C++ interface. * - * Linux-only -- not available on Windows (guarded by @c _WIN32). + * Linux-only - not available on Windows (guarded by @c _WIN32). * * Non-copyable, movable. The destructor automatically joins the thread * if it is still joinable, which **blocks** until the thread finishes. @@ -41,8 +46,8 @@ namespace threadschedule * affect the **calling** thread, not the PThreadWrapper's thread. * * @par Factory methods - * - create_with_config() -- creates a thread and applies name/policy/priority. - * - create_with_attributes() -- creates a thread from a raw @c pthread_attr_t. + * - create_with_config() - creates a thread and applies name/policy/priority. + * - create_with_attributes() - creates a thread from a raw @c pthread_attr_t. * * @see is_thread_like (specialised to @c true_type at end of file) */ @@ -410,7 +415,7 @@ class PThreadAttributes * * @note The constructor throws @c std::runtime_error if * @c pthread_mutex_init fails. 
Unusually for a mutex type, - * lock() and unlock() also throw on error -- callers should be + * lock() and unlock() also throw on error - callers should be * aware of this when mixing with code that assumes non-throwing * mutex operations. */ diff --git a/include/threadschedule/registered_threads.hpp b/include/threadschedule/registered_threads.hpp index 63a607b..88fbc0b 100644 --- a/include/threadschedule/registered_threads.hpp +++ b/include/threadschedule/registered_threads.hpp @@ -1,5 +1,10 @@ #pragma once +/** + * @file registered_threads.hpp + * @brief Thread wrappers with automatic global registry registration. + */ + #include "pthread_wrapper.hpp" #include "thread_registry.hpp" #include "thread_wrapper.hpp" @@ -50,7 +55,7 @@ class ThreadWrapperReg : public ThreadWrapper * Non-copyable, movable. C++20 only. Behaves like @ref ThreadWrapperReg * but wraps a @c std::jthread and handles @c std::stop_token * forwarding: the callable may accept a @c stop_token as its first - * argument, its last argument, or not at all -- all three signatures + * argument, its last argument, or not at all - all three signatures * are detected at compile time and dispatched accordingly. */ class JThreadWrapperReg : public JThreadWrapper diff --git a/include/threadschedule/scheduled_pool.hpp b/include/threadschedule/scheduled_pool.hpp index 873f647..f3f1863 100644 --- a/include/threadschedule/scheduled_pool.hpp +++ b/include/threadschedule/scheduled_pool.hpp @@ -1,5 +1,10 @@ #pragma once +/** + * @file scheduled_pool.hpp + * @brief Delayed and periodic task scheduling on top of any pool type. + */ + #include "expected.hpp" #include "thread_pool.hpp" #include @@ -107,7 +112,7 @@ class ScheduledTaskHandle * from when the task actually finishes. * - There is no returned std::future for scheduled tasks. If you need * to observe the result, use the underlying pool directly via - * thread_pool().submit(). + * thread_pool().post() or thread_pool().submit(). 
* * @par Thread safety * All schedule_* methods are thread-safe (protected by an internal diff --git a/include/threadschedule/scheduler_policy.hpp b/include/threadschedule/scheduler_policy.hpp index 61a9dfc..cb167d7 100644 --- a/include/threadschedule/scheduler_policy.hpp +++ b/include/threadschedule/scheduler_policy.hpp @@ -1,5 +1,10 @@ #pragma once +/** + * @file scheduler_policy.hpp + * @brief Scheduling policies, thread priority, and CPU affinity types. + */ + #include "expected.hpp" #include #include diff --git a/include/threadschedule/task.hpp b/include/threadschedule/task.hpp index ab30d18..3fd4e51 100644 --- a/include/threadschedule/task.hpp +++ b/include/threadschedule/task.hpp @@ -536,7 +536,7 @@ class sync_wait_task * its result. * * This is the primary bridge between coroutine code and synchronous code. - * The task is resumed **on the calling thread** -- no thread pool or + * The task is resumed **on the calling thread** - no thread pool or * executor is involved. * * If the task's coroutine body throws an exception, `sync_wait` @@ -587,7 +587,7 @@ auto sync_wait(task t) -> T * Overload for void tasks. Behaves identically to the `task` overload * but returns nothing. * - * The task is resumed **on the calling thread** -- no thread pool or + * The task is resumed **on the calling thread** - no thread pool or * executor is involved. If the task body throws, the exception is * re-thrown to the caller. * diff --git a/include/threadschedule/thread_pool.hpp b/include/threadschedule/thread_pool.hpp index f510790..47c5436 100644 --- a/include/threadschedule/thread_pool.hpp +++ b/include/threadschedule/thread_pool.hpp @@ -1,5 +1,10 @@ #pragma once +/** + * @file thread_pool.hpp + * @brief Thread pools: HighPerformancePool, ThreadPoolBase, LightweightPoolT, and GlobalPool. 
+ */ + #include "expected.hpp" #include "scheduler_policy.hpp" #include "thread_registry.hpp" @@ -777,8 +782,10 @@ class HighPerformancePool #if __cpp_lib_jthread >= 201911L /** - * @brief Submit a cancellable task. If stop is already requested the task - * is skipped and the future throws @c std::future_error (broken_promise). + * @brief Submit a cancellable task (C++20). + * + * If @p token is already stopped the task body is skipped and + * the future receives a default-constructed result. */ template auto submit(std::stop_token token, F&& f, Args&&... args) -> std::future> @@ -791,9 +798,7 @@ class HighPerformancePool }); } - /** - * @brief Non-throwing cancellable submission. - */ + /// @brief Non-throwing cancellable submission (C++20). template auto try_submit(std::stop_token token, F&& f, Args&&... args) -> expected>, std::error_code> @@ -973,7 +978,7 @@ class HighPerformancePool * * Each worker is named @c name_prefix + "_0", @c "_1", etc. * - * @return @c expected -- error if the OS + * @return @c expected - error if the OS * rejected any configuration call. */ auto configure_threads(std::string const& name_prefix, SchedulingPolicy policy = SchedulingPolicy::OTHER, @@ -1963,8 +1968,8 @@ class LightweightPoolT /** * @brief Shut the pool down. * - * @param policy @c drain (default) -- workers finish all queued tasks - * before exiting. @c drop_pending -- the queue is cleared + * @param policy @c drain (default) - workers finish all queued tasks + * before exiting. @c drop_pending - the queue is cleared * and only the currently executing tasks are allowed to * finish. 
* diff --git a/include/threadschedule/thread_pool_with_errors.hpp b/include/threadschedule/thread_pool_with_errors.hpp index 5f74047..756a320 100644 --- a/include/threadschedule/thread_pool_with_errors.hpp +++ b/include/threadschedule/thread_pool_with_errors.hpp @@ -1,5 +1,10 @@ #pragma once +/** + * @file thread_pool_with_errors.hpp + * @brief PoolWithErrors wrapper that combines any pool with an ErrorHandler. + */ + #include "error_handler.hpp" #include "thread_pool.hpp" #include diff --git a/include/threadschedule/thread_registry.hpp b/include/threadschedule/thread_registry.hpp index 7dd5b9e..baf5d5b 100644 --- a/include/threadschedule/thread_registry.hpp +++ b/include/threadschedule/thread_registry.hpp @@ -1,5 +1,10 @@ #pragma once +/** + * @file thread_registry.hpp + * @brief Process-wide thread registry, control blocks, and composite registry. + */ + #include "expected.hpp" #include "scheduler_policy.hpp" #include "thread_wrapper.hpp" // for ThreadInfo, ThreadAffinity @@ -61,19 +66,19 @@ using Tid = pid_t; // Linux TID via gettid() * Fully copyable and movable (regular value semantics). * * @par Lifetime - * A RegisteredThreadInfo is a *snapshot* -- it may outlive the thread it + * A RegisteredThreadInfo is a *snapshot* - it may outlive the thread it * describes. The @c alive flag reflects the state at the time the snapshot * was taken; it is **not** updated retroactively when the thread unregisters. * * @par Fields - * - @c tid -- OS-level thread identifier (@c pid_t on Linux via + * - @c tid - OS-level thread identifier (@c pid_t on Linux via * @c gettid(), @c DWORD on Windows). - * - @c stdId -- The corresponding @c std::thread::id. - * - @c name -- Human-readable name given at registration time. - * - @c componentTag -- Optional logical grouping tag (e.g. "io", "compute"). - * - @c alive -- @c true while the thread is registered; set to @c false when + * - @c stdId - The corresponding @c std::thread::id. 
+ * - @c name - Human-readable name given at registration time. + * - @c componentTag - Optional logical grouping tag (e.g. "io", "compute"). + * - @c alive - @c true while the thread is registered; set to @c false when * the thread calls @c unregister_current_thread(). - * - @c control -- Shared pointer to the thread's @ref ThreadControlBlock. May be + * - @c control - Shared pointer to the thread's @ref ThreadControlBlock. May be * @c nullptr if the thread was registered without a control * block (i.e. via the name-only overload of * @c register_current_thread()). @@ -108,7 +113,7 @@ struct RegisteredThreadInfo * * @par Thread safety * - The object is **not** copyable and **not** movable (identity type). - * - All @c set_* methods are safe to call from **any** thread -- they operate + * - All @c set_* methods are safe to call from **any** thread - they operate * on the stored native handle, not on thread-local state. * - Concurrent calls to different @c set_* methods on the same instance are * safe (each call is a single OS syscall on the stored handle). @@ -404,7 +409,7 @@ class ThreadRegistry : public detail::QueryFacadeMixin * * A QueryView is produced by ThreadRegistry::query() (or by chaining * operations on an existing QueryView). It holds an internal - * @c std::vector that is a **snapshot** -- mutations + * @c std::vector that is a **snapshot** - mutations * to the originating ThreadRegistry after the QueryView was created are * not visible. * @@ -421,18 +426,18 @@ class ThreadRegistry : public detail::QueryFacadeMixin * * @par API * Provides a functional-style interface: - * - **filter(pred)** -- returns a new QueryView containing only entries + * - **filter(pred)** - returns a new QueryView containing only entries * that satisfy @p pred. - * - **map(fn)** -- transforms each entry and returns a + * - **map(fn)** - transforms each entry and returns a * @c std::vector. - * - **for_each(fn)** -- applies @p fn to every entry. 
- * - **find_if(pred)** -- returns the first matching entry, or + * - **for_each(fn)** - applies @p fn to every entry. + * - **find_if(pred)** - returns the first matching entry, or * @c std::nullopt. - * - **any / all / none(pred)** -- boolean aggregation predicates. - * - **take(n) / skip(n)** -- positional slicing, returning new + * - **any / all / none(pred)** - boolean aggregation predicates. + * - **take(n) / skip(n)** - positional slicing, returning new * QueryViews. - * - **count() / empty()** -- size queries. - * - **entries()** -- direct access to the underlying vector. + * - **count() / empty()** - size queries. + * - **entries()** - direct access to the underlying vector. */ class QueryView { @@ -838,7 +843,7 @@ class CompositeThreadRegistry : public detail::QueryFacadeMixin diff --git a/include/threadschedule/topology.hpp b/include/threadschedule/topology.hpp index 76dfc90..3f78d36 100644 --- a/include/threadschedule/topology.hpp +++ b/include/threadschedule/topology.hpp @@ -47,7 +47,7 @@ struct CpuTopology * Windows: single node, sequential CPU indices. * * Called frequently by chaos/affinity helpers. The result is not - * cached internally -- consider caching the returned CpuTopology + * cached internally - consider caching the returned CpuTopology * yourself if performance of repeated calls matters. */ inline auto read_topology() -> CpuTopology