From e5e17e4fdd504c7a559e6ee650814d948f1103b2 Mon Sep 17 00:00:00 2001
From: Katze719 <pauldorn1234@gmail.com>
Date: Tue, 31 Mar 2026 21:34:29 +0200
Subject: [PATCH] Enhance documentation and structure across multiple headers

- Updated `chaos.hpp` to improve descriptions and clarify usage of `ChaosConfig` and `ChaosController`.
- Enhanced `concepts.hpp` with detailed explanations for C++20 concepts and their fallbacks for older compilers.
- Improved `error_handler.hpp` to provide clearer descriptions of error handling mechanisms and callback functionalities.
- Expanded `expected.hpp` with comprehensive documentation for the polyfill of `std::expected`, including exception handling and monadic operations.
- Refined `generator.hpp` to clarify the coroutine's behavior and usage examples.
- Enhanced `profiles.hpp` with detailed descriptions of thread profiles and their application.
- Updated `pthread_wrapper.hpp` to provide clearer semantics for POSIX thread wrappers and attributes.
- Improved `scheduled_pool.hpp` to clarify task scheduling features and cancellation semantics.
- Enhanced `scheduler_policy.hpp` with detailed explanations of scheduling policies and thread priority management.
- Updated `task.hpp` to provide comprehensive documentation for coroutine tasks and their awaiters.
---
 include/threadschedule/chaos.hpp              |  54 ++-
 include/threadschedule/concepts.hpp           |  77 +++-
 include/threadschedule/error_handler.hpp      | 182 ++++++++--
 include/threadschedule/expected.hpp           |  93 +++++
 include/threadschedule/generator.hpp          |  54 +++
 include/threadschedule/profiles.hpp           |  63 +++-
 include/threadschedule/pthread_wrapper.hpp    |  45 ++-
 include/threadschedule/registered_threads.hpp |  28 +-
 include/threadschedule/scheduled_pool.hpp     |  87 ++++-
 include/threadschedule/scheduler_policy.hpp   | 125 ++++++-
 include/threadschedule/task.hpp               | 148 +++++++-
 include/threadschedule/thread_pool.hpp        | 299 +++++++++++++++-
 .../thread_pool_with_errors.hpp               |  31 +-
 include/threadschedule/thread_registry.hpp    | 334 +++++++++++++++++-
 include/threadschedule/thread_wrapper.hpp     | 253 ++++++++++++-
 include/threadschedule/topology.hpp           |  30 +-
 16 files changed, 1782 insertions(+), 121 deletions(-)

diff --git a/include/threadschedule/chaos.hpp b/include/threadschedule/chaos.hpp
index 8caf90c..1be7538 100644
--- a/include/threadschedule/chaos.hpp
+++ b/include/threadschedule/chaos.hpp
@@ -23,18 +23,64 @@ namespace threadschedule
 {
 
 /**
- * @brief Runtime chaos settings.
+ * @brief Plain value type holding runtime chaos-testing parameters.
+ *
+ * All fields have sensible defaults so a default-constructed `ChaosConfig`
+ * is immediately usable.
+ *
+ * @see ChaosController
  */
 struct ChaosConfig
 {
+    /** Time between successive chaos perturbations (default 250 ms). */
     std::chrono::milliseconds interval{250};
-    int priority_jitter{0}; // +/- jitter applied around current priority
+
+    /**
+     * @brief +/- range applied around the current thread priority each
+     *        interval.
+     *
+     * A value of 0 disables priority perturbation.
+     */
+    int priority_jitter{0};
+
+    /** Whether to reassign CPU affinities each interval (default `true`). */
     bool shuffle_affinity{true};
 };
 
-// RAII controller that periodically perturbs affinity/priority of registered threads matching a predicate
 /**
- * @brief RAII controller that periodically applies chaos operations.
+ * @brief RAII controller that periodically perturbs scheduling attributes
+ *        of registered threads for chaos/fuzz testing.
+ *
+ * On construction, `ChaosController` spawns a background `std::thread`
+ * that wakes every `ChaosConfig::interval` and applies perturbations
+ * (affinity shuffling, priority jitter) to threads in the global
+ * `registry()` that match the user-supplied predicate.
+ *
+ * **Ownership semantics:**
+ * - Non-copyable, non-movable.
+ * - The destructor signals the worker to stop and **blocks** until it
+ *   joins. Do not destroy from a context where blocking is unacceptable.
+ *
+ * **Thread safety:**
+ * The controller operates on the global `registry()`, which is internally
+ * synchronized, so multiple controllers or concurrent registrations are
+ * safe.
+ *
+ * @warning Intended for testing and validation only -- not for production
+ *          use. Perturbations may cause spurious priority inversions and
+ *          cache-thrashing.
+ *
+ * @par Example
+ * @code
+ * ChaosConfig cfg{.interval = std::chrono::milliseconds{100}, .priority_jitter = 5};
+ * ChaosController chaos(cfg, [](auto const& info) {
+ *     return info.name.starts_with("worker");
+ * });
+ * // ... run tests while chaos is active ...
+ * // destructor joins the worker thread
+ * @endcode
+ *
+ * @see ChaosConfig, registry()
  */
 class ChaosController
 {
diff --git a/include/threadschedule/concepts.hpp b/include/threadschedule/concepts.hpp
index f50b964..36c42af 100644
--- a/include/threadschedule/concepts.hpp
+++ b/include/threadschedule/concepts.hpp
@@ -1,5 +1,18 @@
 #pragma once
 
+/**
+ * @file concepts.hpp
+ * @brief C++20 concepts, type traits, and SFINAE helpers for the threading library.
+ *
+ * Provides compile-time constraints (`ThreadCallable`, `ThreadIdentifiable`,
+ * `Duration`, `PriorityType`, `CPUSetType`) used throughout the library to
+ * enforce correct template arguments. When C++20 concepts are unavailable,
+ * equivalent `constexpr bool` variables are defined as fallbacks.
+ *
+ * Also defines the `is_thread_like<T>` trait hierarchy for generic thread
+ * handle dispatch.
+ */
+
 #include <chrono>
 #include <functional>
 #include <set>
@@ -10,28 +23,44 @@
 namespace threadschedule
 {
 
-// Custom duration trait for compatibility across all C++ versions
+/**
+ * @brief SFINAE trait that detects `std::chrono::duration` types.
+ *
+ * Yields `std::true_type` when @p T exposes nested `rep` and `period`
+ * type aliases (the signature of any `std::chrono::duration` instantiation).
+ * The primary template is `std::false_type`; the partial specialization
+ * using `std::void_t` matches duration-like types.
+ *
+ * @tparam T The type to test.
+ */
 template <typename T, typename = void>
 struct is_duration_impl : std::false_type
 {
 };
 
+/** @copydoc is_duration_impl */
 template <typename T>
 struct is_duration_impl<T, std::void_t<typename T::rep, typename T::period>> : std::true_type
 {
 };
 
-// C++23 concepts (with fallbacks for older compilers)
+// C++20 concepts (with constexpr-bool fallbacks for older compilers)
 #if __cpp_concepts >= 201907L
 
 /**
- * @brief Concept for callable objects that can be executed by threads
+ * @brief Constrains @p F to be invocable with @p Args.
+ *
+ * Use in template parameter lists to restrict thread-entry functions or
+ * callbacks to types that are actually callable with the given arguments.
  */
 template <typename F, typename... Args>
 concept ThreadCallable = std::is_invocable_v<F, Args...>;
 
 /**
- * @brief Concept for types that can be used as thread identifiers
+ * @brief Constrains @p T to types that expose a thread identity via
+ *        `get_id()` returning something convertible to `std::thread::id`.
+ *
+ * Satisfied by `std::thread`, `std::jthread`, and `ThreadWrapper`.
  */
 template <typename T>
 concept ThreadIdentifiable = requires(T t) {
@@ -39,7 +68,10 @@ concept ThreadIdentifiable = requires(T t) {
 };
 
 /**
- * @brief Concept for duration types used in thread operations
+ * @brief Constrains @p T to `std::chrono::duration`-like types (those
+ *        exposing `rep` and `period` nested types).
+ *
+ * Use for timeout / interval parameters in scheduling APIs.
  */
 template <typename T>
 concept Duration = requires {
@@ -48,13 +80,15 @@ concept Duration = requires {
 };
 
 /**
- * @brief Concept for types that can represent thread priorities
+ * @brief Constrains @p T to integral types suitable for representing
+ *        thread priorities.
  */
 template <typename T>
 concept PriorityType = std::is_integral_v<T>;
 
 /**
- * @brief Concept for CPU set types
+ * @brief Constrains @p T to container-like types that can represent a set
+ *        of CPU indices (must provide `size()`, `begin()`, `end()`).
  */
 template <typename T>
 concept CPUSetType = requires(T t) {
@@ -65,46 +99,67 @@ concept CPUSetType = requires(T t) {
 
 #else
 
-// Fallback using SFINAE for older compilers
+/**
+ * @brief Pre-C++20 fallback for ThreadCallable (constexpr bool).
+ * @see ThreadCallable concept above.
+ */
 template <typename F, typename... Args>
 constexpr bool ThreadCallable = std::is_invocable_v<F, Args...>;
 
+/** @brief Pre-C++20 fallback for ThreadIdentifiable; true when `get_id()` returns exactly `std::thread::id`. */
 template <typename T>
 constexpr bool ThreadIdentifiable = std::is_same_v<decltype(std::declval<T>().get_id()), std::thread::id>;
 
+/** @brief Pre-C++20 fallback for Duration (constexpr bool). */
 template <typename T>
 constexpr bool Duration = is_duration_impl<T>::value;
 
+/** @brief Pre-C++20 fallback for PriorityType (constexpr bool). */
 template <typename T>
 constexpr bool PriorityType = std::is_integral_v<T>;
 
-// For CPU set types, we'll use a simple trait
+/** @brief Pre-C++20 fallback for CPUSetType; true only for `std::vector<int>` and `std::set<int>`. */
 template <typename T>
 constexpr bool CPUSetType = std::is_same_v<T, std::vector<int>> || std::is_same_v<T, std::set<int>>;
 
 #endif
 
 /**
- * @brief Type trait for thread-like objects
+ * @brief Type trait that identifies thread-like types.
+ *
+ * The primary template yields `std::false_type`. Explicit specializations
+ * are provided for `std::thread` and (when C++20 is available)
+ * `std::jthread`. Additional specializations for library types such as
+ * `ThreadWrapper` are defined in `profiles.hpp`.
+ *
+ * Used by `apply_profile()` and other generic scheduling functions to
+ * accept any thread-like handle uniformly.
+ *
+ * @tparam T The type to test.
+ *
+ * @par Helper variable
+ * `is_thread_like_v<T>` is a convenience `inline constexpr bool`.
  */
 template <typename T>
 struct is_thread_like : std::false_type
 {
 };
 
+/** @brief `std::thread` is a thread-like type. */
 template <>
 struct is_thread_like<std::thread> : std::true_type
 {
 };
 
-// Only include jthread if C++20 is available
 #if __cplusplus >= 202002L || (defined(_MSVC_LANG) && _MSVC_LANG >= 202002L)
+/** @brief `std::jthread` is a thread-like type (C++20). */
 template <>
 struct is_thread_like<std::jthread> : std::true_type
 {
 };
 #endif
 
+/** @brief Convenience variable template for is_thread_like. */
 template <typename T>
 inline constexpr bool is_thread_like_v = is_thread_like<T>::value;
 
diff --git a/include/threadschedule/error_handler.hpp b/include/threadschedule/error_handler.hpp
index e59a2a3..b3e3fb6 100644
--- a/include/threadschedule/error_handler.hpp
+++ b/include/threadschedule/error_handler.hpp
@@ -14,17 +14,38 @@ namespace threadschedule
 {
 
 /**
- * @brief Information about a task exception
+ * @brief Holds diagnostic information captured from a failed task.
+ *
+ * TaskError is a value type (copyable and movable) that bundles the original
+ * exception together with context about where and when the failure occurred.
+ *
+ * Instances are typically created by ErrorHandledTask and forwarded to
+ * registered ErrorCallback functions through an ErrorHandler.
  */
 struct TaskError
 {
+    /** @brief The captured exception. Never null when produced by the library. */
     std::exception_ptr exception;
+
+    /** @brief Optional human-readable label supplied when the task was submitted. */
     std::string task_description;
+
+    /** @brief Id of the thread on which the exception was thrown. */
     std::thread::id thread_id;
+
+    /** @brief Monotonic timestamp recorded immediately after the exception was caught. */
     std::chrono::steady_clock::time_point timestamp;
 
     /**
-     * @brief Get the exception message if it's a std::exception
+     * @brief Extract the message string from the stored exception.
+     *
+     * Internally re-throws the exception and catches it as @c std::exception
+     * to call @c what().  This is safe but incurs the overhead of a throw /
+     * catch round-trip; avoid calling in tight loops.
+     *
+     * @return The exception message, @c "Unknown exception" if the stored
+     *         exception is not derived from @c std::exception, or
+     *         @c "No exception" if the pointer is empty.
      */
     [[nodiscard]] auto what() const -> std::string
     {
@@ -47,7 +68,13 @@ struct TaskError
     }
 
     /**
-     * @brief Rethrow the exception
+     * @brief Re-throw the original exception.
+     *
+     * If the stored @c exception pointer is non-null the exception is
+     * re-thrown via @c std::rethrow_exception.  If no handler up the call
+     * stack catches it, @c std::terminate is called.
+     *
+     * @throws The original exception stored in @ref exception.
      */
     void rethrow() const
     {
@@ -59,23 +86,46 @@ struct TaskError
 };
 
 /**
- * @brief Error handler callback type
+ * @brief Signature for error-handling callbacks registered with ErrorHandler.
+ *
+ * Callbacks receive a const reference to the TaskError describing the failure.
  */
 using ErrorCallback = std::function<void(TaskError const&)>;
 
 /**
- * @brief Global error handler for thread pool tasks
+ * @brief Central registry and dispatcher for task-error callbacks.
+ *
+ * ErrorHandler maintains an ordered list of ErrorCallback functions and invokes
+ * them whenever a task reports a failure through handle_error().
+ *
+ * @par Thread safety
+ * All public methods are guarded by an internal @c std::mutex, so the handler
+ * can be shared across threads (typically via @c std::shared_ptr).
+ *
+ * @par Callback execution
+ * - Callbacks are invoked in the order they were registered (FIFO).
+ * - Callbacks run **under the lock** -- keep them short and non-blocking to
+ *   avoid contention with other threads that may call handle_error() or
+ *   add_callback() concurrently.
+ * - If a callback itself throws, the exception is silently swallowed so that
+ *   remaining callbacks still execute.
  *
- * Allows registering callbacks that will be invoked when tasks throw exceptions.
- * Multiple handlers can be registered and they will be called in order.
+ * @par Limitations
+ * add_callback() returns an index that identifies the callback, but there is
+ * no @c remove_callback() -- only clear_callbacks() removes all callbacks at
+ * once.  The error count returned by error_count() is monotonically
+ * increasing and is only reset by an explicit call to reset_error_count().
  */
 class ErrorHandler
 {
   public:
     /**
-     * @brief Add an error callback
-     * @param callback Function to call when a task throws
-     * @return Handle (index) that can be used to remove the callback
+     * @brief Register an error callback.
+     *
+     * @param callback Callable to invoke when a task throws.
+     * @return Zero-based index (handle) of the newly added callback.
+     *         There is currently no API to remove an individual callback;
+     *         use clear_callbacks() to remove all.
      */
     auto add_callback(ErrorCallback callback) -> size_t
     {
@@ -85,7 +135,10 @@ class ErrorHandler
     }
 
     /**
-     * @brief Remove all error callbacks
+     * @brief Remove all registered error callbacks.
+     *
+     * After this call, handle_error() will still increment the error count
+     * but no callbacks will be invoked.
      */
     void clear_callbacks()
     {
@@ -94,8 +147,13 @@ class ErrorHandler
     }
 
     /**
-     * @brief Handle an exception from a task
-     * @param error Error information
+     * @brief Dispatch an error to all registered callbacks.
+     *
+     * Increments the internal error counter and then invokes every registered
+     * callback in order.  If any callback throws, the exception is caught and
+     * silently discarded so that subsequent callbacks still run.
+     *
+     * @param error Diagnostic information about the failed task.
      */
     void handle_error(TaskError const& error)
     {
@@ -116,7 +174,12 @@ class ErrorHandler
     }
 
     /**
-     * @brief Get total number of errors handled
+     * @brief Return the total number of errors handled since the last reset.
+     *
+     * The count is monotonically increasing and is only set back to zero by
+     * an explicit call to reset_error_count().
+     *
+     * @return Cumulative number of handle_error() invocations.
      */
     [[nodiscard]] auto error_count() const -> size_t
     {
@@ -125,7 +188,7 @@ class ErrorHandler
     }
 
     /**
-     * @brief Reset error count
+     * @brief Reset the cumulative error count to zero.
      */
     void reset_error_count()
     {
@@ -140,9 +203,22 @@ class ErrorHandler
 };
 
 /**
- * @brief Task wrapper that provides error handling
+ * @brief Callable wrapper that catches exceptions and routes them to an @ref ErrorHandler.
  *
- * Wraps a task function and handles exceptions according to the provided error handler.
+ * ErrorHandledTask wraps an arbitrary callable @p Func and invokes it inside a
+ * try / catch block.  Any exception thrown by the callable is captured into a
+ * @ref TaskError and forwarded to the associated @ref ErrorHandler; the exception is
+ * **not** re-thrown, so from the caller's perspective the task completes
+ * normally (silently succeeds).
+ *
+ * @tparam Func Callable type.  Must be invocable with @c operator()() (no
+ *         arguments, return value is discarded).
+ *
+ * @par Ownership
+ * The ErrorHandler is held via @c std::shared_ptr, making it safe to copy or
+ * move ErrorHandledTask across thread boundaries without lifetime issues.
+ *
+ * @see make_error_handled_task
  */
 template <typename Func>
 class ErrorHandledTask
@@ -181,7 +257,13 @@ class ErrorHandledTask
 };
 
 /**
- * @brief Helper function to create an error-handled task
+ * @brief Factory function that creates an @ref ErrorHandledTask with perfect forwarding.
+ *
+ * @tparam Func Callable type (deduced).
+ * @param func        The callable to wrap.
+ * @param handler     Shared pointer to the ErrorHandler that will receive errors.
+ * @param description Optional human-readable label stored in TaskError::task_description.
+ * @return An ErrorHandledTask<Func> ready to be submitted to a thread pool.
  */
 template <typename Func>
 auto make_error_handled_task(Func&& func, std::shared_ptr<ErrorHandler> handler, std::string description = "")
@@ -190,9 +272,23 @@ auto make_error_handled_task(Func&& func, std::shared_ptr<ErrorHandler> handler,
 }
 
 /**
- * @brief Future wrapper that provides error callback support
+ * @brief A move-only future wrapper that supports an error callback.
+ *
+ * FutureWithErrorHandler<T> wraps a @c std::future<T> and adds an optional
+ * error callback that fires when get() encounters an exception.
  *
- * Extends std::future with the ability to attach error callbacks.
+ * @tparam T The value type of the underlying future.
+ *
+ * @par Move semantics
+ * Like @c std::future, this type is move-only (copy construction and copy
+ * assignment are deleted).
+ *
+ * @par Error callback behaviour
+ * - Attach a callback with on_error().  At most one callback is supported;
+ *   a subsequent call to on_error() replaces the previous callback.
+ * - The callback is invoked **before** the exception is re-thrown from get().
+ * - wait(), wait_for(), and wait_until() do **not** trigger the error callback.
+ * - valid() delegates directly to the underlying @c std::future::valid().
  */
 template <typename T>
 class FutureWithErrorHandler
@@ -209,9 +305,14 @@ class FutureWithErrorHandler
     auto operator=(FutureWithErrorHandler&&) -> FutureWithErrorHandler& = default;
 
     /**
-     * @brief Attach an error callback
-     * @param callback Function to call if the future throws
-     * @return Reference to this for chaining
+     * @brief Attach an error callback.
+     *
+     * The callback will be invoked with the current @c std::exception_ptr if
+     * get() encounters an exception.  Only one callback is stored; calling
+     * on_error() again replaces the previous callback.
+     *
+     * @param callback Callable invoked with the exception pointer on failure.
+     * @return Reference to @c *this, allowing fluent chaining.
      */
     auto on_error(std::function<void(std::exception_ptr)> callback) -> FutureWithErrorHandler&
     {
@@ -221,7 +322,13 @@ class FutureWithErrorHandler
     }
 
     /**
-     * @brief Get the result, calling error callback if exception is thrown
+     * @brief Retrieve the result, invoking the error callback on failure.
+     *
+     * If the underlying future holds an exception, the error callback (if any)
+     * is called **before** the exception is re-thrown to the caller.
+     *
+     * @return The stored value of type @p T.
+     * @throws Any exception stored in the underlying @c std::future.
      */
     auto get() -> T
     {
@@ -240,7 +347,9 @@ class FutureWithErrorHandler
     }
 
     /**
-     * @brief Wait for the future to complete
+     * @brief Block until the result is ready.
+     *
+     * Does **not** trigger the error callback regardless of the stored state.
      */
     void wait() const
     {
@@ -248,7 +357,11 @@ class FutureWithErrorHandler
     }
 
     /**
-     * @brief Wait for the future with timeout
+     * @brief Block until the result is ready or the timeout elapses.
+     *
+     * Does **not** trigger the error callback.
+     *
+     * @return The @c std::future_status indicating whether the result is ready.
      */
     template <typename Rep, typename Period>
     auto wait_for(std::chrono::duration<Rep, Period> const& timeout_duration) const
@@ -257,7 +370,11 @@ class FutureWithErrorHandler
     }
 
     /**
-     * @brief Wait until a specific time point
+     * @brief Block until the result is ready or the given time point is reached.
+     *
+     * Does **not** trigger the error callback.
+     *
+     * @return The @c std::future_status indicating whether the result is ready.
      */
     template <typename Clock, typename Duration>
     auto wait_until(std::chrono::time_point<Clock, Duration> const& timeout_time) const
@@ -266,7 +383,9 @@ class FutureWithErrorHandler
     }
 
     /**
-     * @brief Check if the future is valid
+     * @brief Check whether the future refers to a shared state.
+     *
+     * Delegates directly to @c std::future::valid().
      */
     [[nodiscard]] auto valid() const -> bool
     {
@@ -280,7 +399,12 @@ class FutureWithErrorHandler
 };
 
 /**
- * @brief Specialization for void futures
+ * @brief Specialization of FutureWithErrorHandler for @c void futures.
+ *
+ * Behaves identically to the primary template except that get() returns
+ * @c void instead of a value.
+ *
+ * @see FutureWithErrorHandler
  */
 template <>
 class FutureWithErrorHandler<void>
diff --git a/include/threadschedule/expected.hpp b/include/threadschedule/expected.hpp
index 68be6f2..551187c 100644
--- a/include/threadschedule/expected.hpp
+++ b/include/threadschedule/expected.hpp
@@ -1,5 +1,29 @@
 #pragma once
 
+/**
+ * @file expected.hpp
+ * @brief Polyfill for @c std::expected (C++23) for pre-C++23 compilers.
+ *
+ * When the standard library already provides @c std::expected (detected via
+ * the @c __cpp_lib_expected feature-test macro or a C++23-or-later language
+ * mode), every type in this header is a simple alias to its @c std::
+ * counterpart.  Otherwise a from-scratch implementation is supplied.
+ *
+ * @par Exception handling
+ * The polyfill respects @c -fno-exceptions builds.  When exceptions are
+ * disabled, @c value() calls @c std::terminate() instead of throwing
+ * @c bad_expected_access.  Prefer @c value_or(), @c operator*(), or an
+ * explicit @c has_value() check when building without exceptions.
+ *
+ * @par Monadic operations
+ * Both the primary template and the @c void specialization support the four
+ * monadic combinators that C++23 added to @c std::expected (P2505):
+ * - @c and_then  -- chain an operation that returns an @c expected
+ * - @c or_else   -- recover from an error, returning an @c expected
+ * - @c transform -- map the contained value
+ * - @c transform_error -- map the contained error
+ */
+
 #include <exception>
 #include <functional>
 #include <system_error>
@@ -52,6 +76,13 @@ using expected = std::expected<T, E>;
 
 #else
 
+/**
+ * @brief Tag type used to construct an expected in the error state.
+ *
+ * Pass the global constant @c unexpect as the first argument to the
+ * @c expected constructor to indicate that the following arguments should
+ * be forwarded to the error type's constructor.
+ */
 struct unexpect_t
 {
     explicit unexpect_t() = default;
@@ -61,6 +92,15 @@ inline constexpr unexpect_t unexpect{};
 template <typename E>
 class bad_expected_access;
 
+/**
+ * @brief Exception thrown by @c expected::value() when the object is in the error state.
+ *
+ * The base specialization for @c void carries no error payload and simply
+ * reports "bad expected access".  The derived template
+ * @c bad_expected_access<E> additionally stores a copy of the error value.
+ *
+ * @tparam E Error type.  The @c void specialization serves as the common base.
+ */
 /// @cond INTERNAL
 template <>
 class bad_expected_access<void> : public std::exception
@@ -73,6 +113,10 @@ class bad_expected_access<void> : public std::exception
     }
 };
 
+/**
+ * @brief Primary template of bad_expected_access; carries the error value.
+ * @tparam E The error type stored in the originating @c expected.
+ */
 template <typename E>
 class bad_expected_access : public bad_expected_access<void>
 /// @endcond
@@ -102,6 +146,17 @@ class bad_expected_access : public bad_expected_access<void>
     E error_;
 };
 
+/**
+ * @brief Wrapper that holds an error value for constructing an expected in the error state.
+ *
+ * Use @c unexpected to explicitly construct or assign an error into an
+ * @c expected object:
+ * @code
+ *   threadschedule::expected<int> result = threadschedule::unexpected(make_error_code(std::errc::invalid_argument));
+ * @endcode
+ *
+ * @tparam E The error type.
+ */
 template <typename E>
 class unexpected
 {
@@ -129,6 +184,32 @@ class unexpected
     E error_;
 };
 
+/**
+ * @brief A result type that holds either a value of type @p T or an error of type @p E.
+ *
+ * This is a polyfill for @c std::expected<T,E> (C++23).  It provides
+ * value-semantic storage: copyable when both @p T and @p E are copyable,
+ * movable when both are movable.
+ *
+ * @tparam T The value type.  Must be destructible.  The default constructor is
+ *           available only when @p T is default-constructible.
+ * @tparam E The error type.  Defaults to @c std::error_code.
+ *
+ * @par Thread safety
+ * This is a plain value type with no internal synchronization.  Concurrent
+ * access from multiple threads requires external locking.
+ *
+ * @par Implementation notes
+ * Storage is implemented as a union with placement new / manual destructor
+ * calls to avoid requiring default-constructibility of either @p T or @p E.
+ *
+ * @par Monadic operations
+ * The following combinators are provided (matching the C++23 specification):
+ * - @c and_then(f)        -- if has_value(), invoke @p f with the value and return the result
+ * - @c or_else(f)         -- if in error state, invoke @p f with the error and return the result
+ * - @c transform(f)       -- if has_value(), apply @p f to the value and wrap the result
+ * - @c transform_error(f) -- if in error state, apply @p f to the error and wrap the result
+ */
 template <typename T, typename E = std::error_code>
 class expected
 {
@@ -638,6 +719,18 @@ class expected
     } storage_;
 };
 
+/**
+ * @brief Specialization of expected for operations that produce no value.
+ *
+ * @c expected<void, E> can be in either a "success" state (has_value() is
+ * @c true, no payload) or an "error" state carrying an @p E.  This is
+ * useful for functions that can fail but have nothing to return on success.
+ *
+ * @tparam E The error type.  The @c std::error_code default declared on the
+ *           primary template also applies here, so @c expected<void> works.
+ *
+ * @see expected<T, E>
+ */
 template <typename E>
 class expected<void, E>
 {
diff --git a/include/threadschedule/generator.hpp b/include/threadschedule/generator.hpp
index 2461793..9c4b449 100644
--- a/include/threadschedule/generator.hpp
+++ b/include/threadschedule/generator.hpp
@@ -46,6 +46,50 @@ using generator = std::generator<T>;
 
 #else
 
+/**
+ * @brief Lazy, multi-value coroutine that produces a sequence of @p T
+ *        values on demand via `co_yield`.
+ *
+ * @tparam T The element type yielded by the coroutine body.
+ *
+ * `generator<T>` is the coroutine return type for functions that
+ * lazily produce a stream of values. It is compatible with range-based
+ * `for` loops thanks to its `begin()` / `end()` interface.
+ *
+ * **Ownership semantics:**
+ * - Move-only; copying is deleted.
+ * - The destructor destroys the underlying coroutine frame.
+ *
+ * **Laziness:**
+ * The coroutine body does not execute until `begin()` is called, which
+ * resumes the coroutine once to produce the first value. Each subsequent
+ * `operator++` on the iterator resumes the coroutine to produce the next
+ * value.
+ *
+ * **Iteration model:**
+ * - Input iterator only (single-pass).
+ * - `end()` returns `std::default_sentinel_t`; comparison with the
+ *   iterator checks whether the coroutine is done.
+ * - If the coroutine body throws, the exception is re-thrown on the
+ *   next iterator increment (or on `begin()` if the first resumption
+ *   throws).
+ *
+ * When C++23 `std::generator` is available (`__cpp_lib_generator >= 202207L`),
+ * this class is replaced by a type alias to `std::generator<T>`.
+ *
+ * Requires C++20 coroutine support (`__cpp_impl_coroutine >= 201902L`).
+ *
+ * @par Example
+ * @code
+ * generator<int> iota(int n) {
+ *     for (int i = 0; i < n; ++i)
+ *         co_yield i;
+ * }
+ *
+ * for (int v : iota(5))
+ *     std::cout << v << '\n';
+ * @endcode
+ */
 template <typename T>
 class generator
 {
@@ -100,6 +144,16 @@ class generator
         std::exception_ptr exception_{};
     };
 
+    /**
+     * @brief Input iterator that lazily drives a generator coroutine.
+     *
+     * Input iterator (`std::input_iterator_tag`). Each call to `operator++`
+     * resumes the coroutine to produce the next value. Dereferencing
+     * returns a `T&` (reference to the value stored in the promise).
+     *
+     * Comparison with `std::default_sentinel_t` returns `true` when the
+     * coroutine has finished (i.e. `coroutine_handle::done()` is true).
+     */
     class iterator
     {
       public:
diff --git a/include/threadschedule/profiles.hpp b/include/threadschedule/profiles.hpp
index 12b3e3b..9bafd81 100644
--- a/include/threadschedule/profiles.hpp
+++ b/include/threadschedule/profiles.hpp
@@ -43,7 +43,15 @@ struct is_thread_like<JThreadWrapperView> : std::true_type
 #endif
 
 /**
- * @brief Declarative profile describing desired scheduling.
+ * @brief Declarative profile bundling scheduling intent for a thread.
+ *
+ * Value type (copyable). Combines a human-readable name, a scheduling
+ * policy, a priority level, and an optional CPU affinity mask into a
+ * single object that can be passed to the apply_profile() overloads.
+ *
+ * @see profiles::realtime, profiles::low_latency, profiles::throughput,
+ *      profiles::background
+ * @see apply_profile()
  */
 struct ThreadProfile
 {
@@ -96,7 +104,16 @@ inline auto background() -> ThreadProfile
 } // namespace profiles
 
 /**
- * @brief Apply a profile to a single thread wrapper or view.
+ * @brief Apply a profile to a thread wrapper or view.
+ *
+ * SFINAE-constrained: only participates in overload resolution when
+ * @c is_thread_like_v<ThreadLike> is true (ThreadWrapper,
+ * JThreadWrapper, PThreadWrapper, and their views).
+ *
+ * @tparam ThreadLike A type satisfying the is_thread_like trait.
+ * @param t   Thread wrapper or view to configure.
+ * @param p   Profile to apply.
+ * @return    Empty expected on success, or @c operation_not_permitted.
  */
 template <typename ThreadLike, std::enable_if_t<is_thread_like_v<ThreadLike>, int> = 0>
 inline auto apply_profile(ThreadLike& t, ThreadProfile const& p) -> expected<void, std::error_code>
@@ -115,7 +132,11 @@ inline auto apply_profile(ThreadLike& t, ThreadProfile const& p) -> expected<voi
 }
 
 /**
- * @brief Apply a profile to a thread control block.
+ * @brief Apply a profile to a ThreadControlBlock directly.
+ *
+ * @param t   Control block whose underlying thread will be reconfigured.
+ * @param p   Profile to apply.
+ * @return    Empty expected on success, or @c operation_not_permitted.
  */
 inline auto apply_profile(ThreadControlBlock& t, ThreadProfile const& p) -> expected<void, std::error_code>
 {
@@ -133,7 +154,16 @@ inline auto apply_profile(ThreadControlBlock& t, ThreadProfile const& p) -> expe
 }
 
 /**
- * @brief Apply a profile to a registered thread info.
+ * @brief Apply a profile to a registered thread via its info record.
+ *
+ * Dereferences @c t.control and delegates to the ThreadControlBlock
+ * overload.
+ *
+ * @warning Undefined behaviour if @c t.control is @c nullptr.
+ *
+ * @param t   Registered thread info whose control pointer is dereferenced.
+ * @param p   Profile to apply.
+ * @return    Empty expected on success, or @c operation_not_permitted.
  */
 inline auto apply_profile(RegisteredThreadInfo& t, ThreadProfile const& p) -> expected<void, std::error_code>
 {
@@ -141,7 +171,14 @@ inline auto apply_profile(RegisteredThreadInfo& t, ThreadProfile const& p) -> ex
 }
 
 /**
- * @brief Apply a profile to all workers of a simple ThreadPool.
+ * @brief Apply a profile to every worker in a ThreadPool.
+ *
+ * Uses @c "pool" as the thread name prefix passed to
+ * ThreadPool::configure_threads().
+ *
+ * @param pool  Thread pool to configure.
+ * @param p     Profile to apply.
+ * @return      Empty expected on success, or @c operation_not_permitted.
  */
 inline auto apply_profile(ThreadPool& pool, ThreadProfile const& p) -> expected<void, std::error_code>
 {
@@ -160,7 +197,14 @@ inline auto apply_profile(ThreadPool& pool, ThreadProfile const& p) -> expected<
 }
 
 /**
- * @brief Apply a profile to all workers of a HighPerformancePool.
+ * @brief Apply a profile to every worker in a HighPerformancePool.
+ *
+ * Uses @c "hp" as the thread name prefix passed to
+ * HighPerformancePool::configure_threads().
+ *
+ * @param pool  High-performance pool to configure.
+ * @param p     Profile to apply.
+ * @return      Empty expected on success, or @c operation_not_permitted.
  */
 inline auto apply_profile(HighPerformancePool& pool, ThreadProfile const& p) -> expected<void, std::error_code>
 {
@@ -178,7 +222,12 @@ inline auto apply_profile(HighPerformancePool& pool, ThreadProfile const& p) ->
 }
 
 /**
- * @brief Apply a profile to a registry-controlled thread by TID.
+ * @brief Apply a profile to a registry-managed thread identified by TID.
+ *
+ * @param reg  Thread registry that owns the thread.
+ * @param tid  Thread identifier within the registry.
+ * @param p    Profile to apply.
+ * @return     Empty expected on success, or @c operation_not_permitted.
  */
 inline auto apply_profile(ThreadRegistry& reg, Tid tid, ThreadProfile const& p) -> expected<void, std::error_code>
 {
diff --git a/include/threadschedule/pthread_wrapper.hpp b/include/threadschedule/pthread_wrapper.hpp
index 7405352..61d5f8c 100644
--- a/include/threadschedule/pthread_wrapper.hpp
+++ b/include/threadschedule/pthread_wrapper.hpp
@@ -23,7 +23,27 @@ namespace threadschedule
 
 #ifndef _WIN32
 /**
- * @brief RAII pthread wrapper with modern C++ interface
+ * @brief RAII wrapper around POSIX threads with a modern C++ interface.
+ *
+ * POSIX-only -- not available on Windows (compiled only when @c _WIN32 is not defined).
+ *
+ * Non-copyable, movable. The destructor automatically joins the thread
+ * if it is still joinable, which **blocks** until the thread finishes.
+ *
+ * Internally stores the callable in a heap-allocated @c std::function
+ * so that it can be passed through the C @c pthread_create API.
+ *
+ * @note Thread names are limited to 15 characters on Linux
+ *       (enforced by @c pthread_setname_np).
+ * @note cancel() sends a POSIX cancellation request to the thread.
+ *       set_cancel_state() and set_cancel_type() are @c static and
+ *       affect the **calling** thread, not the PThreadWrapper's thread.
+ *
+ * @par Factory methods
+ * - create_with_config()      -- creates a thread and applies name/policy/priority.
+ * - create_with_attributes()  -- creates a thread from a raw @c pthread_attr_t.
+ *
+ * @see is_thread_like<PThreadWrapper> (specialised to @c true_type at end of file)
  */
 class PThreadWrapper
 {
@@ -272,7 +292,16 @@ class PThreadWrapper
 };
 
 /**
- * @brief RAII pthread attribute wrapper
+ * @brief RAII wrapper for @c pthread_attr_t with a builder-style API.
+ *
+ * Non-copyable, movable. The move constructor and move assignment
+ * operator call @c std::terminate if the re-initialisation of the
+ * moved-from attribute object fails (cannot throw from @c noexcept).
+ *
+ * The destructor always calls @c pthread_attr_destroy.
+ *
+ * Provides fluent setters for detach state, stack size, guard size,
+ * scheduling policy, priority, inherit-sched, and contention scope.
  */
 class PThreadAttributes
 {
@@ -404,7 +433,17 @@ class PThreadAttributes
 };
 
 /**
- * @brief RAII pthread mutex wrapper
+ * @brief RAII wrapper for @c pthread_mutex_t.
+ *
+ * Non-copyable, **non-movable**. Satisfies the @e Lockable named
+ * requirement (lock / unlock / try_lock), so it can be used with
+ * @c std::lock_guard and similar RAII lock holders.
+ *
+ * @note The constructor throws @c std::runtime_error if
+ *       @c pthread_mutex_init fails. Unusually for a mutex type,
+ *       lock() and unlock() also throw on error -- callers should be
+ *       aware of this when mixing with code that assumes non-throwing
+ *       mutex operations.
  */
 class PThreadMutex
 {
diff --git a/include/threadschedule/registered_threads.hpp b/include/threadschedule/registered_threads.hpp
index 6d9cc96..63a607b 100644
--- a/include/threadschedule/registered_threads.hpp
+++ b/include/threadschedule/registered_threads.hpp
@@ -11,9 +11,15 @@
 namespace threadschedule
 {
 
-//
-
-// Registered std::thread wrapper (opt-in)
+/**
+ * @brief @ref ThreadWrapper with automatic registration in the global @ref ThreadRegistry.
+ *
+ * Non-copyable, movable. On thread start the spawned thread
+ * auto-registers itself in the global registry() (via an
+ * @ref AutoRegisterCurrentThread RAII guard) and auto-unregisters when
+ * the thread function returns. The @p name and @p componentTag
+ * arguments are forwarded to the registry entry.
+ */
 class ThreadWrapperReg : public ThreadWrapper
 {
   public:
@@ -38,6 +44,15 @@ class ThreadWrapperReg : public ThreadWrapper
 };
 
 #if __cplusplus >= 202002L || (defined(_MSVC_LANG) && _MSVC_LANG >= 202002L)
+/**
+ * @brief @ref JThreadWrapper with automatic registration in the global @ref ThreadRegistry.
+ *
+ * Non-copyable, movable. C++20 only. Behaves like @ref ThreadWrapperReg
+ * but wraps a @c std::jthread and handles @c std::stop_token
+ * forwarding: the callable may accept a @c stop_token as its first
+ * argument, its last argument, or not at all -- all three signatures
+ * are detected at compile time and dispatched accordingly.
+ */
 class JThreadWrapperReg : public JThreadWrapper
 {
   public:
@@ -71,6 +86,13 @@ class JThreadWrapperReg : public JThreadWrapper
 #endif
 
 #ifndef _WIN32
+/**
+ * @brief @ref PThreadWrapper with automatic registration in the global @ref ThreadRegistry.
+ *
+ * Non-copyable, movable. POSIX-only (compiled only when @c _WIN32 is not defined).
+ * Same auto-register / auto-unregister semantics as @ref ThreadWrapperReg,
+ * but for POSIX threads.
+ */
 class PThreadWrapperReg : public PThreadWrapper
 {
   public:
diff --git a/include/threadschedule/scheduled_pool.hpp b/include/threadschedule/scheduled_pool.hpp
index f2e72c7..ce63655 100644
--- a/include/threadschedule/scheduled_pool.hpp
+++ b/include/threadschedule/scheduled_pool.hpp
@@ -14,7 +14,16 @@ namespace threadschedule
 {
 
 /**
- * @brief Handle for scheduled tasks that can be used to cancel them
+ * @brief Copyable handle for a cancellable scheduled task.
+ *
+ * Copyable (the cancel flag is shared via
+ * @c std::shared_ptr<std::atomic<bool>>). Both cancel() and
+ * is_cancelled() are thread-safe (atomic store / load with
+ * release / acquire ordering).
+ *
+ * Cancellation is cooperative: the scheduler checks the flag before
+ * dispatching the task to the worker pool, but a task that is already
+ * executing will **not** be interrupted.
  */
 class ScheduledTaskHandle
 {
@@ -51,16 +60,70 @@ class ScheduledTaskHandle
 };
 
 /**
- * @brief Thread pool with support for scheduled and periodic tasks
+ * @brief Thread pool augmented with delayed and periodic task scheduling.
+ *
+ * Non-copyable, non-movable. Combines a dedicated scheduler thread with
+ * an underlying PoolType (default: @ref ThreadPool) that does the actual work.
+ *
+ * @par How task execution works
+ * The pool owns a single scheduler thread that runs an internal loop
+ * (scheduler_loop). Scheduled tasks are stored in a std::multimap sorted
+ * by their next_run time point. The scheduler thread sleeps (via
+ * condition_variable::wait / wait_until) until the earliest task is due.
+ * When a task becomes due, the scheduler thread:
+ *   1. Removes it from the multimap.
+ *   2. Checks if the task has been cancelled (via the atomic flag). If
+ *      cancelled, the task is discarded.
+ *   3. Submits the task to the underlying PoolType via pool_.submit().
+ *      From this point on, the task follows the execution rules of the
+ *      underlying pool (see @ref ThreadPool, @ref FastThreadPool, or
+ *      @ref HighPerformancePool documentation).
+ *   4. For periodic tasks, the scheduler immediately re-inserts the task
+ *      into the multimap with next_run += interval. This means the next
+ *      execution is timed from the scheduled time, not from when the
+ *      task actually finishes.
+ *
+ * @par Execution guarantees
+ * - Every successfully scheduled task (schedule_after/schedule_at/
+ *   schedule_periodic returned a handle) is guaranteed to eventually
+ *   execute, unless it is cancelled or shutdown() is called before it
+ *   becomes due.
+ * - Tasks that are already due and submitted to the underlying pool
+ *   before shutdown() will still execute (the pool drains its queue).
+ * - Tasks that are not yet due at the time of shutdown() will NOT
+ *   execute. The scheduler thread exits immediately on shutdown, so
+ *   future-scheduled tasks are lost.
+ * - Cancellation is cooperative: calling handle.cancel() sets an atomic
+ *   flag. The scheduler checks this flag before submitting the task to
+ *   the pool. Additionally, the pool-side wrapper checks the flag again
+ *   right before calling the task. However, a task that is already
+ *   running will NOT be interrupted by cancel().
+ * - Periodic tasks repeat at a fixed rate, not with a fixed delay
+ *   between runs. If a task takes longer than the interval, executions
+ *   can pile up because the next run is computed from the previous
+ *   scheduled time, not from when the task actually finishes.
+ * - There is no returned std::future for scheduled tasks. If you need
+ *   to observe the result, use the underlying pool directly via
+ *   thread_pool().submit().
+ *
+ * @par Thread safety
+ * All schedule_* methods are thread-safe (protected by an internal
+ * mutex). cancel() on a ScheduledTaskHandle is also thread-safe (atomic).
+ * shutdown() is internally guarded and safe to call more than once.
+ *
+ * @par Lifetime
+ * The destructor calls shutdown(), which joins the scheduler thread and
+ * then shuts down the underlying pool. Can block if the pool still has
+ * running tasks.
+ *
+ * @par Copyability / movability
+ * Not copyable, not movable.
  *
- * Features:
- * - Schedule tasks to run at specific time points
- * - Schedule tasks to run after a delay
- * - Schedule periodic tasks with fixed intervals
- * - Cancel scheduled tasks before they execute
- * - Integrates with any thread pool type (ThreadPool by default)
+ * @tparam PoolType Thread pool used for task execution
+ *         (default: ThreadPool).
  *
- * @tparam PoolType Type of thread pool to use for task execution (default: ThreadPool)
+ * @see ScheduledThreadPool, ScheduledHighPerformancePool,
+ *      ScheduledFastThreadPool (convenience aliases)
  */
 template <typename PoolType = ThreadPool>
 class ScheduledThreadPoolT
@@ -233,7 +296,7 @@ class ScheduledThreadPoolT
      * @brief Configure worker threads
      *
      * Note: Return type depends on the underlying pool type.
-     * ThreadPool returns bool, HighPerformancePool returns expected<void, std::error_code>.
+     * @ref ThreadPool returns bool, @ref HighPerformancePool returns expected<void, std::error_code>.
      * For consistent behavior, access the pool directly via thread_pool().
      */
     auto configure_threads(std::string const& name_prefix, SchedulingPolicy policy = SchedulingPolicy::OTHER,
@@ -327,9 +390,11 @@ class ScheduledThreadPoolT
     }
 };
 
-// Convenience aliases
+/** @brief @ref ScheduledThreadPoolT using the default @ref ThreadPool backend. */
 using ScheduledThreadPool = ScheduledThreadPoolT<ThreadPool>;
+/** @brief @ref ScheduledThreadPoolT using @ref HighPerformancePool as backend. */
 using ScheduledHighPerformancePool = ScheduledThreadPoolT<HighPerformancePool>;
+/** @brief @ref ScheduledThreadPoolT using @ref FastThreadPool as backend. */
 using ScheduledFastThreadPool = ScheduledThreadPoolT<FastThreadPool>;
 
 } // namespace threadschedule
diff --git a/include/threadschedule/scheduler_policy.hpp b/include/threadschedule/scheduler_policy.hpp
index 0536a89..8f99628 100644
--- a/include/threadschedule/scheduler_policy.hpp
+++ b/include/threadschedule/scheduler_policy.hpp
@@ -20,7 +20,36 @@ namespace threadschedule
 // expected/result are provided by expected.hpp
 
 /**
- * @brief Enumeration of available scheduling policies
+ * @brief Enumeration of available thread scheduling policies.
+ *
+ * Represents the OS-level scheduling policy applied to a thread. On Linux, the
+ * enumerator values map directly to the POSIX `SCHED_*` constants defined in
+ * `<sched.h>`. On Windows, they are stored as portable integer values and
+ * translated to Windows-specific priority classes / scheduling behaviour at the
+ * point of application.
+ *
+ * ### Linux behaviour
+ * | Policy     | Description                                                                 | Privileges required          |
+ * |------------|-----------------------------------------------------------------------------|------------------------------|
+ * | OTHER      | Default CFS (Completely Fair Scheduler) time-sharing.                       | None                         |
+ * | FIFO       | Real-time FIFO - runs until it yields or a higher-priority thread arrives.  | `CAP_SYS_NICE` or root       |
+ * | RR         | Real-time round-robin - like FIFO but with a per-thread time quantum.       | `CAP_SYS_NICE` or root       |
+ * | BATCH      | Like OTHER but the scheduler assumes the thread is CPU-bound (longer slices).| None                        |
+ * | IDLE       | Extremely low priority; runs only when no other runnable thread exists.      | None                        |
+ * | DEADLINE   | EDF (Earliest Deadline First) real-time scheduling (Linux >= 3.14).          | `CAP_SYS_NICE` or root       |
+ *
+ * ### Windows behaviour
+ * Windows does not expose POSIX scheduling policies. The library maps each
+ * enumerator to an appropriate combination of process priority class and thread
+ * priority level when applying the policy. FIFO and RR are both treated as
+ * elevated real-time priorities; BATCH and IDLE are mapped to below-normal and
+ * idle priority levels respectively.
+ *
+ * @note DEADLINE is only available on Linux when `SCHED_DEADLINE` is defined by
+ *       the kernel headers. It is not available on Windows.
+ *
+ * @warning Setting FIFO, RR, or DEADLINE without adequate privileges will fail
+ *          with a permission error (`EPERM` on Linux).
  */
 enum class SchedulingPolicy : std::uint_fast8_t
 {
@@ -45,7 +74,34 @@ enum class SchedulingPolicy : std::uint_fast8_t
 };
 
 /**
- * @brief Thread priority wrapper with validation
+ * @brief Value-semantic wrapper for a thread scheduling priority.
+ *
+ * Encapsulates a single integer priority in the range **[-20, 19]** - the same
+ * range used by POSIX nice values on Linux. The value is silently clamped to
+ * this range on construction (via `std::clamp`), so out-of-range inputs never
+ * produce an invalid object.
+ *
+ * ### Semantics
+ * Lower numeric values denote **higher** scheduling priority (following the
+ * Unix nice convention): -20 is the most favourable and 19 is the least.
+ *
+ * ### Platform notes
+ * - **Linux:** The value is used directly as the nice level for `SCHED_OTHER`
+ *   / `SCHED_BATCH` / `SCHED_IDLE`, or clamped to the real-time priority
+ *   range for `SCHED_FIFO` / `SCHED_RR` by SchedulerParams::create_for_policy().
+ * - **Windows:** The value is mapped to a Windows thread priority constant
+ *   (e.g. `THREAD_PRIORITY_HIGHEST`, `THREAD_PRIORITY_LOWEST`) when applied.
+ *
+ * ### Type traits
+ * - Trivially copyable and trivially movable.
+ * - `constexpr`-constructible - can be used in compile-time contexts.
+ * - All relational operators (`==`, `!=`, `<`, `<=`, `>`, `>=`) are provided
+ *   and compare the underlying integer value.
+ * - Not thread-safe: concurrent mutation of the same instance requires
+ *   external synchronisation. Distinct instances may be used freely from
+ *   different threads.
+ *
+ * @see SchedulerParams::create_for_policy
  */
 class ThreadPriority
 {
@@ -115,7 +171,37 @@ class ThreadPriority
 };
 
 /**
- * @brief CPU affinity management
+ * @brief Manages a set of CPU indices to which a thread may be bound.
+ *
+ * ThreadAffinity is a value-semantic type that represents a CPU affinity mask.
+ * It abstracts away the platform-specific details of `cpu_set_t` (Linux) and
+ * processor-group bitmasks (Windows).
+ *
+ * ### Linux
+ * Backed by a `cpu_set_t`. Supports CPU indices in the range
+ * `[0, CPU_SETSIZE)` (typically 0-1023). The `native_handle()` accessor
+ * provides a `const cpu_set_t&` for direct use with `pthread_setaffinity_np`
+ * or `sched_setaffinity`.
+ *
+ * ### Windows
+ * Backed by a 64-bit bitmask plus a processor group index (`WORD`). Windows
+ * organises logical processors into groups of up to 64. This class supports
+ * **a single group at a time**: the group is determined by the first CPU added
+ * via `add_cpu()`. Subsequent calls to `add_cpu()` for CPUs that belong to a
+ * different group are **silently ignored**. Use `get_group()` and `get_mask()`
+ * to retrieve the platform-native values for `SetThreadGroupAffinity`.
+ *
+ * ### Thread safety
+ * None. ThreadAffinity is a plain value type with no internal synchronisation.
+ * Concurrent reads are safe; concurrent mutation (or a read concurrent with a
+ * write) requires external locking.
+ *
+ * ### Copyability / movability
+ * Implicitly copyable and movable (compiler-generated special members).
+ *
+ * @warning On Windows, CPUs from different processor groups cannot be combined
+ *          in a single ThreadAffinity instance. If you need cross-group
+ *          affinity you must apply separate ThreadAffinity objects per group.
  */
 class ThreadAffinity
 {
@@ -278,7 +364,38 @@ class ThreadAffinity
 };
 
 /**
- * @brief Scheduler parameter utilities
+ * @brief Static utility class for constructing OS-native scheduling parameters.
+ *
+ * SchedulerParams translates the portable SchedulingPolicy and
+ * ThreadPriority types into the platform-specific structures required by
+ * the OS scheduling APIs (`sched_param` on Linux, a compatible POD on Windows).
+ *
+ * ### `create_for_policy`
+ * Builds a native scheduling-parameter structure for a given policy/priority
+ * pair. The priority is **clamped** to the valid range for the requested policy
+ * (queried at runtime on Linux via `sched_get_priority_min` /
+ * `sched_get_priority_max`), so callers never need to pre-validate the range
+ * themselves. Returns an @ref expected - on failure (e.g. an unrecognised
+ * policy value) an `std::error_code` is returned instead.
+ *
+ * ### `get_priority_range`
+ * Returns the width of the valid priority range (max - min) for a policy.
+ * Useful for normalising priorities across policies.
+ *
+ * ### Platform differences
+ * - **Linux:** Delegates directly to POSIX `sched_get_priority_min` /
+ *   `sched_get_priority_max` and populates a `sched_param`.
+ * - **Windows:** Returns a fixed range of 30 (mapping to the -15 ... +15
+ *   Windows thread priority levels) and stores the raw priority in a
+ *   lightweight `sched_param_win` POD.
+ *
+ * ### Thread safety
+ * All members are static and stateless; concurrent calls from any number of
+ * threads are safe.
+ *
+ * @note This class is not intended to be instantiated.
+ *
+ * @see SchedulingPolicy, ThreadPriority
  */
 class SchedulerParams
 {
diff --git a/include/threadschedule/task.hpp b/include/threadschedule/task.hpp
index e4e3102..d0bea60 100644
--- a/include/threadschedule/task.hpp
+++ b/include/threadschedule/task.hpp
@@ -29,6 +29,17 @@ class task;
 namespace detail
 {
 
+/**
+ * @brief Awaiter that resumes the parent coroutine (continuation) when a task completes.
+ *
+ * @internal This is an implementation detail of the task coroutine machinery.
+ *
+ * When a task's coroutine body finishes, `final_awaiter` is returned from
+ * `final_suspend()`. It is never ready (always suspends), and on suspension
+ * it symmetric-transfers to the stored continuation. If no continuation has
+ * been set (e.g. the task was started via `sync_wait`), it transfers to
+ * `std::noop_coroutine()` to avoid undefined behaviour.
+ */
 struct final_awaiter
 {
     [[nodiscard]] auto await_ready() const noexcept -> bool
@@ -49,6 +60,25 @@ struct final_awaiter
     }
 };
 
+/**
+ * @brief Shared promise logic for task<T> and task<void>.
+ *
+ * @internal This is an implementation detail; users should interact with
+ * task<T> rather than its promise directly.
+ *
+ * @tparam T The value type produced by the task (may be `void`).
+ *
+ * Key behaviours:
+ * - **Lazy start:** `initial_suspend()` returns `std::suspend_always`, so
+ *   the coroutine does not begin until explicitly resumed (via `co_await`
+ *   or `sync_wait()`).
+ * - **Exception forwarding:** `unhandled_exception()` captures the active
+ *   exception into an `std::exception_ptr`; the awaiter re-throws it when
+ *   the caller retrieves the result.
+ * - **Continuation:** `continuation_` is set by the task's awaiter just
+ *   before resuming the task. `final_awaiter` uses it to return control
+ *   to the parent coroutine.
+ */
 template <typename T>
 class task_promise_base
 {
@@ -84,8 +114,39 @@ class task_promise_base
 
 } // namespace detail
 
-// ── task<T> (non-void) ─────────────────────────────────────────────
+// --- task<T> (non-void) ---
 
+/**
+ * @brief Lazy, single-value coroutine that produces a @p T on completion.
+ *
+ * @tparam T The type of the value produced by the coroutine body.
+ *
+ * A `task<T>` is the primary coroutine return type for asynchronous
+ * operations that yield exactly one result (or throw). It models a
+ * **lazy** coroutine: execution does not begin until the task is
+ * `co_await`ed by another coroutine or passed to `sync_wait()`.
+ *
+ * **Ownership semantics:**
+ * - Move-only; copying is deleted.
+ * - The destructor destroys the underlying coroutine frame, so the task
+ *   must outlive any in-progress `co_await` that references it.
+ *
+ * **Result retrieval:**
+ * `co_await`ing a `task<T>` returns `T`. If the coroutine body threw an
+ * exception, the exception is re-thrown at the `co_await` point (via
+ * `promise_type::result()`).
+ *
+ * Requires C++20 coroutine support (`__cpp_impl_coroutine >= 201902L`).
+ *
+ * @par Example
+ * @code
+ * task<int> compute() { co_return 42; }
+ *
+ * task<void> caller() {
+ *     int v = co_await compute(); // resumes compute, gets 42
+ * }
+ * @endcode
+ */
 template <typename T>
 class task
 {
@@ -189,8 +250,35 @@ class task
     std::coroutine_handle<promise_type> handle_{};
 };
 
-// ── task<void> ──────────────────────────────────────────────────────
+// --- task<void> ---
 
+/**
+ * @brief Lazy coroutine specialization of task for operations that
+ *        produce no result.
+ *
+ * This is the `void` specialization of task. It behaves identically to
+ * `task<T>` except that `co_await`ing it yields no value and the promise
+ * uses `return_void()` instead of `return_value()`.
+ *
+ * **Ownership semantics:**
+ * - Move-only; copying is deleted.
+ * - The destructor destroys the underlying coroutine frame, so the task
+ *   must outlive any in-progress `co_await` that references it.
+ *
+ * If the coroutine body throws, the exception is re-thrown at the
+ * `co_await` point.
+ *
+ * Requires C++20 coroutine support (`__cpp_impl_coroutine >= 201902L`).
+ *
+ * @par Example
+ * @code
+ * task<void> do_work() { co_return; }
+ *
+ * task<void> caller() {
+ *     co_await do_work(); // resumes do_work, returns void
+ * }
+ * @endcode
+ */
 template <>
 class task<void>
 {
@@ -289,11 +377,31 @@ class task<void>
     std::coroutine_handle<promise_type> handle_{};
 };
 
-// ── sync_wait ───────────────────────────────────────────────────────
+// --- sync_wait ---
 
 namespace detail
 {
 
+/**
+ * @brief Bridge coroutine used internally by `sync_wait()` to block until
+ *        a task completes.
+ *
+ * @internal This is an implementation detail; use `sync_wait()` instead.
+ *
+ * `sync_wait_task` wraps a `task<T>` inside a coroutine whose
+ * `final_suspend` signals completion via an `std::atomic<bool>` and
+ * `notify_one()`.
+ *
+ * Typical usage (inside `sync_wait`):
+ * -# Construct a `sync_wait_task` from a lambda that `co_await`s the
+ *    user's task.
+ * -# Call `start()` to resume the coroutine (runs on the calling thread).
+ * -# Call `wait()` to block until `final_suspend` fires `notify_one`.
+ * -# Call `rethrow()` to propagate any unhandled exception from the
+ *    coroutine body.
+ *
+ * The class is move-only and non-copyable.
+ */
 class sync_wait_task
 {
   public:
@@ -385,10 +493,26 @@ class sync_wait_task
 } // namespace detail
 
 /**
- * @brief Block the current thread until a task<T> completes and return its result.
+ * @brief Block the calling thread until a `task<T>` completes and return
+ *        its result.
+ *
+ * This is the primary bridge between coroutine code and synchronous code.
+ * The task is resumed **on the calling thread** -- no thread pool or
+ * executor is involved.
+ *
+ * If the task's coroutine body throws an exception, `sync_wait`
+ * re-throws it to the caller.
  *
- * This is the primary bridge between coroutine and synchronous code.
- * The task is resumed on the calling thread.
+ * @tparam T The value type produced by the task.
+ * @param  t The task to run. Consumed by move.
+ * @return   The value produced by the task's `co_return`.
+ * @throws   Any exception thrown inside the task body.
+ *
+ * @par Example
+ * @code
+ * task<int> compute() { co_return 42; }
+ * int main() { return sync_wait(compute()); }
+ * @endcode
  */
 template <typename T>
 auto sync_wait(task<T> t) -> T
@@ -419,7 +543,17 @@ auto sync_wait(task<T> t) -> T
 }
 
 /**
- * @brief Block the current thread until a task<void> completes.
+ * @brief Block the calling thread until a `task<void>` completes.
+ *
+ * Overload for void tasks. Behaves identically to the `task<T>` overload
+ * but returns nothing.
+ *
+ * The task is resumed **on the calling thread** -- no thread pool or
+ * executor is involved. If the task body throws, the exception is
+ * re-thrown to the caller.
+ *
+ * @param t The void task to run. Consumed by move.
+ * @throws  Any exception thrown inside the task body.
  */
 inline void sync_wait(task<void> t)
 {
diff --git a/include/threadschedule/thread_pool.hpp b/include/threadschedule/thread_pool.hpp
index 742eb55..99e94bd 100644
--- a/include/threadschedule/thread_pool.hpp
+++ b/include/threadschedule/thread_pool.hpp
@@ -17,7 +17,34 @@ namespace threadschedule
 {
 
 /**
- * @brief High-performance work-stealing deque for individual worker threads
+ * @brief Work-stealing deque for per-thread task queues in a thread pool.
+ *
+ * Implements a double-ended queue where the owning worker thread pushes and
+ * pops tasks from the top, while other ("thief") threads steal tasks from the
+ * bottom. This asymmetry reduces contention under typical workloads because
+ * the owner operates on one end and thieves on the other.
+ *
+ * @par Thread safety
+ * push(), pop() and steal() are serialized by an internal mutex, so the deque
+ * is safe to use concurrently from any number of threads. The atomic counters
+ * (top_ / bottom_) exist for a fast, lock-free size() / empty() snapshot but
+ * do @e not make push/pop/steal lock-free; the mutex is always acquired.
+ *
+ * @par Capacity
+ * The deque has a fixed capacity set at construction (default
+ * @c DEFAULT_CAPACITY = 1024). push() returns @c false when the deque is
+ * full; it never reallocates. Choose a capacity large enough for your expected
+ * burst size or use an overflow queue externally (as @ref HighPerformancePool does).
+ *
+ * @par Memory layout
+ * Each stored item is wrapped in an @c AlignedItem that is aligned to
+ * @c CACHE_LINE_SIZE (64 bytes) to prevent false sharing between adjacent
+ * elements when multiple threads access neighboring slots.
+ *
+ * @par Copyability / movability
+ * Not copyable and not movable (contains a std::mutex).
+ *
+ * @tparam T The task type. Must be move-constructible.
  */
 template <typename T>
 class WorkStealingDeque
@@ -133,7 +160,12 @@ class WorkStealingDeque
 };
 
 /**
- * @brief High-performance thread pool optimized for high-frequency task submission
+ * @brief High-performance thread pool optimized for high-frequency task submission.
+ *
+ * Uses a work-stealing architecture: each worker thread owns a private
+ * @ref WorkStealingDeque, and idle workers attempt to steal tasks from other
+ * workers' queues. A shared overflow queue absorbs bursts when all per-thread
+ * queues are full.
  *
  * Optimizations for 1k+ tasks with 10k+ tasks/second throughput:
  * - Work-stealing architecture with proper synchronization
@@ -143,8 +175,69 @@ class WorkStealingDeque
  * - Cache-friendly data structures with proper alignment
  * - Performance monitoring and statistics
  *
- * Note: Has overhead for small task counts (< 100 tasks) due to work-stealing complexity.
- * Best for high-throughput scenarios like image processing, batch operations, etc.
+ * @par How task execution works
+ * When you call submit(), the callable is wrapped in a std::packaged_task and
+ * placed into one of the per-worker queues (round-robin selection). A
+ * condition_variable then wakes one sleeping worker. The worker picks up the
+ * task from its own queue. If its own queue is empty, the worker tries to
+ * steal tasks from up to 4 other workers' queues (random selection). If no
+ * per-worker queue has work, the worker checks the shared overflow queue. If
+ * nothing is found at all, the worker sleeps for up to 100 microseconds
+ * before retrying.
+ *
+ * @par Execution guarantees
+ * - Every successfully submitted task (submit() returned without throwing)
+ *   is guaranteed to eventually execute, as long as the pool is not destroyed
+ *   while shutdown() is draining.
+ * - submit() throws std::runtime_error if the pool is already shutting down.
+ *   In that case the task is NOT enqueued and will NOT execute.
+ * - Tasks are executed in approximately FIFO order per queue, but the
+ *   work-stealing mechanism means that the global execution order across all
+ *   threads is non-deterministic. There is no ordering guarantee between two
+ *   tasks submitted from different threads, or even from the same thread if
+ *   they land in different worker queues.
+ * - The returned std::future becomes ready once the task has completed. You
+ *   can call future.get() to block until the result is available, or
+ *   future.wait() to just wait without retrieving the result.
+ * - If a task throws an exception, the exception is stored in the future.
+ *   Calling future.get() will rethrow it. The worker thread itself continues
+ *   to run and process further tasks.
+ * - shutdown() sets the stop flag and wakes all workers. Workers finish
+ *   their current task and then drain all remaining queued tasks before
+ *   exiting. The destructor calls shutdown() implicitly.
+ *
+ * @par Thread safety
+ * submit() and submit_batch() may be called from any thread concurrently.
+ * shutdown() is internally guarded and is safe to call more than once.
+ *
+ * @par Exception handling
+ * Exceptions thrown by tasks are caught inside the worker loop. They do not
+ * propagate to the caller directly, but are stored in the std::future
+ * returned by submit(). Call future.get() to observe or rethrow the
+ * exception. The worker thread is not affected and continues processing.
+ *
+ * @par Statistics accuracy
+ * Counters such as completed_tasks_, stolen_tasks_, and total_task_time_
+ * are updated with std::memory_order_relaxed, so the values returned by
+ * get_statistics() are approximate and may lag behind the true counts by
+ * a small margin.
+ *
+ * @par Blocking
+ * wait_for_tasks() blocks the calling thread until every queued and currently
+ * active task has finished.
+ *
+ * @par Lifetime
+ * The destructor calls shutdown() and joins all worker threads. It is safe
+ * to let the pool go out of scope while tasks are still running; they will be
+ * drained first. Note that this means the destructor can block for a long
+ * time if tasks are slow.
+ *
+ * @par Copyability / movability
+ * Not copyable, not movable.
+ *
+ * @note Has overhead for small task counts (< 100 tasks) due to
+ *       work-stealing complexity. Best for high-throughput scenarios like
+ *       image processing, batch operations, etc.
  */
 class HighPerformancePool
 {
@@ -591,13 +684,65 @@ class HighPerformancePool
 };
 
 /**
- * @brief Simple high-performance thread pool using single queue with optimized locking
+ * @brief Single-queue thread pool with optimized locking for medium workloads.
  *
- * Alternative implementation for cases where work-stealing overhead is not justified.
- * Uses a single queue with optimized batch processing and minimal locking.
+ * Alternative to @ref HighPerformancePool for cases where work-stealing overhead is
+ * not justified. All tasks share one std::queue protected by a single mutex,
+ * which keeps per-task overhead low while still scaling to multiple workers.
  *
  * Best for: Medium workloads (100-10k tasks), consistent task patterns where
- * work-stealing complexity isn't needed but better performance than basic ThreadPool is desired.
+ * work-stealing complexity is not needed but better performance than the basic
+ * @ref ThreadPool is desired.
+ *
+ * @par How task execution works
+ * When you call submit(), the callable is wrapped in a std::packaged_task,
+ * pushed into the single shared task queue under a mutex lock, and one
+ * sleeping worker is woken via condition_variable::notify_one(). The woken
+ * worker pops the front element from the queue and executes it. If the queue
+ * is empty when a worker wakes up, it goes back to sleep with a 10 ms
+ * timeout before checking again.
+ *
+ * @par Execution guarantees
+ * - Every successfully submitted task (submit() returned without throwing)
+ *   is guaranteed to eventually execute, as long as the pool is not
+ *   destroyed while shutdown() is draining remaining work.
+ * - submit() throws std::runtime_error if the pool is already shutting
+ *   down. In that case the task is NOT enqueued and will NOT execute.
+ * - Tasks are stored in a FIFO queue, so they are picked up roughly in
+ *   submission order. However, since multiple workers pop concurrently,
+ *   the actual completion order is non-deterministic.
+ * - The returned std::future becomes ready once the task finishes. If the
+ *   task threw an exception, future.get() rethrows it. The worker thread
+ *   itself is not affected and continues processing further tasks.
+ * - On shutdown(), workers finish their current task, then drain all
+ *   remaining queued tasks before exiting. Tasks submitted before
+ *   shutdown() are guaranteed to execute.
+ *
+ * @par Thread safety
+ * submit() and submit_batch() may be called from any thread concurrently.
+ * shutdown() is internally guarded and safe to call more than once.
+ *
+ * @par Polling / wake-up
+ * Workers use condition_variable::wait_for with a 10 ms timeout, so an idle
+ * worker may take up to 10 ms to notice the stop flag after shutdown() is
+ * called.
+ *
+ * @par Exception handling
+ * Exceptions thrown by tasks are caught inside the worker loop. They are
+ * stored in the std::future returned by submit(). The worker thread
+ * continues processing.
+ *
+ * @par Configuration return type
+ * configure_threads() and set_affinity() return bool (not
+ * expected<void, std::error_code> as in @ref HighPerformancePool). A return
+ * value of false means at least one worker could not be configured.
+ *
+ * @par Lifetime
+ * The destructor calls shutdown() and joins all worker threads. Can block
+ * if tasks are still running.
+ *
+ * @par Copyability / movability
+ * Not copyable, not movable.
  */
 class FastThreadPool
 {
@@ -894,7 +1039,7 @@ class FastThreadPool
 };
 
 /**
- * @brief Simple thread pool for general-purpose use
+ * @brief Simple, general-purpose thread pool.
  *
  * This is a straightforward thread pool implementation suitable for:
  * - Simple workloads with low task counts (< 1k tasks)
@@ -903,7 +1048,59 @@ class FastThreadPool
  * - Lower memory overhead and complexity
  * - Easier to understand and debug
  *
- * For high-throughput scenarios (> 1k tasks), consider FastThreadPool or HighPerformancePool.
+ * For high-throughput scenarios (> 1k tasks), consider @ref FastThreadPool or
+ * @ref HighPerformancePool.
+ *
+ * @par How task execution works
+ * When you call submit(), the callable is wrapped in a std::packaged_task
+ * and pushed into a single shared std::queue under a mutex lock. One
+ * sleeping worker is then woken via condition_variable::notify_one(). The
+ * woken worker pops the front task from the queue and executes it. Workers
+ * block indefinitely on the condition_variable when the queue is empty (no
+ * polling timeout), so they consume zero CPU while idle.
+ *
+ * @par Execution guarantees
+ * - Every successfully submitted task (submit() returned without throwing)
+ *   is guaranteed to eventually execute.
+ * - submit() throws std::runtime_error if the pool is already shutting
+ *   down. In that case the task is NOT enqueued.
+ * - Tasks are stored in a FIFO queue. Multiple workers pop concurrently, so
+ *   submission order is roughly preserved but completion order is
+ *   non-deterministic.
+ * - The returned std::future becomes ready once the task finishes. If the
+ *   task threw an exception, future.get() rethrows it.
+ * - On shutdown(), the stop flag is set and all workers are woken. Each
+ *   worker finishes its current task and then exits only if the queue is
+ *   empty. This means all tasks that were enqueued before shutdown() are
+ *   guaranteed to execute.
+ * - wait_for_tasks() blocks until the queue is empty AND no worker is
+ *   currently executing a task.
+ *
+ * @par Thread safety
+ * submit() may be called from any thread concurrently. All task-queue access
+ * is serialized through queue_mutex_.
+ *
+ * @par Wake-up behaviour
+ * Workers block on a std::condition_variable (no polling timeout), so they
+ * consume no CPU while idle but wake instantly when a task is enqueued.
+ *
+ * @par Internal counter note
+ * Unlike @ref FastThreadPool and @ref HighPerformancePool, active_tasks_ and
+ * completed_tasks_ are incremented/decremented while queue_mutex_ is held.
+ * This means they are always consistent with the queue size, but every task
+ * completion acquires the mutex an extra time.
+ *
+ * @par Exception handling
+ * Exceptions thrown by tasks are caught inside the worker loop. They are
+ * stored in the std::future returned by submit(). The worker thread
+ * continues processing.
+ *
+ * @par Lifetime
+ * The destructor calls shutdown() and joins all worker threads. Can block
+ * if tasks are still running.
+ *
+ * @par Copyability / movability
+ * Not copyable, not movable.
  */
 class ThreadPool
 {
@@ -1169,7 +1366,32 @@ class ThreadPool
 };
 
 /**
- * @brief Singleton thread pool for global use (simple version)
+ * @brief Singleton accessor for a process-wide @ref ThreadPool instance.
+ *
+ * Provides static convenience methods that forward to a single @ref ThreadPool
+ * whose lifetime is managed as a function-local static (Meyers singleton).
+ *
+ * @par Thread safety
+ * The underlying @ref ThreadPool is created on the first call to instance(); that
+ * initialization is guaranteed to be thread-safe in C++11 and later (magic statics).
+ * All forwarded methods (submit, submit_range, parallel_for_each) are as
+ * thread-safe as the corresponding @ref ThreadPool methods.
+ *
+ * @par Pool size
+ * The pool is created with @c std::thread::hardware_concurrency() threads.
+ * This size is fixed for the lifetime of the process; there is no API to
+ * resize the singleton pool after creation.
+ *
+ * @par Static destruction order
+ * Because the pool is a function-local static, it is destroyed during static
+ * destruction in reverse order of construction. Submitting work to the global
+ * pool from destructors of other static objects is undefined behaviour if the
+ * pool has already been destroyed. Prefer explicit lifetime management in
+ * programs with complex static initialization dependencies.
+ *
+ * @par Copyability / movability
+ * Not instantiable (private constructor). All access is through static
+ * methods.
  */
 class GlobalThreadPool
 {
@@ -1203,7 +1425,33 @@ class GlobalThreadPool
 };
 
 /**
- * @brief Singleton high-performance thread pool for global use
+ * @brief Singleton accessor for a process-wide @ref HighPerformancePool instance.
+ *
+ * Provides static convenience methods that forward to a single
+ * @ref HighPerformancePool whose lifetime is managed as a function-local static
+ * (Meyers singleton).
+ *
+ * @par Thread safety
+ * The underlying pool is created on the first call to instance(); that
+ * initialization is guaranteed to be thread-safe in C++11 and later (magic statics).
+ * All forwarded methods (submit, submit_batch, parallel_for_each) are as
+ * thread-safe as the corresponding @ref HighPerformancePool methods.
+ *
+ * @par Pool size
+ * The pool is created with @c std::thread::hardware_concurrency() threads.
+ * This size is fixed for the lifetime of the process; there is no API to
+ * resize the singleton pool after creation.
+ *
+ * @par Static destruction order
+ * Because the pool is a function-local static, it is destroyed during static
+ * destruction in reverse order of construction. Submitting work to the global
+ * pool from destructors of other static objects is undefined behaviour if the
+ * pool has already been destroyed. Prefer explicit lifetime management in
+ * programs with complex static initialization dependencies.
+ *
+ * @par Copyability / movability
+ * Not instantiable (private constructor). All access is through static
+ * methods.
  */
 class GlobalHighPerformancePool
 {
@@ -1237,7 +1485,32 @@ class GlobalHighPerformancePool
 };
 
 /**
- * @brief Convenience function for parallel execution with containers
+ * @brief Convenience wrapper that applies a callable to every element of a
+ *        container in parallel using the @ref GlobalThreadPool singleton.
+ *
+ * Equivalent to:
+ * @code
+ * GlobalThreadPool::parallel_for_each(container.begin(), container.end(), func);
+ * @endcode
+ *
+ * The call blocks until every element has been processed.
+ *
+ * @par Thread safety
+ * The function itself is thread-safe (it forwards to @ref GlobalThreadPool which
+ * guards its queue with a mutex). However, the caller must ensure that
+ * concurrent invocations of @p func on different elements do not race on
+ * shared state.
+ *
+ * @par Pool lifetime
+ * On the first call, GlobalThreadPool::instance() lazily creates the
+ * singleton pool sized to @c std::thread::hardware_concurrency(). See
+ * @ref GlobalThreadPool for static-destruction-order caveats.
+ *
+ * @tparam Container Any type exposing begin() / end() iterators.
+ * @tparam F         Callable compatible with @c void(Container::value_type&).
+ *
+ * @param container The container whose elements will be processed.
+ * @param func      The callable applied to each element.
  */
 template <typename Container, typename F>
 void parallel_for_each(Container& container, F&& func)
diff --git a/include/threadschedule/thread_pool_with_errors.hpp b/include/threadschedule/thread_pool_with_errors.hpp
index 949a832..1694cd4 100644
--- a/include/threadschedule/thread_pool_with_errors.hpp
+++ b/include/threadschedule/thread_pool_with_errors.hpp
@@ -8,9 +8,18 @@ namespace threadschedule
 {
 
 /**
- * @brief High-performance thread pool with built-in error handling
+ * @brief @ref HighPerformancePool combined with an @ref ErrorHandler.
  *
- * Extends HighPerformancePool with automatic exception catching and error callbacks.
+ * Non-copyable, non-movable. Thread-safe (delegates to the underlying
+ * @ref HighPerformancePool).
+ *
+ * submit() wraps every task so that exceptions are both reported to
+ * the @ref ErrorHandler (via registered callbacks) **and** re-thrown, making
+ * them accessible through the returned @ref FutureWithErrorHandler.
+ * submit_with_description() additionally attaches a user-supplied
+ * description string to the error report for easier diagnostics.
+ *
+ * @see FutureWithErrorHandler, ErrorHandler, TaskError
  */
 class HighPerformancePoolWithErrors
 {
@@ -170,7 +179,14 @@ class HighPerformancePoolWithErrors
 };
 
 /**
- * @brief Fast thread pool with built-in error handling
+ * @brief FastThreadPool combined with an ErrorHandler.
+ *
+ * Non-copyable, non-movable. Thread-safe (delegates to the underlying
+ * FastThreadPool). Same error-handling semantics as
+ * HighPerformancePoolWithErrors: exceptions are reported to the
+ * ErrorHandler **and** re-thrown through the future.
+ *
+ * @see HighPerformancePoolWithErrors for detailed behaviour.
  */
 class FastThreadPoolWithErrors
 {
@@ -303,7 +319,14 @@ class FastThreadPoolWithErrors
 };
 
 /**
- * @brief Simple thread pool with built-in error handling
+ * @brief ThreadPool combined with an ErrorHandler.
+ *
+ * Non-copyable, non-movable. Thread-safe (delegates to the underlying
+ * ThreadPool). Same error-handling semantics as
+ * HighPerformancePoolWithErrors: exceptions are reported to the
+ * ErrorHandler **and** re-thrown through the future.
+ *
+ * @see HighPerformancePoolWithErrors for detailed behaviour.
  */
 class ThreadPoolWithErrors
 {
diff --git a/include/threadschedule/thread_registry.hpp b/include/threadschedule/thread_registry.hpp
index b49ed7c..3a0dae5 100644
--- a/include/threadschedule/thread_registry.hpp
+++ b/include/threadschedule/thread_registry.hpp
@@ -43,6 +43,41 @@ using Tid = unsigned long; // DWORD thread id
 using Tid = pid_t; // Linux TID via gettid()
 #endif
 
+/**
+ * @brief Snapshot of metadata for a single registered thread.
+ *
+ * This is a POD-like value type that captures thread identity, lifecycle state,
+ * and an optional handle to the underlying ThreadControlBlock.  Instances are
+ * returned by ThreadRegistry queries and are safe to store, copy, and inspect
+ * from any thread.
+ *
+ * @par Thread safety
+ * Instances are plain value types and carry no internal synchronisation.
+ * Concurrent reads are safe; concurrent read/write on the *same* instance is
+ * not.  The @c control shared_ptr is ref-counted and the pointee
+ * (@ref ThreadControlBlock) is itself thread-safe.
+ *
+ * @par Copyability / movability
+ * Fully copyable and movable (regular value semantics).
+ *
+ * @par Lifetime
+ * A RegisteredThreadInfo is a *snapshot* -- it may outlive the thread it
+ * describes.  The @c alive flag reflects the state at the time the snapshot
+ * was taken; it is **not** updated retroactively when the thread unregisters.
+ *
+ * @par Fields
+ * - @c tid   -- OS-level thread identifier (@c pid_t on Linux via
+ *               @c gettid(), @c DWORD on Windows).
+ * - @c stdId -- The corresponding @c std::thread::id.
+ * - @c name  -- Human-readable name given at registration time.
+ * - @c componentTag -- Optional logical grouping tag (e.g. "io", "compute").
+ * - @c alive -- @c true while the thread is registered; set to @c false when
+ *               the thread calls @c unregister_current_thread().
+ * - @c control -- Shared pointer to the thread's @ref ThreadControlBlock.  May be
+ *                 @c nullptr if the thread was registered without a control
+ *                 block (i.e. via the name-only overload of
+ *                 @c register_current_thread()).
+ */
 struct RegisteredThreadInfo
 {
     Tid tid{};
@@ -53,6 +88,44 @@ struct RegisteredThreadInfo
     std::shared_ptr<class ThreadControlBlock> control;
 };
 
+/**
+ * @brief Per-thread control handle for OS-level scheduling operations.
+ *
+ * A ThreadControlBlock captures the native thread handle (pthread_t on Linux,
+ * a duplicated @c HANDLE on Windows) at construction time and exposes
+ * cross-platform methods to modify the thread's affinity, priority,
+ * scheduling policy, and OS-visible name.
+ *
+ * @par Creation
+ * Always use the static factory create_for_current_thread().  It **must** be
+ * called from the thread it will represent, because it snapshots
+ * @c pthread_self() / @c GetCurrentThread().
+ *
+ * @par Ownership
+ * ThreadControlBlock is intended to be held via @c std::shared_ptr so that
+ * the registry, the owning thread, and any observers can all share the same
+ * instance.  The static factory already returns a @c shared_ptr.
+ *
+ * @par Thread safety
+ * - The object is **not** copyable and **not** movable (identity type).
+ * - All @c set_* methods are safe to call from **any** thread -- they operate
+ *   on the stored native handle, not on thread-local state.
+ * - Concurrent calls to different @c set_* methods on the same instance are
+ *   safe (each call is a single OS syscall on the stored handle).
+ *
+ * @par Platform notes
+ * - **Linux**: stores @c pthread_t obtained via @c pthread_self().  No
+ *   resource is owned; the handle is valid for the lifetime of the thread.
+ * - **Windows**: duplicates the pseudo-handle returned by
+ *   @c GetCurrentThread() into a real @c HANDLE with
+ *   @c THREAD_SET_INFORMATION | @c THREAD_QUERY_INFORMATION rights.  The
+ *   duplicated handle is closed in the destructor.
+ *
+ * @par Caveats
+ * - Do **not** construct directly; always use create_for_current_thread().
+ * - On Linux, @c set_name() enforces the 15-character thread-name limit (a
+ *   Linux restriction, not POSIX) and returns @c std::errc::invalid_argument if exceeded.
+ */
 class ThreadControlBlock
 {
   public:
@@ -218,6 +291,48 @@ class ThreadControlBlock
 #endif
 };
 
+/**
+ * @brief Central registry of threads indexed by OS-level thread ID (Tid).
+ *
+ * ThreadRegistry maintains a map of currently registered threads together
+ * with their metadata and optional @ref ThreadControlBlock handles.  It provides
+ * a functional-style query API (via @ref QueryView) and convenience methods that
+ * delegate scheduling operations to each thread's control block.
+ *
+ * @par Thread safety
+ * All public methods are thread-safe.  Internal state is protected by a
+ * @c std::shared_mutex: mutating operations (register, unregister, set
+ * callbacks) acquire a unique lock, while read-only operations (get, query,
+ * set_affinity, etc.) acquire a shared lock.
+ *
+ * @par Copyability / movability
+ * - **Not copyable** (copy constructor and assignment are deleted).
+ * - **Not movable** (no move operations are generated because the copy
+ *   operations are user-declared, and the class holds a @c std::shared_mutex).
+ *
+ * @par Registration semantics
+ * - register_current_thread() must be called **from** the thread being
+ *   registered.  Duplicate registration of the same TID is silently ignored
+ *   (the first registration wins).
+ * - unregister_current_thread() removes the calling thread's entry and marks
+ *   its @c alive flag as @c false in the snapshot passed to the callback.
+ *
+ * @par Callbacks
+ * The optional @c onRegister / @c onUnregister callbacks are invoked **with
+ * the lock released** to avoid deadlock if the callback itself interacts with
+ * the registry.  The callback receives a copy of the @ref RegisteredThreadInfo.
+ *
+ * @par Querying
+ * query() returns a @ref QueryView holding a **snapshot** of the registry at the
+ * moment of the call.  Subsequent changes to the registry (new
+ * registrations, unregistrations) are not reflected in an existing @ref QueryView.
+ *
+ * @par Scheduling helpers
+ * set_affinity(), set_priority(), set_scheduling_policy(), and set_name()
+ * look up the @ref ThreadControlBlock for the given TID under a shared lock and
+ * delegate to the control block.  Returns @c std::errc::no_such_process if
+ * the TID is not registered or has no control block.
+ */
 class ThreadRegistry
 {
   public:
@@ -317,7 +432,42 @@ class ThreadRegistry
         return it->second;
     }
 
-    // Chainable query API
+    /**
+     * @brief Functional-style query/filter view over a snapshot of
+     *        registered threads.
+     *
+     * A QueryView is produced by ThreadRegistry::query() (or by chaining
+     * operations on an existing QueryView).  It holds an internal
+     * @c std::vector<RegisteredThreadInfo> that is a **snapshot** -- mutations
+     * to the originating ThreadRegistry after the QueryView was created are
+     * not visible.
+     *
+     * @par Value semantics
+     * QueryView is a regular value type (copyable and movable).  All
+     * transformation methods (filter, take, skip) return a **new** QueryView,
+     * leaving the original unchanged.
+     *
+     * @par Thread safety
+     * A single QueryView instance is **not** safe to use concurrently from
+     * multiple threads.  However, it is safe to create multiple QueryViews
+     * concurrently from the same @ref ThreadRegistry, since creation acquires a
+     * shared lock on the registry.
+     *
+     * @par API
+     * Provides a functional-style interface:
+     * - **filter(pred)** -- returns a new QueryView containing only entries
+     *   that satisfy @p pred.
+     * - **map(fn)** -- transforms each entry and returns a
+     *   @c std::vector<R>.
+     * - **for_each(fn)** -- applies @p fn to every entry.
+     * - **find_if(pred)** -- returns the first matching entry, or
+     *   @c std::nullopt.
+     * - **any / all / none(pred)** -- boolean aggregation predicates.
+     * - **take(n) / skip(n)** -- positional slicing, returning new
+     *   QueryViews.
+     * - **count() / empty()** -- size queries.
+     * - **entries()** -- direct access to the underlying vector.
+     */
     class QueryView
     {
       public:
@@ -581,18 +731,48 @@ class ThreadRegistry
     std::function<void(RegisteredThreadInfo const&)> onUnregister_;
 };
 
-// Registry access methods
+/**
+ * @name Global registry access
+ *
+ * These free functions provide access to a process-wide @ref ThreadRegistry
+ * singleton and allow injecting a custom instance.
+ *
+ * @par Header-only mode (default)
+ * Both registry() and set_external_registry() are @c inline functions that
+ * use function-local statics (Meyers singleton pattern).  registry()
+ * returns the externally set registry if one was provided via
+ * set_external_registry(), otherwise a function-local static instance.
+ *
+ * @par Runtime / shared-library mode (@c THREADSCHEDULE_RUNTIME defined)
+ * The functions are declared here but **defined** in
+ * @c runtime_registry.cpp.  This ensures a single registry instance across
+ * shared-library boundaries even when the header is included from multiple
+ * translation units in different DSOs.
+ *
+ * @{
+ */
+
 #if defined(THREADSCHEDULE_RUNTIME)
-// Declarations only; implemented in the runtime translation unit
 THREADSCHEDULE_API auto registry() -> ThreadRegistry&;
 THREADSCHEDULE_API void set_external_registry(ThreadRegistry* reg);
 #else
+/** @cond INTERNAL */
 inline auto registry_storage() -> ThreadRegistry*&
 {
     static ThreadRegistry* external = nullptr;
     return external;
 }
-
+/** @endcond */
+
+/**
+ * @brief Returns a reference to the process-wide @ref ThreadRegistry.
+ *
+ * If set_external_registry() was called with a non-null pointer, that
+ * registry is returned.  Otherwise a function-local static instance is
+ * used (Meyers singleton; thread-safe initialisation guaranteed by C++11).
+ *
+ * @return Reference to the active @ref ThreadRegistry.
+ */
 inline auto registry() -> ThreadRegistry&
 {
     ThreadRegistry*& ext = registry_storage();
@@ -602,36 +782,105 @@ inline auto registry() -> ThreadRegistry&
     return local;
 }
 
+/**
+ * @brief Injects a custom @ref ThreadRegistry as the global singleton.
+ *
+ * After this call, registry() returns @p reg instead of the default
+ * function-local static instance.  Pass @c nullptr to revert to the
+ * built-in singleton.
+ *
+ * @param reg Pointer to the registry to use globally.  The caller must
+ *            ensure @p reg remains valid for the lifetime of all threads
+ *            that call registry().
+ *
+ * @warning Must be called **before** any threads are registered if the
+ *          intent is to capture all threads in a single registry.
+ *          Calling it after registrations have already occurred leaves
+ *          those earlier entries in the old (default) registry.
+ */
 inline void set_external_registry(ThreadRegistry* reg)
 {
     registry_storage() = reg;
 }
 #endif
+/** @} */
 
-// Build-mode detection (compile-time constant + runtime query)
+/**
+ * @brief Indicates whether the library was compiled in header-only or
+ *        runtime (shared library) mode.
+ *
+ * The value is determined at compile time by the presence of the
+ * @c THREADSCHEDULE_RUNTIME preprocessor macro.
+ *
+ * @see build_mode(), build_mode_string(), is_runtime_build
+ */
 enum class BuildMode : std::uint8_t
 {
-    HEADER_ONLY,
-    RUNTIME
+    HEADER_ONLY, ///< All symbols are inline / header-only.
+    RUNTIME      ///< Core symbols are compiled into a shared library.
 };
 
 #if defined(THREADSCHEDULE_RUNTIME)
-inline constexpr bool is_runtime_build = true;
+inline constexpr bool is_runtime_build = true; ///< @c true when compiled with @c THREADSCHEDULE_RUNTIME.
+
+/**
+ * @brief Returns the build mode detected at compile time (runtime variant).
+ * @return BuildMode::RUNTIME.
+ */
 THREADSCHEDULE_API auto build_mode() -> BuildMode;
 #else
-inline constexpr bool is_runtime_build = false;
+inline constexpr bool is_runtime_build = false; ///< @c true only when compiled with @c THREADSCHEDULE_RUNTIME; @c false in this header-only build.
+
+/**
+ * @brief Returns the build mode detected at compile time (header-only variant).
+ * @return BuildMode::HEADER_ONLY.
+ */
 inline auto build_mode() -> BuildMode
 {
     return BuildMode::HEADER_ONLY;
 }
 #endif
 
+/**
+ * @brief Returns a human-readable C string describing the active build mode.
+ * @return @c "runtime" or @c "header-only".
+ */
 inline auto build_mode_string() -> char const*
 {
     return is_runtime_build ? "runtime" : "header-only";
 }
 
-// Composite registry to aggregate multiple registries when explicit merging is desired
+/**
+ * @brief Aggregates multiple ThreadRegistry instances into a single queryable
+ *        view.
+ *
+ * CompositeThreadRegistry is useful when threads are spread across several
+ * independent @ref ThreadRegistry instances (e.g. one per shared library) and you
+ * want a unified query interface over all of them.
+ *
+ * @par Thread safety
+ * All public methods are thread-safe.  The internal list of attached
+ * registries is protected by a @c std::mutex.
+ *
+ * @par Copyability / movability
+ * Not copyable and not movable (holds a @c std::mutex).
+ *
+ * @par Ownership
+ * attach() stores **raw pointers** to the supplied registries.  The caller
+ * is responsible for ensuring that every attached ThreadRegistry outlives this
+ * CompositeThreadRegistry.  Violating this results in undefined behaviour.
+ *
+ * @par Deduplication
+ * No deduplication is performed.  If the same TID appears in multiple
+ * attached registries, it will appear multiple times in the merged
+ * QueryView.
+ *
+ * @par Querying
+ * query() iterates over every attached registry, calls its own query(), and
+ * concatenates the results into a single @ref ThreadRegistry::QueryView snapshot.
+ * The same functional-style helpers (filter, map, for_each, etc.) are
+ * available directly on CompositeThreadRegistry for convenience.
+ */
 class CompositeThreadRegistry
 {
   public:
@@ -734,7 +983,46 @@ class CompositeThreadRegistry
     std::vector<ThreadRegistry*> registries_;
 };
 
-// RAII helper to auto-register the current thread
+/**
+ * @brief RAII guard that registers the current thread on construction and
+ *        unregisters it on destruction.
+ *
+ * AutoRegisterCurrentThread creates a @ref ThreadControlBlock for the calling
+ * thread, sets its OS-visible name via ThreadControlBlock::set_name(), and
+ * registers it in either the global registry() or a caller-supplied
+ * @ref ThreadRegistry.
+ *
+ * @par Copyability / movability
+ * - **Not copyable** (deleted).
+ * - **Movable** -- move construction / assignment transfers registration
+ *   ownership to the new instance and disarms the source.
+ *
+ * @par Thread safety
+ * Construction and destruction interact with the target ThreadRegistry, which
+ * is itself thread-safe.  The guard object itself must not be shared across
+ * threads without external synchronisation.
+ *
+ * @par Lifetime / ownership
+ * - If constructed with a specific @c ThreadRegistry&, that registry **must**
+ *   outlive this guard.
+ * - If constructed without an explicit registry, the global registry()
+ *   singleton is used, which has static storage duration.
+ *
+ * @par Typical usage
+ * @code
+ * void worker_func() {
+ *     threadschedule::AutoRegisterCurrentThread guard("worker", "pool");
+ *     // ... thread body ...
+ * }   // automatically unregistered here
+ * @endcode
+ *
+ * @par Caveats
+ * - Must be constructed **from** the thread it represents (delegates to
+ *   ThreadControlBlock::create_for_current_thread()).
+ * - On Linux, the name must be at most 15 characters (Linux thread-name
+ *   limit); longer names cause ThreadControlBlock::set_name() to fail, but
+ *   the thread is still registered.
+ */
 class AutoRegisterCurrentThread
 {
   public:
@@ -800,9 +1088,31 @@ class AutoRegisterCurrentThread
 } // namespace threadschedule
 
 #ifndef _WIN32
-// Helper: attach a TID to a cgroup directory (cgroup v2 tries cgroup.threads, then tasks, then cgroup.procs)
 namespace threadschedule
 {
+/**
+ * @brief Attaches a thread to a Linux cgroup by writing its TID to the
+ *        appropriate control file.
+ *
+ * Tries the following files inside @p cgroupDir, in order:
+ * 1. @c cgroup.threads (cgroup v2)
+ * 2. @c tasks (cgroup v1 / hybrid)
+ * 3. @c cgroup.procs (cgroup v2 process-level; migrates the whole process, so it
+ *    is only equivalent to a per-thread attach for single-threaded workloads)
+ *
+ * The first file that can be opened and written to successfully is used.
+ *
+ * @param cgroupDir Absolute path to the target cgroup directory
+ *                  (e.g. @c "/sys/fs/cgroup/my_group").
+ * @param tid       OS-level thread ID to attach.
+ * @return Success, or @c std::errc::operation_not_permitted if none of the
+ *         candidate files could be written.
+ *
+ * @note Linux-only.  This function is not available on Windows builds.
+ * @note The calling process needs appropriate permissions (typically
+ *       @c CAP_SYS_ADMIN or ownership of the cgroup directory) to write
+ *       to cgroup control files.
+ */
 inline auto cgroup_attach_tid(std::string const& cgroupDir, Tid tid) -> expected<void, std::error_code>
 {
     std::vector<std::string> candidates = {"cgroup.threads", "tasks", "cgroup.procs"};
diff --git a/include/threadschedule/thread_wrapper.hpp b/include/threadschedule/thread_wrapper.hpp
index d680aa0..0f055dd 100644
--- a/include/threadschedule/thread_wrapper.hpp
+++ b/include/threadschedule/thread_wrapper.hpp
@@ -23,9 +23,11 @@ namespace threadschedule
 
 namespace detail
 {
+/** @brief Tag type selecting owning (value) storage in ThreadStorage. */
 struct OwningTag
 {
 };
+/** @brief Tag type selecting non-owning (pointer) storage in ThreadStorage. */
 struct NonOwningTag
 {
 };
@@ -33,7 +35,21 @@ struct NonOwningTag
 template <typename ThreadType, typename OwnershipTag>
 class ThreadStorage;
 
-// Owning storage: no extra overhead
+/**
+ * @brief Owning thread storage - holds the thread object by value.
+ *
+ * @tparam ThreadType The thread type (e.g. std::thread, std::jthread).
+ *
+ * Stores the thread object directly as a member, introducing zero indirection
+ * overhead beyond the thread object itself. This specialization is used by
+ * wrappers that own and manage the lifetime of their thread.
+ *
+ * @par Copyability
+ * Not copyable (deleted by the underlying thread type). Movable if @p ThreadType is movable.
+ *
+ * @par Thread Safety
+ * Not thread-safe. Access must be externally synchronized.
+ */
 template <typename ThreadType>
 class ThreadStorage<ThreadType, OwningTag>
 {
@@ -52,7 +68,23 @@ class ThreadStorage<ThreadType, OwningTag>
     ThreadType thread_;
 };
 
-// Non-owning storage: reference to external thread
+/**
+ * @brief Non-owning thread storage - holds a raw pointer to an external thread.
+ *
+ * @tparam ThreadType The thread type (e.g. std::thread, std::jthread).
+ *
+ * Stores a non-owning raw pointer to a thread object managed elsewhere.
+ * Does @b not join or detach on destruction.
+ *
+ * @warning The caller is responsible for ensuring the referenced thread object
+ *          outlives this storage instance. Dangling pointer access is undefined behavior.
+ *
+ * @par Copyability
+ * Trivially copyable (pointer copy). Multiple instances may alias the same thread.
+ *
+ * @par Thread Safety
+ * Not thread-safe. Access must be externally synchronized.
+ */
 template <typename ThreadType>
 class ThreadStorage<ThreadType, NonOwningTag>
 {
@@ -76,7 +108,56 @@ class ThreadStorage<ThreadType, NonOwningTag>
 } // namespace detail
 
 /**
- * @brief Base thread wrapper with common functionality
+ * @brief Polymorphic base providing common thread management operations.
+ *
+ * @tparam ThreadType    The underlying thread type (std::thread or std::jthread).
+ * @tparam OwnershipTag  detail::OwningTag (default) or detail::NonOwningTag.
+ *
+ * Provides a uniform interface for join, detach, naming, priority, affinity, scheduling
+ * policy, and nice-value control on top of any standard thread type. Derived classes
+ * (ThreadWrapper, JThreadWrapper, and their View counterparts) customize ownership
+ * semantics while inheriting all of these operations.
+ *
+ * @par Virtual Destructor
+ * Has a virtual destructor so it can be used as a polymorphic base.
+ *
+ * @par join() / detach()
+ * Both are safe to call even if the thread is not joinable (they check first).
+ *
+ * @par set_name()
+ * - **Linux**: uses @c pthread_setname_np; names are limited to 15 characters
+ *   (returns @c std::errc::invalid_argument if exceeded).
+ * - **Windows**: dynamically loads @c SetThreadDescription from kernel32.dll.
+ *   Names may be longer. Returns @c std::errc::function_not_supported if the API is
+ *   unavailable (pre-Windows 10 1607).
+ *
+ * @par set_priority()
+ * Maps through SchedulerParams::create_for_policy(). On Linux, uses
+ * @c pthread_setschedparam and may require @c CAP_SYS_NICE or root privileges
+ * for real-time policies. On Windows, maps to @c SetThreadPriority constants.
+ *
+ * @par set_scheduling_policy()
+ * Linux-specific concept; on Windows this falls back to set_priority().
+ *
+ * @par set_affinity()
+ * - **Linux**: @c pthread_setaffinity_np with @c cpu_set_t.
+ * - **Windows**: prefers @c SetThreadGroupAffinity (multi-processor-group aware)
+ *   and falls back to @c SetThreadAffinityMask on single-group systems.
+ *
+ * @par set_nice_value() / get_nice_value()
+ * @b Process-level operation per POSIX. On Linux calls
+ * @c setpriority(PRIO_PROCESS, ...); note that Linux/NPTL applies the nice value
+ * per thread. On Windows maps to @c SetPriorityClass / @c GetPriorityClass (whole process).
+ *
+ * @par Return Values
+ * All @c set_* methods (except set_nice_value) return
+ * @c expected<void, std::error_code>. Always check the return value;
+ * failures are silent unless inspected.
+ *
+ * @par Thread Safety
+ * Individual method calls are safe if the underlying OS call is safe, but
+ * concurrent mutation of the same wrapper from multiple threads is not
+ * synchronized internally.
  */
 template <typename ThreadType, typename OwnershipTag = detail::OwningTag>
 class BaseThreadWrapper : protected detail::ThreadStorage<ThreadType, OwnershipTag>
@@ -432,7 +513,40 @@ class BaseThreadWrapper : protected detail::ThreadStorage<ThreadType, OwnershipT
 };
 
 /**
- * @brief Enhanced std::thread wrapper
+ * @brief Owning wrapper around std::thread with RAII join-on-destroy semantics.
+ *
+ * Extends @ref BaseThreadWrapper to provide an owning, movable, non-copyable wrapper
+ * over @c std::thread. Adds automatic lifetime management: the destructor joins
+ * the thread if it is still joinable, which means destruction can @b block until
+ * the thread finishes.
+ *
+ * @par Copyability / Movability
+ * - **Not copyable** (copy constructor and copy assignment are deleted).
+ * - **Movable**. Move construction transfers ownership. Move @b assignment first
+ *   joins the currently held thread (blocking!) before taking ownership of the
+ *   source thread.
+ *
+ * @par Destruction
+ * The destructor calls @c join() if the thread is joinable. This will @b block
+ * the destroying thread until the managed thread completes. If blocking
+ * destruction is undesirable, call @c detach() or @c release() before the
+ * wrapper goes out of scope.
+ *
+ * @par release()
+ * Transfers ownership of the underlying @c std::thread out of the wrapper,
+ * returning it by value. After release, the wrapper holds a default-constructed
+ * (non-joinable) thread and destruction becomes a no-op.
+ *
+ * @par create_with_config()
+ * Factory that creates a thread and attempts to set its name and scheduling
+ * policy. Failures from @c set_name() or @c set_scheduling_policy() are
+ * silently ignored - the thread will still be running but may not have the
+ * requested attributes. Check attributes after construction if they are
+ * critical.
+ *
+ * @par Thread Safety
+ * Not thread-safe. A single ThreadWrapper must not be mutated concurrently
+ * from multiple threads.
  */
 class ThreadWrapper : public BaseThreadWrapper<std::thread, detail::OwningTag>
 {
@@ -508,7 +622,25 @@ class ThreadWrapper : public BaseThreadWrapper<std::thread, detail::OwningTag>
     }
 };
 
-// Non-owning view over std::thread
+/**
+ * @brief Non-owning view over an externally managed std::thread.
+ *
+ * Provides the full @ref BaseThreadWrapper interface (naming, priority, affinity, etc.)
+ * without taking ownership of the thread. The destructor is trivial - it does
+ * @b not join or detach.
+ *
+ * @warning The referenced @c std::thread must outlive this view. If the thread
+ *          object is destroyed or moved while a view still references it, all
+ *          subsequent operations through the view invoke undefined behavior.
+ *
+ * @par Copyability / Movability
+ * Implicitly copyable and movable (pointer semantics). Multiple views may
+ * alias the same thread.
+ *
+ * @par Thread Safety
+ * Same caveats as BaseThreadWrapper. Concurrent use of a view and direct use
+ * of the underlying thread must be externally synchronized.
+ */
 class ThreadWrapperView : public BaseThreadWrapper<std::thread, detail::NonOwningTag>
 {
   public:
@@ -528,7 +660,41 @@ class ThreadWrapperView : public BaseThreadWrapper<std::thread, detail::NonOwnin
 };
 
 /**
- * @brief Enhanced std::jthread wrapper (C++20)
+ * @brief Owning wrapper around std::jthread with cooperative cancellation (C++20).
+ *
+ * Analogous to @ref ThreadWrapper but wraps @c std::jthread, inheriting its built-in
+ * cooperative stop semantics. On destruction the underlying @c std::jthread
+ * automatically requests a stop and joins, so the destructor may @b block
+ * until the thread acknowledges the stop request and finishes.
+ *
+ * Exposes @c request_stop(), @c stop_requested(), @c get_stop_token(), and
+ * @c get_stop_source() for cooperative cancellation.
+ *
+ * @par Copyability / Movability
+ * - **Not copyable** (copy constructor and copy assignment are deleted).
+ * - **Movable**. Move assignment transfers ownership directly (@c std::jthread's
+ *   move assignment requests a stop on and joins any previously held thread).
+ *
+ * @par Destruction
+ * Delegates to @c std::jthread's destructor which calls @c request_stop()
+ * then @c join(). This will block until the managed thread finishes.
+ *
+ * @par release()
+ * Transfers ownership of the underlying @c std::jthread out of the wrapper.
+ *
+ * @par create_with_config()
+ * Factory that creates a jthread and attempts to set its name and scheduling
+ * policy. Failures from set_name() or set_scheduling_policy() are silently
+ * ignored.
+ *
+ * @par Pre-C++20 Fallback
+ * When compiled below C++20, @c JThreadWrapper is a type alias for
+ * @ref ThreadWrapper (which lacks stop-token support).
+ *
+ * @par Thread Safety
+ * Not thread-safe. A single JThreadWrapper must not be mutated concurrently
+ * from multiple threads. The stop token/source obtained from the wrapper are
+ * independently thread-safe per the standard.
  */
 #if __cplusplus >= 202002L || (defined(_MSVC_LANG) && _MSVC_LANG >= 202002L)
 class JThreadWrapper : public BaseThreadWrapper<std::jthread, detail::OwningTag>
@@ -611,7 +777,30 @@ class JThreadWrapper : public BaseThreadWrapper<std::jthread, detail::OwningTag>
     }
 };
 
-// Non-owning view over std::jthread (C++20)
+/**
+ * @brief Non-owning view over an externally managed std::jthread (C++20).
+ *
+ * Provides the full @ref BaseThreadWrapper interface plus jthread-specific cooperative
+ * cancellation methods (request_stop, stop_requested, get_stop_token,
+ * get_stop_source) without taking ownership. The destructor is trivial - it
+ * does @b not request a stop, join, or detach.
+ *
+ * @warning The referenced @c std::jthread must outlive this view. Accessing a
+ *          view after the underlying jthread has been destroyed or moved is
+ *          undefined behavior.
+ *
+ * @par Copyability / Movability
+ * Implicitly copyable and movable (pointer semantics). Multiple views may
+ * alias the same jthread.
+ *
+ * @par Pre-C++20 Fallback
+ * When compiled below C++20, @c JThreadWrapperView is a type alias for
+ * @ref ThreadWrapperView.
+ *
+ * @par Thread Safety
+ * Same caveats as BaseThreadWrapper. The stop token/source obtained from the
+ * view are independently thread-safe per the standard.
+ */
 class JThreadWrapperView : public BaseThreadWrapper<std::jthread, detail::NonOwningTag>
 {
   public:
@@ -652,6 +841,39 @@ using JThreadWrapper = ThreadWrapper;
 using JThreadWrapperView = ThreadWrapperView;
 #endif // C++20
 
+/**
+ * @brief Looks up an OS thread by its name via /proc and provides scheduling control.
+ *
+ * On construction, scans @c /proc/self/task/ to find a thread whose
+ * @c comm matches the given name. If found, the Linux TID is cached and
+ * subsequent calls operate on that TID via @c sched_setscheduler /
+ * @c sched_setaffinity (TID-based syscalls, @b not pthread_setschedparam).
+ *
+ * @par Platform Support
+ * - **Linux only**. On Windows every method is a no-op or returns
+ *   @c std::errc::function_not_supported, and found() always returns @c false.
+ *
+ * @par Snapshot Semantics
+ * The /proc scan happens once at construction time. If the target thread
+ * exits or changes its name after construction, this view becomes stale.
+ * There is no live tracking.
+ *
+ * @par Thread Name Limit
+ * Linux thread names are limited to 15 characters. Names longer than 15
+ * characters will never match, and set_name() rejects them.
+ *
+ * @par Scheduling
+ * Uses @c sched_setscheduler(tid, ...) rather than @c pthread_setschedparam().
+ * Changing real-time policies may require @c CAP_SYS_NICE.
+ *
+ * @par Copyability / Movability
+ * Trivially copyable and movable (stores only a TID/handle).
+ *
+ * @par Thread Safety
+ * Methods are safe to call concurrently from different threads as long as
+ * the target thread still exists, but the class itself provides no
+ * internal synchronization.
+ */
 class ThreadByNameView
 {
   public:
@@ -804,7 +1026,22 @@ class ThreadByNameView
 #endif
 };
 
-// Static hardware information
+/**
+ * @brief Static utility class providing hardware and scheduling introspection.
+ *
+ * All methods are static; the class holds no state and should not be instantiated.
+ *
+ * @par Provided Queries
+ * - @c hardware_concurrency() - delegates to @c std::thread::hardware_concurrency().
+ * - @c get_thread_id() - returns the OS-level thread ID (Linux TID via
+ *   @c syscall(SYS_gettid), Windows thread ID via @c GetCurrentThreadId()).
+ * - @c get_current_policy() - returns the calling thread's scheduling policy.
+ *   On Windows this always returns @c SchedulingPolicy::OTHER.
+ * - @c get_current_priority() - returns the calling thread's scheduling priority.
+ *
+ * @par Thread Safety
+ * All methods are thread-safe (they query per-thread or immutable system state).
+ */
 class ThreadInfo
 {
   public:
diff --git a/include/threadschedule/topology.hpp b/include/threadschedule/topology.hpp
index 7e298eb..76dfc90 100644
--- a/include/threadschedule/topology.hpp
+++ b/include/threadschedule/topology.hpp
@@ -26,18 +26,29 @@ namespace threadschedule
 
 /**
  * @brief Snapshot of basic CPU/NUMA topology.
+ *
+ * Value type (copyable). Populated by read_topology().
+ *
+ * - @c cpu_count: total logical CPUs (from @c std::thread::hardware_concurrency).
+ * - @c numa_nodes: number of NUMA nodes (always 1 on Windows; detected
+ *   via @c /sys/devices/system/node/ on Linux).
+ * - @c node_to_cpus: mapping from NUMA node index to the set of
+ *   logical CPU indices belonging to that node.
  */
 struct CpuTopology
 {
     int cpu_count{0};
     int numa_nodes{1};
-    // Mapping: node -> list of CPUs
     std::vector<std::vector<int>> node_to_cpus;
 };
 
 /**
  * @brief Discover basic topology. Linux: reads /sys for NUMA nodes.
  *        Windows: single node, sequential CPU indices.
+ *
+ * Called frequently by chaos/affinity helpers. The result is not
+ * cached internally -- consider caching the returned CpuTopology
+ * yourself if performance of repeated calls matters.
  */
 inline auto read_topology() -> CpuTopology
 {
@@ -129,10 +140,13 @@ inline auto read_topology() -> CpuTopology
 }
 
 /**
- * @brief Build a `ThreadAffinity` for the given NUMA node.
- * @param node_index NUMA node index (wraps if out of range)
- * @param thread_index Used to select CPU(s) within node
- * @param threads_per_node Optionally include multiple CPUs per thread
+ * @brief Build a ThreadAffinity for the given NUMA node.
+ *
+ * Calls read_topology() internally on every invocation (no caching).
+ *
+ * @param node_index       NUMA node index (wraps if out of range).
+ * @param thread_index     Used to select CPU(s) within the node.
+ * @param threads_per_node Number of CPUs to include per thread (default 1).
  */
 inline auto affinity_for_node(int node_index, int thread_index, int threads_per_node = 1) -> ThreadAffinity
 {
@@ -158,6 +172,12 @@ inline auto affinity_for_node(int node_index, int thread_index, int threads_per_
 
 /**
  * @brief Distribute thread affinities across NUMA nodes in round-robin order.
+ *
+ * Returns one ThreadAffinity per thread, cycling through NUMA nodes
+ * so that consecutive threads are spread across different nodes.
+ *
+ * @param num_threads Number of affinity masks to generate.
+ * @return Vector of @p num_threads ThreadAffinity objects.
  */
 inline auto distribute_affinities_by_numa(size_t num_threads) -> std::vector<ThreadAffinity>
 {