diff --git a/src/native/containers/CMakeLists.txt b/src/native/containers/CMakeLists.txt index a277e82e57afbb..17c842192f48c1 100644 --- a/src/native/containers/CMakeLists.txt +++ b/src/native/containers/CMakeLists.txt @@ -1,5 +1,6 @@ set(SOURCES dn-allocator.c + dn-ebr.c dn-fwd-list.c dn-list.c dn-queue.c @@ -23,11 +24,24 @@ configure_file(dn-config.h.in ${CMAKE_CURRENT_BINARY_DIR}/dn-config.h) target_include_directories(dn-containers PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}) target_include_directories(dn-containers PRIVATE ${CMAKE_CURRENT_BINARY_DIR}) +target_link_libraries(dn-containers PUBLIC minipal) set_target_properties(dn-containers PROPERTIES CLR_CONTROL_FLOW_GUARD ON) target_include_directories(dn-containers-no-lto PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}) target_include_directories(dn-containers-no-lto PRIVATE ${CMAKE_CURRENT_BINARY_DIR}) +target_link_libraries(dn-containers-no-lto PUBLIC minipal) set_target_properties(dn-containers-no-lto PROPERTIES CLR_CONTROL_FLOW_GUARD ON INTERPROCEDURAL_OPTIMIZATION OFF) + +# Tests +add_executable(dn-ebr-stress tests/ebr-stress.c) +target_sources(dn-ebr-stress PRIVATE $) +target_link_libraries(dn-ebr-stress PRIVATE minipal) +target_include_directories(dn-ebr-stress PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}) + +add_executable(dn-ebr-bench tests/ebr-bench.c) +target_sources(dn-ebr-bench PRIVATE $) +target_link_libraries(dn-ebr-bench PRIVATE minipal) +target_include_directories(dn-ebr-bench PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}) diff --git a/src/native/containers/dn-ebr.c b/src/native/containers/dn-ebr.c new file mode 100644 index 00000000000000..71d1cb0dc37d2a --- /dev/null +++ b/src/native/containers/dn-ebr.c @@ -0,0 +1,469 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. 
+
+#include "dn-ebr.h"
+#include <string.h>
+#include <stdint.h>
+#include <minipal/atomic.h>
+#include <minipal/mutex.h>
+#include <minipal/tls.h>
+
+// ============================================
+// Internal Types
+// ============================================
+
+// Number of epoch slots (current, current-1, current-2)
+#define DN_EBR_NUM_EPOCHS 3
+
+// Forward declaration
+typedef struct _dn_ebr_thread_data_t dn_ebr_thread_data_t;
+
+// Per-thread EBR state
+struct _dn_ebr_thread_data_t {
+    dn_ebr_collector_t *collector;
+    uint32_t local_epoch; // UINT32_MAX indicates quiescent
+    uint32_t critical_region_depth;
+    dn_ebr_thread_data_t *next_in_collector; // Link in collector's thread list
+};
+
+// Pending deletion entry
+typedef struct _dn_ebr_pending_t {
+    void *object;
+    const dn_ebr_deletion_traits_t *traits;
+    size_t estimated_size;
+    struct _dn_ebr_pending_t *next;
+} dn_ebr_pending_t;
+
+// Collector structure now declared in header (dn-ebr.h)
+
+// ============================================
+// TLS Management (per-collector)
+// ============================================
+
+static void dn_ebr_tls_destructor (void *data);
+static void dn_ebr_unregister_thread_internal (dn_ebr_collector_t *collector, dn_ebr_thread_data_t *thread_data);
+
+static bool
+dn_ebr_tls_init (dn_ebr_collector_t *collector)
+{
+    if (!minipal_tls_key_create (&collector->tls_key, dn_ebr_tls_destructor)) {
+        collector->fatal_callback ("dn_ebr: TLS key create failed");
+        return false;
+    }
+    collector->tls_initialized = true;
+    return true;
+}
+
+static void
+dn_ebr_tls_shutdown (dn_ebr_collector_t *collector)
+{
+    if (collector->tls_initialized) {
+        minipal_tls_key_delete (&collector->tls_key);
+        collector->tls_initialized = false;
+    }
+}
+
+static dn_ebr_thread_data_t *
+dn_ebr_tls_get (dn_ebr_collector_t *collector)
+{
+    if (!collector->tls_initialized) {
+        collector->fatal_callback ("dn_ebr: TLS get before initialization");
+        return NULL;
+    }
+    return (dn_ebr_thread_data_t *)minipal_tls_get (&collector->tls_key);
+}
+
+static bool
+dn_ebr_tls_set (dn_ebr_collector_t *collector, dn_ebr_thread_data_t *thread_data)
+{
+    if (!collector->tls_initialized) {
+        collector->fatal_callback ("dn_ebr: TLS set before initialization");
+        return false;
+    }
+    if (!minipal_tls_set (&collector->tls_key, thread_data)) {
+        collector->fatal_callback ("dn_ebr: TLS set failed");
+        return false;
+    }
+    return true;
+}
+
+// TLS destructor - called automatically on thread exit
+static void
+dn_ebr_tls_destructor (void *data)
+{
+    dn_ebr_thread_data_t *thread_data = (dn_ebr_thread_data_t *)data;
+    if (!thread_data)
+        return;
+
+    dn_ebr_unregister_thread_internal (thread_data->collector, thread_data);
+}
+
+// Get or create thread data for this collector
+static dn_ebr_thread_data_t *
+dn_ebr_get_thread_data (dn_ebr_collector_t *collector)
+{
+    dn_ebr_thread_data_t *thread_data = dn_ebr_tls_get (collector);
+    if (thread_data)
+        return thread_data;
+
+    // Allocate new thread data
+    thread_data = (dn_ebr_thread_data_t *)dn_allocator_alloc (collector->allocator, sizeof (dn_ebr_thread_data_t));
+    if (!thread_data) {
+        collector->fatal_callback ("dn_ebr: failed to allocate thread data");
+        return NULL;
+    }
+
+    memset (thread_data, 0, sizeof (dn_ebr_thread_data_t));
+    thread_data->collector = collector;
+    thread_data->critical_region_depth = 0;
+    // Start quiescent until thread enters its first critical region
+    thread_data->local_epoch = UINT32_MAX;
+
+    // Store in TLS
+    if (!dn_ebr_tls_set (collector, thread_data)) {
+        dn_allocator_free (collector->allocator, thread_data);
+        return NULL;
+    }
+
+    // Add to
collector's thread list + minipal_mutex_enter (&collector->thread_list_lock); + thread_data->next_in_collector = collector->thread_list_head; + collector->thread_list_head = thread_data; + minipal_mutex_leave (&collector->thread_list_lock); + + return thread_data; +} + +// Remove thread data from collector's list (called during unregister) +static void +dn_ebr_unregister_thread_internal (dn_ebr_collector_t *collector, dn_ebr_thread_data_t *thread_data) +{ + if (!thread_data) + return; + + // Remove from collector's thread list + minipal_mutex_enter (&collector->thread_list_lock); + + dn_ebr_thread_data_t **pp = &collector->thread_list_head; + while (*pp) { + if (*pp == thread_data) { + *pp = thread_data->next_in_collector; + break; + } + pp = &(*pp)->next_in_collector; + } + + minipal_mutex_leave (&collector->thread_list_lock); + + // Free the thread data + dn_allocator_free (collector->allocator, thread_data); +} + +// ============================================ +// Epoch Management +// ============================================ + +// Check if all threads have observed the current epoch +static bool +dn_ebr_can_advance_epoch (dn_ebr_collector_t *collector) +{ + uint32_t current_epoch = minipal_atomic_load_u32 (&collector->global_epoch); + + // Must hold thread_list_lock when calling this + dn_ebr_thread_data_t *thread_data = collector->thread_list_head; + while (thread_data) { + // Only active threads (in critical region) matter + // Ignore quiescent threads (local_epoch == UINT32_MAX) + if (thread_data->local_epoch != UINT32_MAX) { + // If any active thread hasn't observed the current epoch, we can't advance + if (thread_data->local_epoch != current_epoch) + return false; + } + thread_data = thread_data->next_in_collector; + } + + return true; +} + +// Try to advance the global epoch +static bool +dn_ebr_try_advance_epoch (dn_ebr_collector_t *collector) +{ + minipal_mutex_enter (&collector->thread_list_lock); + + bool can_advance = dn_ebr_can_advance_epoch (collector); + if (can_advance) { + // Keep epoch in range [0, DN_EBR_NUM_EPOCHS-1] + uint32_t new_epoch = (collector->global_epoch + 1) % DN_EBR_NUM_EPOCHS; + minipal_atomic_store_u32 (&collector->global_epoch, new_epoch); + } + + minipal_mutex_leave (&collector->thread_list_lock); + + return can_advance; +} + +// ============================================ +// Reclamation +// ============================================ + +// Delete all objects in a pending queue +static size_t +dn_ebr_drain_queue (dn_ebr_collector_t *collector, uint32_t slot) +{ + size_t freed_size = 0; + + dn_ebr_pending_t *pending = collector->pending_heads[slot]; + collector->pending_heads[slot] = NULL; + + while (pending) { + dn_ebr_pending_t *next = pending->next; + + // Delete the object + pending->traits->delete_object (pending->object); + freed_size += pending->estimated_size; + + // Free the pending entry + dn_allocator_free (collector->allocator, pending); + + pending = next; + } + + return freed_size; +} + +// Attempt to reclaim objects from old epochs +static void +dn_ebr_try_reclaim (dn_ebr_collector_t *collector) +{ + // Try to advance the epoch + // If successful, objects from 2 epochs ago are safe to delete + if (dn_ebr_try_advance_epoch (collector)) { + minipal_mutex_enter (&collector->pending_lock); + + uint32_t current_epoch = minipal_atomic_load_u32 (&collector->global_epoch); + // Objects retired at epoch E are safe when global epoch has advanced twice past E + // With epoch in [0, DN_EBR_NUM_EPOCHS-1], the safe slot is (current + 1) % 
DN_EBR_NUM_EPOCHS + // because that's the slot that was current 2 advances ago + uint32_t safe_slot = (current_epoch + 1) % DN_EBR_NUM_EPOCHS; + + size_t freed = dn_ebr_drain_queue (collector, safe_slot); + if (freed > 0) { + minipal_atomic_sub_size (&collector->pending_size, freed); + } + + minipal_mutex_leave (&collector->pending_lock); + } +} + +// ============================================ +// Public API Implementation +// ============================================ + +dn_ebr_collector_t * +dn_ebr_collector_init ( + dn_ebr_collector_t *collector, + size_t memory_budget, + dn_allocator_t *allocator, + dn_ebr_fatal_callback_t fatal_callback) +{ + if (!fatal_callback) { + // Can't report this error without a callback, just return NULL + return NULL; + } + + DN_ASSERT (collector); + + memset (collector, 0, sizeof (dn_ebr_collector_t)); + + collector->memory_budget = memory_budget; + collector->allocator = allocator; + collector->fatal_callback = fatal_callback; + collector->global_epoch = 0; + collector->pending_size = 0; + collector->thread_list_head = NULL; + collector->tls_initialized = false; + + // Initialize mutexes + if (!minipal_mutex_init (&collector->thread_list_lock)) { + fatal_callback ("dn_ebr: failed to initialize thread_list_lock"); + return NULL; + } + + if (!minipal_mutex_init (&collector->pending_lock)) { + minipal_mutex_destroy (&collector->thread_list_lock); + fatal_callback ("dn_ebr: failed to initialize pending_lock"); + return NULL; + } + + // Initialize TLS slot for this collector + if (!dn_ebr_tls_init (collector)) { + minipal_mutex_destroy (&collector->pending_lock); + minipal_mutex_destroy (&collector->thread_list_lock); + return NULL; + } + + for (uint32_t i = 0; i < DN_EBR_NUM_EPOCHS; i++) { + collector->pending_heads[i] = NULL; + } + + return collector; +} + +void +dn_ebr_collector_shutdown (dn_ebr_collector_t *collector) +{ + DN_ASSERT (collector); + + // Drain all pending queues (force delete everything) + minipal_mutex_enter (&collector->pending_lock); + + for (uint32_t i = 0; i < DN_EBR_NUM_EPOCHS; i++) { + dn_ebr_drain_queue (collector, i); + } + + minipal_mutex_leave (&collector->pending_lock); + + // Shutdown TLS + dn_ebr_tls_shutdown (collector); + + // Destroy mutexes + minipal_mutex_destroy (&collector->pending_lock); + minipal_mutex_destroy (&collector->thread_list_lock); + + // Note: Threads should have been unregistered before shutdown + // Any remaining thread_data will be orphaned but the TLS destructor + // will no longer be called since we freed the TLS slot +} + +void +dn_ebr_register_thread (dn_ebr_collector_t *collector) +{ + DN_ASSERT (collector); + + // This will create thread data if it doesn't exist + dn_ebr_get_thread_data (collector); +} + +void +dn_ebr_unregister_thread (dn_ebr_collector_t *collector) +{ + DN_ASSERT (collector); + + dn_ebr_thread_data_t *thread_data = dn_ebr_tls_get (collector); + if (!thread_data) + return; + + // Clear TLS slot first (so destructor won't be called again) + dn_ebr_tls_set (collector, NULL); + + // Now unregister + dn_ebr_unregister_thread_internal (collector, thread_data); +} + +void +dn_ebr_enter_critical_region (dn_ebr_collector_t *collector) +{ + DN_ASSERT (collector); + + dn_ebr_thread_data_t *thread_data = dn_ebr_get_thread_data (collector); + if (!thread_data) + return; + + thread_data->critical_region_depth++; + + // Only update epoch on outermost entry + if (thread_data->critical_region_depth == 1) { + thread_data->local_epoch = minipal_atomic_load_u32 (&collector->global_epoch); + } +} + 
+void +dn_ebr_exit_critical_region (dn_ebr_collector_t *collector) +{ + DN_ASSERT (collector); + + dn_ebr_thread_data_t *thread_data = dn_ebr_tls_get (collector); + if (!thread_data) { + collector->fatal_callback ("dn_ebr: exit_critical_region called but thread not registered"); + return; + } + + if (thread_data->critical_region_depth == 0) { + collector->fatal_callback ("dn_ebr: exit_critical_region called without matching enter"); + return; + } + + thread_data->critical_region_depth--; + + // Only mark quiescent on outermost exit + if (thread_data->critical_region_depth == 0) { + thread_data->local_epoch = UINT32_MAX; + } +} + +void +dn_ebr_queue_for_deletion ( + dn_ebr_collector_t *collector, + void *object, + const dn_ebr_deletion_traits_t *traits) +{ + DN_ASSERT (collector); + + if (!object) { + collector->fatal_callback ("dn_ebr: queue_for_deletion called with NULL object"); + return; + } + + if (!traits || !traits->estimate_size || !traits->delete_object) { + collector->fatal_callback ("dn_ebr: queue_for_deletion called with invalid traits"); + return; + } + + // Must be in a critical region + dn_ebr_thread_data_t *thread_data = dn_ebr_tls_get (collector); + if (!thread_data || thread_data->critical_region_depth == 0) { + collector->fatal_callback ("dn_ebr: queue_for_deletion called outside critical region"); + return; + } + + // Allocate pending entry + dn_ebr_pending_t *pending = (dn_ebr_pending_t *)dn_allocator_alloc (collector->allocator, sizeof (dn_ebr_pending_t)); + if (!pending) { + collector->fatal_callback ("dn_ebr: failed to allocate pending entry"); + return; + } + + pending->object = object; + pending->traits = traits; + pending->estimated_size = traits->estimate_size (object); + pending->next = NULL; + + // Add to the appropriate epoch queue + minipal_mutex_enter (&collector->pending_lock); + + uint32_t current_epoch = minipal_atomic_load_u32 (&collector->global_epoch); + uint32_t slot = current_epoch; // epoch is already in [0, DN_EBR_NUM_EPOCHS-1] + + // Push-front into list; deletion order not required to be FIFO + pending->next = collector->pending_heads[slot]; + collector->pending_heads[slot] = pending; + + minipal_atomic_add_size (&collector->pending_size, pending->estimated_size); + + minipal_mutex_leave (&collector->pending_lock); + + // Check if we need to try reclamation + if (minipal_atomic_load_size (&collector->pending_size) > collector->memory_budget) { + dn_ebr_try_reclaim (collector); + } +} + +bool +dn_ebr_in_critical_region (dn_ebr_collector_t *collector) +{ + DN_ASSERT (collector); + + dn_ebr_thread_data_t *thread_data = dn_ebr_tls_get (collector); + return thread_data && thread_data->critical_region_depth > 0; +} diff --git a/src/native/containers/dn-ebr.h b/src/native/containers/dn-ebr.h new file mode 100644 index 00000000000000..5242f5729c6caa --- /dev/null +++ b/src/native/containers/dn-ebr.h @@ -0,0 +1,235 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +// dn-ebr.h - Epoch-Based Reclamation for safe memory reclamation + +// Overview +// -------- +// Epoch-Based Reclamation (EBR) is a technique for safe, low-overhead memory +// reclamation in concurrent lock-free/read-heavy data structures. Threads that +// wish to access shared objects enter a critical region, during which they may +// read shared pointers without additional per-access synchronization. When an +// object is removed from a shared structure, it is not immediately freed. 
+// Instead, it is queued for deferred deletion and reclaimed only after all
+// threads that could have had a reference (i.e., those that were in a critical
+// region at the time of retirement) have subsequently passed through a
+// quiescent state.
+//
+// Resources
+// ---------
+// - Keir Fraser, "Practical Lock-Freedom":
+//   https://www.cl.cam.ac.uk/techreports/UCAM-CL-TR-579.pdf
+//
+// This implementation
+// -------------------
+// - Per-collector epochs cycle over a small fixed range [0, DN_EBR_NUM_EPOCHS-1].
+// - Each thread calls dn_ebr_enter_critical_region() before reading shared state
+//   and dn_ebr_exit_critical_region() when finished.
+// - When retiring an object, call dn_ebr_queue_for_deletion(). The object will be
+//   reclaimed after all threads have passed through a quiescent point.
+// - A memory budget can be provided to trigger periodic reclamation.
+// - A fatal error callback is required to report unrecoverable conditions
+//   (e.g., TLS/OS allocation failures or invalid arguments).
+//
+//
+// Typical usage
+// -------------
+// // At startup, initialize a collector (likely a global singleton). The collector defines
+// // the memory budget for pending deletions, the allocator used for internal bookkeeping,
+// // and the fatal error callback. The collector allows up to budget bytes to be in a
+// // pending-deletion state before attempting reclamation.
+// dn_ebr_collector_t storage;
+// dn_ebr_collector_t *collector = dn_ebr_collector_init(&storage, /*budget*/ 1<<17,
+//                                                       DN_DEFAULT_ALLOCATOR, fatal_cb);
+//
+// // For each object type that needs deferred deletion, define traits that
+// // estimate the object's memory size and free it. Objects can be arbitrarily complex
+// // graphs using arbitrary allocators as long as the provided function pointers handle
+// // them correctly. Memory estimates don't have to be exact, but they are the sizes
+// // used to determine when to trigger reclamation.
+// typedef struct Node { /* ... */ } Node;
+//
+// static void node_delete(void *p) { free(p); }
+// static size_t node_size(void *p) { (void)p; return sizeof(Node); }
+//
+// static const dn_ebr_deletion_traits_t node_traits = {
+//     .estimate_size = node_size,
+//     .delete_object = node_delete,
+// };
+//
+// // Instances of these objects are expected to have pointers stored in some shared location.
+// // When threads want to access these objects safely, they enter a critical region. The critical
+// // region is not a lock - multiple threads can be in critical regions simultaneously. Threads
+// // need to leave the critical region at least occasionally so that old objects can be reclaimed,
+// // but it's fine to re-enter rapidly if need be.
+// dn_ebr_enter_critical_region(collector);
+//
+// // read shared pointers safely, optionally swap and retire old pointers:
+// // old = atomic_exchange(&g_shared, new);
+// // if (old) dn_ebr_queue_for_deletion(collector, old, &node_traits);
+// dn_ebr_exit_critical_region(collector);
+//
+// // At process shutdown, after threads quiesce, we can clean up any remaining objects if desired
+// dn_ebr_collector_shutdown(collector);
+//
+//
+// Performance
+// -----------
+// EBR is designed to be low-overhead for read-heavy workloads with infrequent
+// updates. The ebr-bench microbenchmark in the tests directory measures the
+// critical region enter/exit overhead.
+// On my local dev machine I'm seeing approximately 7 ns per call pair,
+// and it could likely be improved a bit further if a scenario needed it.
+
+#ifndef __DN_EBR_H__
+#define __DN_EBR_H__
+
+#include "dn-utils.h"
+#include "dn-allocator.h"
+#include <minipal/mutex.h>
+#include <minipal/tls.h>
+#include <stdint.h>
+#include <stdbool.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+// ============================================
+// Types
+// ============================================
+
+typedef struct _dn_ebr_collector_t dn_ebr_collector_t;
+typedef struct _dn_ebr_deletion_traits_t dn_ebr_deletion_traits_t;
+
+// Callback to estimate the size of an object (for budget tracking)
+typedef size_t (*dn_ebr_estimate_size_func_t)(void *object);
+
+// Callback to delete an object
+typedef void (*dn_ebr_delete_func_t)(void *object);
+
+// Callback invoked on fatal errors (TLS allocation failure, bookkeeping allocation failure,
+// OS API failures, or invalid arguments). After this callback returns, the operation that
+// triggered the error will return without completing its work.
+typedef void (*dn_ebr_fatal_callback_t)(const char *message);
+
+// Deletion traits - describes how to handle a particular type of retired object
+// Instances must remain valid until all objects using them have been deleted
+struct _dn_ebr_deletion_traits_t {
+    dn_ebr_estimate_size_func_t estimate_size; // Required, must not be NULL
+    dn_ebr_delete_func_t delete_object;        // Required, must not be NULL
+};
+
+// ============================================
+// Internal types exposed for allocation needs
+// ============================================
+
+// Number of epoch slots
+#define DN_EBR_NUM_EPOCHS 3
+
+typedef struct _dn_ebr_thread_data_t dn_ebr_thread_data_t;
+
+struct _dn_ebr_collector_t {
+    // Configuration
+    size_t memory_budget;
+    dn_allocator_t *allocator;
+    dn_ebr_fatal_callback_t fatal_callback;
+
+    // Per-collector TLS slot
+    minipal_tls_key tls_key;
+    bool tls_initialized;
+
+    // Epoch management
+    volatile uint32_t global_epoch;
+
+    // Thread tracking
+    minipal_mutex thread_list_lock;
+    dn_ebr_thread_data_t *thread_list_head;
+
+    // Pending deletions - one list per epoch slot
+    minipal_mutex pending_lock;
+    struct _dn_ebr_pending_t *pending_heads[DN_EBR_NUM_EPOCHS];
+    volatile size_t pending_size;
+};
+
+// ============================================
+// Collector Lifecycle
+// ============================================
+
+// Initialize an EBR collector in caller-provided memory
+// memory_budget: target max size of pending deletions before attempting reclamation
+// allocator: used only for internal bookkeeping (thread list, deletion queues); NULL for malloc/free
+// fatal_callback: invoked on fatal errors; must not be NULL
+// Returns collector on success, NULL on failure (after invoking fatal_callback)
+dn_ebr_collector_t *
+dn_ebr_collector_init (
+    dn_ebr_collector_t *collector,
+    size_t memory_budget,
+    dn_allocator_t *allocator,
+    dn_ebr_fatal_callback_t fatal_callback);
+
+// Shutdown an EBR collector, releasing internal resources
+// All threads should be unregistered before calling
+// Deletes any remaining pending objects
+void
+dn_ebr_collector_shutdown (dn_ebr_collector_t *collector);
+
+// ============================================
+// Thread Registration
+// ============================================
+
+// Explicitly register the current thread with the collector
+// Optional - threads are auto-registered on first dn_ebr_enter_critical_region
+// Safe to call multiple times (subsequent calls are no-ops)
+void
+dn_ebr_register_thread (dn_ebr_collector_t *collector);
+
+// Unregister the current thread from the collector
+// Called automatically on thread exit (via TLS destructor) on supported platforms
+// May be called explicitly for early cleanup
+// Must not be called from within a critical region
+void
+dn_ebr_unregister_thread (dn_ebr_collector_t *collector);
+
+// ============================================
+// Critical Region
+// ============================================
+
+// Enter a critical region
+// While in a critical region, objects queued for deletion will not be freed
+// Re-entrant: nested calls increment a counter; only outermost affects epoch
+void
+dn_ebr_enter_critical_region (dn_ebr_collector_t *collector);
+
+// Exit a critical region
+// Must be paired with dn_ebr_enter_critical_region
+// When exiting outermost region, thread becomes quiescent
+void
+dn_ebr_exit_critical_region (dn_ebr_collector_t *collector);
+
+// ============================================
+// Object Deletion
+// ============================================
+
+// Queue an object for deferred deletion
+// Must be called from within a critical region
+// The object will be deleted (via traits->delete_object) once all threads
+// have passed through a quiescent state
+// If memory budget is exceeded, attempts reclamation before returning
+void
+dn_ebr_queue_for_deletion (
+    dn_ebr_collector_t *collector,
+    void *object,
+    const dn_ebr_deletion_traits_t *traits);
+
+// ============================================
+// Utilities
+// ============================================
+
+// Returns true if the current thread is in a critical region for this collector
+bool
+dn_ebr_in_critical_region (dn_ebr_collector_t *collector);
+
+#ifdef __cplusplus
+} // extern "C"
+#endif
+
+#endif /* __DN_EBR_H__ */
diff --git a/src/native/containers/tests/ebr-bench.c b/src/native/containers/tests/ebr-bench.c
new file mode 100644
index 00000000000000..4df3612641815f
--- /dev/null
+++ b/src/native/containers/tests/ebr-bench.c
@@ -0,0 +1,62 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdint.h>
+#include <inttypes.h>
+
+#include "../dn-ebr.h"
+#include <minipal/time.h>
+
+static void fatal_cb(const char *msg)
+{
+    fprintf(stderr, "FATAL: %s\n", msg);
+    fflush(stderr);
+    abort();
+}
+
+int main(int argc, char **argv)
+{
+    (void)argc; (void)argv;
+
+    uint64_t iterations = 10000000ULL; // default 10M enter/exit pairs
+    if (argc >= 2) {
+        char *end = NULL;
+        uint64_t val = strtoull(argv[1], &end, 10);
+        if (end && *end == '\0' && val > 0) iterations = val;
+    }
+
+    dn_ebr_collector_t collector_storage;
+    dn_ebr_collector_t *collector = dn_ebr_collector_init(&collector_storage, 1024 /*bytes*/, DN_DEFAULT_ALLOCATOR, fatal_cb);
+    if (!collector) {
+        fprintf(stderr, "Failed to init collector\n");
+        return 2;
+    }
+
+    // Warm up to initialize TLS/thread state
+    for (int i = 0; i < 10000; i++) {
+        dn_ebr_enter_critical_region(collector);
+        dn_ebr_exit_critical_region(collector);
+    }
+
+    int64_t freq = minipal_hires_tick_frequency();
+    int64_t start = minipal_hires_ticks();
+
+    for (uint64_t i = 0; i < iterations; i++) {
+        dn_ebr_enter_critical_region(collector);
+        dn_ebr_exit_critical_region(collector);
+    }
+
+    int64_t end = minipal_hires_ticks();
+
+    double seconds = (double)(end - start) / (double)freq;
+    double pairs_per_sec = (double)iterations / (seconds > 0.0 ?
seconds : 1.0); + double ns_per_pair = seconds * 1e9 / (double)iterations; + + printf("EBR enter/exit: iterations=%" PRIu64 ", seconds=%.6f, pairs/sec=%.0f, ns/pair=%.2f\n", + iterations, seconds, pairs_per_sec, ns_per_pair); + + dn_ebr_collector_shutdown(collector); + return 0; +} diff --git a/src/native/containers/tests/ebr-stress.c b/src/native/containers/tests/ebr-stress.c new file mode 100644 index 00000000000000..da093b3190ac94 --- /dev/null +++ b/src/native/containers/tests/ebr-stress.c @@ -0,0 +1,217 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +#include +#include +#include +#include +#include +#include + +#ifdef _WIN32 +#include +#else +#include +#include +#endif + +#include "../dn-ebr.h" +#include "../dn-vector.h" +#include +#include +#include + +typedef struct TestObj { + uint64_t id; + uint32_t checksum; + volatile uint32_t deleted; // set to 1 in delete callback +} TestObj; + +// Globals +#define SLOT_COUNT 8 +static void *g_slots[SLOT_COUNT] = {0}; +static volatile uint32_t g_allocations = 0; +static volatile uint32_t g_deletions_marked = 0; + +// Deferred free list to allow use-after-free detection window +typedef struct DeletedNode { void *ptr; struct DeletedNode *next; } DeletedNode; +static DeletedNode *g_deleted_head = NULL; +static dn_ebr_collector_t g_collector_storage_obj; +static dn_ebr_collector_t *g_collector = NULL; + +static size_t estimate_size_cb(void *obj) { + (void)obj; + return sizeof(TestObj); +} + +static void delete_cb(void *obj) { + TestObj *t = (TestObj*)obj; + t->deleted = 1; + // Track deleted objects in a list and eventually free + DeletedNode *node = (DeletedNode*)malloc(sizeof(DeletedNode)); + if (!node) abort(); + node->ptr = obj; + node->next = g_deleted_head; + g_deleted_head = node; + + // Count deletions marked + minipal_atomic_increment_u32(&g_deletions_marked); + + // When list grows large, free a batch to avoid indefinite leaks + // Free oldest ~1000 by popping from head repeatedly + size_t count = 0; + DeletedNode *iter = g_deleted_head; + while (iter) { count++; iter = iter->next; } + if (count > 1000) { + // Free 1000 nodes + for (size_t i = 0; i < 1000 && g_deleted_head; i++) { + DeletedNode *cur = g_deleted_head; + g_deleted_head = g_deleted_head->next; + free(cur->ptr); + free(cur); + } + } +} + +static const dn_ebr_deletion_traits_t g_traits = { + .estimate_size = estimate_size_cb, + .delete_object = delete_cb, +}; + +static void fatal_cb(const char *msg) { + fprintf(stderr, "FATAL: %s\n", msg); + fflush(stderr); + abort(); +} + +static TestObj * alloc_obj(uint64_t id) { + TestObj *t = (TestObj*)malloc(sizeof(TestObj)); + if (!t) abort(); + t->id = id; + t->checksum = (uint32_t)(id ^ 0xA5A5A5A5u); + t->deleted = 0; + minipal_atomic_increment_u32(&g_allocations); + return t; +} + +typedef struct ThreadArgs { + uint32_t seed; + uint32_t iterations; + uint32_t swaps_per_thousand; +} ThreadArgs; + +static +#ifdef _WIN32 +DWORD WINAPI +#else +void * +#endif +thread_func(void *arg) { + ThreadArgs *ta = (ThreadArgs*)arg; + // Initialize per-thread RNG + struct minipal_xoshiro128pp rng; + minipal_xoshiro128pp_init(&rng, ta->seed ^ (uint32_t)(uintptr_t)&rng); + + for (uint32_t it = 0; it < ta->iterations; it++) { + dn_ebr_enter_critical_region(g_collector); + + uint32_t r = minipal_xoshiro128pp_next(&rng); + uint32_t slot = (r >> 8) % SLOT_COUNT; + TestObj *obj = (TestObj*)minipal_atomic_load_ptr(&g_slots[slot]); + if (obj != NULL) { + // Detect 
use-after-delete by checking deleted flag + if (obj->deleted) { + fprintf(stderr, "Detected use-after-delete on object id=%llu\n", (unsigned long long)obj->id); + // Immediate failure: exit the process + exit(1); + } else { + // simulate some work + (void)(obj->checksum + (uint32_t)obj->id); + } + } + + // Occasionally swap in a new object and retire the old one + if ((r % 1000u) < ta->swaps_per_thousand) { + TestObj *new_obj = alloc_obj(((uint64_t)r << 16) ^ it); + void *old = minipal_atomic_exchange_ptr(&g_slots[slot], new_obj); + if (old) { + dn_ebr_queue_for_deletion(g_collector, old, &g_traits); + } + } + + dn_ebr_exit_critical_region(g_collector); + } + +#ifdef _WIN32 + return 0; +#else + return NULL; +#endif +} + +int main(int argc, char **argv) { + (void)argc; (void)argv; + + // Init EBR collector with small budget to encourage rapid cycling + g_collector = dn_ebr_collector_init(&g_collector_storage_obj, 100 /*bytes*/, DN_DEFAULT_ALLOCATOR, fatal_cb); + if (!g_collector) { fprintf(stderr, "failed to init collector\n"); return 2; } + + // Pre-populate slots + for (uint32_t i = 0; i < SLOT_COUNT; i++) { + g_slots[i] = alloc_obj(i + 1); + } + + const uint32_t thread_count = 8; + const uint32_t iterations = 5000000; + const uint32_t swaps_per_thousand = 5; // 0.5% swaps + +#ifdef _WIN32 + HANDLE threads[64]; +#else + pthread_t threads[64]; +#endif + ThreadArgs args[64]; + + for (uint32_t i = 0; i < thread_count; i++) { + args[i].seed = (uint32_t)(0xC001D00Du ^ (i * 2654435761u)); + args[i].iterations = iterations; + args[i].swaps_per_thousand = swaps_per_thousand; +#ifdef _WIN32 + threads[i] = CreateThread(NULL, 0, thread_func, &args[i], 0, NULL); + if (!threads[i]) { fprintf(stderr, "CreateThread failed\n"); return 2; } +#else + if (pthread_create(&threads[i], NULL, thread_func, &args[i]) != 0) { + fprintf(stderr, "pthread_create failed\n"); + return 2; + } +#endif + } + +#ifdef _WIN32 + WaitForMultipleObjects(thread_count, threads, TRUE, INFINITE); + for (uint32_t i = 0; i < thread_count; i++) CloseHandle(threads[i]); +#else + for (uint32_t i = 0; i < thread_count; i++) pthread_join(threads[i], NULL); +#endif + + // Validate deletion pace BEFORE shutdown so shutdown-queued items aren't counted + uint32_t allocs = g_allocations; + uint32_t dels = g_deletions_marked; + printf("Allocations=%u, DeletionsMarked=%u\n", allocs, dels); + if (allocs > dels + 30) { + fprintf(stderr, "EBR did not delete quickly enough (allocations exceed deletions by %u)\n", (allocs - dels)); + return 1; + } + + // Teardown after validation + dn_ebr_collector_shutdown(g_collector); + // Free any remaining deleted objects + while (g_deleted_head) { + DeletedNode *cur = g_deleted_head; + g_deleted_head = g_deleted_head->next; + free(cur->ptr); + free(cur); + } + + return 0; +} diff --git a/src/native/minipal/CMakeLists.txt b/src/native/minipal/CMakeLists.txt index d7f9ad5e2ab78a..50849563becc62 100644 --- a/src/native/minipal/CMakeLists.txt +++ b/src/native/minipal/CMakeLists.txt @@ -5,6 +5,7 @@ set(SOURCES descriptorlimit.c memorybarrierprocesswide.c mutex.c + tls.c guid.c random.c debugger.c diff --git a/src/native/minipal/atomic.h b/src/native/minipal/atomic.h new file mode 100644 index 00000000000000..af90da11d2603b --- /dev/null +++ b/src/native/minipal/atomic.h @@ -0,0 +1,115 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. 
+
+#ifndef HAVE_MINIPAL_ATOMIC_H
+#define HAVE_MINIPAL_ATOMIC_H
+
+#include <stdint.h>
+#include <stddef.h>
+#include <stdbool.h>
+
+#ifdef HOST_WINDOWS
+#include <windows.h>
+#include <intrin.h>
+#else
+// GCC/Clang
+#endif
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+static inline uint32_t minipal_atomic_load_u32(const volatile uint32_t *ptr)
+{
+#ifdef HOST_WINDOWS
+    return *(const volatile uint32_t *)ptr;
+#else
+    return __atomic_load_n(ptr, __ATOMIC_ACQUIRE);
+#endif
+}
+
+static inline void minipal_atomic_increment_u32(volatile uint32_t *ptr)
+{
+#ifdef HOST_WINDOWS
+    InterlockedIncrement((volatile long*)ptr);
+#else
+    __atomic_fetch_add(ptr, 1u, __ATOMIC_RELAXED);
+#endif
+}
+
+static inline void* minipal_atomic_load_ptr(void * volatile *ptr)
+{
+#ifdef HOST_WINDOWS
+    return (void*)InterlockedCompareExchangePointer(ptr, NULL, NULL);
+#else
+    return __atomic_load_n(ptr, __ATOMIC_ACQUIRE);
+#endif
+}
+
+static inline void* minipal_atomic_exchange_ptr(void * volatile *ptr, void *value)
+{
+#ifdef HOST_WINDOWS
+    return InterlockedExchangePointer(ptr, value);
+#else
+    return __atomic_exchange_n(ptr, value, __ATOMIC_ACQ_REL);
+#endif
+}
+
+static inline void minipal_atomic_store_u32(volatile uint32_t *ptr, uint32_t value)
+{
+#ifdef HOST_WINDOWS
+    *(volatile uint32_t *)ptr = value;
+#else
+    __atomic_store_n(ptr, value, __ATOMIC_RELEASE);
+#endif
+}
+
+static inline size_t minipal_atomic_load_size(const volatile size_t *ptr)
+{
+#ifdef HOST_WINDOWS
+    return *(const volatile size_t *)ptr;
+#else
+    return __atomic_load_n(ptr, __ATOMIC_ACQUIRE);
+#endif
+}
+
+static inline void minipal_atomic_store_size(volatile size_t *ptr, size_t value)
+{
+#ifdef HOST_WINDOWS
+    *(volatile size_t *)ptr = value;
+#else
+    __atomic_store_n(ptr, value, __ATOMIC_RELEASE);
+#endif
+}
+
+static inline size_t minipal_atomic_add_size(volatile size_t *ptr, size_t value)
+{
+#ifdef HOST_WINDOWS
+#if defined(_WIN64)
+    return (size_t)_InterlockedExchangeAdd64((volatile long long *)ptr, (long long)value);
+#else
+    return (size_t)_InterlockedExchangeAdd((volatile long *)ptr, (long)value);
+#endif
+#else
+    return __atomic_fetch_add(ptr, value, __ATOMIC_ACQ_REL);
+#endif
+}
+
+static inline size_t minipal_atomic_sub_size(volatile size_t *ptr, size_t value)
+{
+#ifdef HOST_WINDOWS
+#if defined(_WIN64)
+    return (size_t)_InterlockedExchangeAdd64((volatile long long *)ptr, -(long long)value);
+#else
+    return (size_t)_InterlockedExchangeAdd((volatile long *)ptr, -(long)value);
+#endif
+#else
+    return __atomic_fetch_sub(ptr, value, __ATOMIC_ACQ_REL);
+#endif
+}
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif // HAVE_MINIPAL_ATOMIC_H
diff --git a/src/native/minipal/tls.c b/src/native/minipal/tls.c
new file mode 100644
index 00000000000000..4e84b545654dd5
--- /dev/null
+++ b/src/native/minipal/tls.c
@@ -0,0 +1,5 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+
+#include "tls.h"
+// All functionality is defined inline in the header for minimal overhead.
diff --git a/src/native/minipal/tls.h b/src/native/minipal/tls.h
new file mode 100644
index 00000000000000..9661064255a270
--- /dev/null
+++ b/src/native/minipal/tls.h
@@ -0,0 +1,83 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+
+#ifndef HAVE_MINIPAL_TLS_H
+#define HAVE_MINIPAL_TLS_H
+
+#include <stdbool.h>
+
+#ifdef HOST_WINDOWS
+#include <windows.h>
+#else
+#include <pthread.h>
+#endif
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+typedef void (*minipal_tls_destructor_t)(void *);
+
+#ifdef HOST_WINDOWS
+
+typedef struct _minipal_tls_key {
+    DWORD key;
+} minipal_tls_key;
+
+static inline bool minipal_tls_key_create(minipal_tls_key *k, minipal_tls_destructor_t dtor)
+{
+    k->key = FlsAlloc((PFLS_CALLBACK_FUNCTION)dtor);
+    return k->key != FLS_OUT_OF_INDEXES;
+}
+
+static inline void minipal_tls_key_delete(minipal_tls_key *k)
+{
+    if (k->key != FLS_OUT_OF_INDEXES) {
+        FlsFree(k->key);
+        k->key = FLS_OUT_OF_INDEXES;
+    }
+}
+
+static inline void * minipal_tls_get(minipal_tls_key *k)
+{
+    return FlsGetValue(k->key);
+}
+
+static inline bool minipal_tls_set(minipal_tls_key *k, void *value)
+{
+    return FlsSetValue(k->key, value) != 0;
+}
+
+#else
+
+typedef struct _minipal_tls_key {
+    pthread_key_t key;
+} minipal_tls_key;
+
+static inline bool minipal_tls_key_create(minipal_tls_key *k, minipal_tls_destructor_t dtor)
+{
+    return pthread_key_create(&k->key, dtor) == 0;
+}
+
+static inline void minipal_tls_key_delete(minipal_tls_key *k)
+{
+    pthread_key_delete(k->key);
+}
+
+static inline void * minipal_tls_get(minipal_tls_key *k)
+{
+    return pthread_getspecific(k->key);
+}
+
+static inline bool minipal_tls_set(minipal_tls_key *k, void *value)
+{
+    return pthread_setspecific(k->key, value) == 0;
+}
+
+#endif
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif // HAVE_MINIPAL_TLS_H
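
Reviewer note (not part of the change): a minimal out-of-tree sketch of the publish/retire pattern the dn-ebr.h comments describe, using only the API and minipal atomics added by this diff. It assumes dn-ebr.h, dn-allocator.h, and minipal/atomic.h are on the include path and the program links against dn-containers and minipal; the config_t type, its values, and the budget are illustrative only.

// Sketch: publish a replacement object and retire the old one under EBR.
#include <stdio.h>
#include <stdlib.h>
#include "dn-ebr.h"
#include <minipal/atomic.h>

typedef struct { int value; } config_t;   // illustrative payload

static void config_delete(void *p) { free(p); }
static size_t config_size(void *p) { (void)p; return sizeof(config_t); }

static const dn_ebr_deletion_traits_t config_traits = {
    .estimate_size = config_size,
    .delete_object = config_delete,
};

static void fatal(const char *msg) { fprintf(stderr, "%s\n", msg); abort(); }

static void *g_config; // shared pointer: read by many threads, swapped by writers

int main(void)
{
    dn_ebr_collector_t storage;
    dn_ebr_collector_t *ebr = dn_ebr_collector_init(&storage, 64 * 1024, DN_DEFAULT_ALLOCATOR, fatal);
    if (!ebr)
        return 1;

    config_t *initial = malloc(sizeof(config_t));
    if (!initial) abort();
    initial->value = 1;
    g_config = initial;

    // All access to g_config happens inside a critical region.
    dn_ebr_enter_critical_region(ebr);

    config_t *current = (config_t *)minipal_atomic_load_ptr(&g_config);
    printf("current value: %d\n", current->value);

    // Publish a replacement and retire the old object; it is freed only after
    // every thread that might still hold the old pointer has gone quiescent.
    config_t *replacement = malloc(sizeof(config_t));
    if (!replacement) abort();
    replacement->value = 2;
    config_t *old = (config_t *)minipal_atomic_exchange_ptr(&g_config, replacement);
    if (old)
        dn_ebr_queue_for_deletion(ebr, old, &config_traits);

    dn_ebr_exit_critical_region(ebr);

    // Shutdown drains any still-pending deletions; the replacement was never
    // retired, so it is freed directly here.
    dn_ebr_collector_shutdown(ebr);
    free(replacement);
    return 0;
}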