diff --git a/ddprof-lib/src/main/cpp/callTraceHashTable.cpp b/ddprof-lib/src/main/cpp/callTraceHashTable.cpp
index a0aa4979f..931da8626 100644
--- a/ddprof-lib/src/main/cpp/callTraceHashTable.cpp
+++ b/ddprof-lib/src/main/cpp/callTraceHashTable.cpp
@@ -102,10 +102,10 @@ CallTraceHashTable::~CallTraceHashTable() {
 }
 
-void CallTraceHashTable::clear() {
-  // Wait for all hazard pointers to clear before deallocation to prevent races
+ChunkList CallTraceHashTable::clearTableOnly() {
+  // Wait for all hazard pointers to clear before detaching chunks
   HazardPointer::waitForAllHazardPointersToClear();
-
+
   // Clear previous chain pointers to prevent traversal during deallocation
   for (LongHashTable *table = _table; table != nullptr; table = table->prev()) {
     LongHashTable *prev_table = table->prev();
@@ -113,13 +113,21 @@ void CallTraceHashTable::clear() {
       table->setPrev(nullptr); // Clear link before deallocation
     }
   }
-
-  // Now safe to deallocate all memory
-  _allocator.clear();
-
-  // Reinitialize with fresh table
+
+  // Detach chunks for deferred deallocation - keeps trace memory alive
+  ChunkList detached_chunks = _allocator.detachChunks();
+
+  // Reinitialize with a fresh table (using the new chunk from detachChunks)
   _table = LongHashTable::allocate(nullptr, INITIAL_CAPACITY, &_allocator);
   _overflow = 0;
+
+  return detached_chunks;
+}
+
+void CallTraceHashTable::clear() {
+  // Clear the table and immediately free the chunks (original behavior)
+  ChunkList chunks = clearTableOnly();
+  LinearAllocator::freeChunks(chunks);
 }
 
 // Adaptation of MurmurHash64A by Austin Appleby
diff --git a/ddprof-lib/src/main/cpp/callTraceHashTable.h b/ddprof-lib/src/main/cpp/callTraceHashTable.h
index 5232ded7e..368dd1f6e 100644
--- a/ddprof-lib/src/main/cpp/callTraceHashTable.h
+++ b/ddprof-lib/src/main/cpp/callTraceHashTable.h
@@ -79,6 +79,19 @@ class CallTraceHashTable {
   ~CallTraceHashTable();
 
   void clear();
+
+  /**
+   * Resets the hash table structure but defers memory deallocation.
+   * Returns a ChunkList containing the detached memory chunks.
+   * The caller must call LinearAllocator::freeChunks() on the returned
+   * ChunkList after processing is complete.
+   *
+   * This is used to fix a use-after-free in processTraces(): the table
+   * structure is reset immediately (allowing rotation), but trace memory
+   * remains valid until the processor finishes accessing it.
+   */
+  ChunkList clearTableOnly();
+
   void collect(std::unordered_set<CallTrace *> &traces, std::function<void(CallTrace *)> trace_hook = nullptr);
 
   u64 put(int num_frames, ASGCT_CallFrame *frames, bool truncated, u64 weight);
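As a quick illustration of the contract documented above, a minimal usage sketch (hypothetical caller code, not part of the patch; it assumes the CallTraceHashTable, ChunkList and LinearAllocator APIs introduced in this diff):

```cpp
#include <functional>
#include <unordered_set>

#include "callTraceHashTable.h"
#include "linearAllocator.h"

// Hypothetical caller showing the clearTableOnly()/freeChunks() contract:
// the table is reset immediately, but CallTrace pointers previously obtained
// from it stay valid until freeChunks() is called.
void drainAndProcess(CallTraceHashTable& table,
                     const std::unordered_set<CallTrace*>& collected,
                     const std::function<void(const std::unordered_set<CallTrace*>&)>& processor) {
  // Reset the table structure; the backing chunks are detached, not freed.
  ChunkList chunks = table.clearTableOnly();

  // The table can already accept new put() calls here while the old traces
  // are still being processed.
  processor(collected);

  // Only after the processor has finished is it safe to release the memory.
  LinearAllocator::freeChunks(chunks);
}
```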
diff --git a/ddprof-lib/src/main/cpp/callTraceStorage.cpp b/ddprof-lib/src/main/cpp/callTraceStorage.cpp
index 586d6848a..4838cbee3 100644
--- a/ddprof-lib/src/main/cpp/callTraceStorage.cpp
+++ b/ddprof-lib/src/main/cpp/callTraceStorage.cpp
@@ -371,10 +371,11 @@ CallTraceStorage::CallTraceStorage() : _active_storage(nullptr), _standby_storag
 CallTraceStorage::~CallTraceStorage() {
   // Atomically invalidate storage pointers to prevent new put() operations
-  // Relaxed ordering is sufficient since no concurrent readers exist during destruction
-  CallTraceHashTable* active = const_cast<CallTraceHashTable*>(__atomic_exchange_n(&_active_storage, nullptr, __ATOMIC_RELAXED));
-  CallTraceHashTable* standby = const_cast<CallTraceHashTable*>(__atomic_exchange_n(&_standby_storage, nullptr, __ATOMIC_RELAXED));
-  CallTraceHashTable* scratch = const_cast<CallTraceHashTable*>(__atomic_exchange_n(&_scratch_storage, nullptr, __ATOMIC_RELAXED));
+  // ACQ_REL ordering: RELEASE ensures the nullptr is visible to put()'s ACQUIRE load,
+  // ACQUIRE ensures we see the latest pointer value for the subsequent deletion
+  CallTraceHashTable* active = const_cast<CallTraceHashTable*>(__atomic_exchange_n(&_active_storage, nullptr, __ATOMIC_ACQ_REL));
+  CallTraceHashTable* standby = const_cast<CallTraceHashTable*>(__atomic_exchange_n(&_standby_storage, nullptr, __ATOMIC_ACQ_REL));
+  CallTraceHashTable* scratch = const_cast<CallTraceHashTable*>(__atomic_exchange_n(&_scratch_storage, nullptr, __ATOMIC_ACQ_REL));
 
   // Wait for any ongoing hazard pointer usage to complete and delete each unique table
   // Note: In triple-buffering, all three pointers should be unique, but check anyway
@@ -472,12 +473,13 @@ void CallTraceStorage::processTraces(std::function<void(const std::unordered_set<CallTrace *> &)> processor) {
-  CallTraceHashTable* original_active = const_cast<CallTraceHashTable*>(__atomic_load_n(&_active_storage, __ATOMIC_RELAXED));
-  CallTraceHashTable* original_standby = const_cast<CallTraceHashTable*>(__atomic_load_n(&_standby_storage, __ATOMIC_RELAXED));
-  CallTraceHashTable* original_scratch = const_cast<CallTraceHashTable*>(__atomic_load_n(&_scratch_storage, __ATOMIC_RELAXED));
+  // Load storage pointers - ACQUIRE ordering synchronizes with RELEASE stores from
+  // previous processTraces() calls and constructor initialization
+  CallTraceHashTable* original_active = const_cast<CallTraceHashTable*>(__atomic_load_n(&_active_storage, __ATOMIC_ACQUIRE));
+  CallTraceHashTable* original_standby = const_cast<CallTraceHashTable*>(__atomic_load_n(&_standby_storage, __ATOMIC_ACQUIRE));
+  CallTraceHashTable* original_scratch = const_cast<CallTraceHashTable*>(__atomic_load_n(&_scratch_storage, __ATOMIC_ACQUIRE));
 
   // Clear process collection for reuse (no malloc/free)
   _traces_buffer.clear();
@@ -492,8 +494,11 @@ void CallTraceStorage::processTraces(std::function<void(const std::unordered_set<CallTrace *> &)> processor) {
 
-  // ... -> it will become the new active
-  original_standby->clear();
+  // Step 3: Clear the standby table structure but DEFER memory deallocation.
+  // The standby traces are now in _traces_buffer as raw pointers, so the
+  // underlying memory must stay alive until processor() finishes.
+  // clearTableOnly() resets the table for reuse but returns the chunks for later freeing.
+  ChunkList standby_chunks = original_standby->clearTableOnly();
 
   {
     // Critical section for table swap operations - disallow signals to interrupt
@@ -531,7 +536,11 @@ void CallTraceStorage::processTraces(std::function<void(const std::unordered_set<CallTrace *> &)> processor) {
   processor(_traces_buffer);
 
+  // The processor has finished with _traces_buffer, so it is now safe to
+  // free the detached standby chunks that were kept alive during processing.
+  LinearAllocator::freeChunks(standby_chunks);
+
   original_active->clear();
 
   // Triple-buffer rotation maintains trace continuity with thread-safe malloc-free operations:
@@ -546,9 +555,10 @@ void CallTraceStorage::clear() {
   // Mark critical section during clear operation for consistency
   CriticalSection cs;
 
-  // Load current table pointers - relaxed ordering sufficient within critical section
-  CallTraceHashTable* active = const_cast<CallTraceHashTable*>(__atomic_load_n(&_active_storage, __ATOMIC_RELAXED));
-  CallTraceHashTable* standby = const_cast<CallTraceHashTable*>(__atomic_load_n(&_standby_storage, __ATOMIC_RELAXED));
+  // Load current table pointers - ACQUIRE ordering synchronizes with RELEASE stores
+  // from processTraces() rotation and constructor initialization
+  CallTraceHashTable* active = const_cast<CallTraceHashTable*>(__atomic_load_n(&_active_storage, __ATOMIC_ACQUIRE));
+  CallTraceHashTable* standby = const_cast<CallTraceHashTable*>(__atomic_load_n(&_standby_storage, __ATOMIC_ACQUIRE));
 
   // Direct clear operations with critical section protection
   if (active) {
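The ordering comments above rely on the standard release/acquire pairing. A minimal, self-contained sketch of that pairing with the same __atomic builtins (hypothetical names; `g_table` stands in for _active_storage):

```cpp
#include <cstdio>

struct Payload { int value; };

static Payload* g_table = nullptr;

void publisher(Payload* p) {
  p->value = 42;                                    // writes made before the store...
  __atomic_store_n(&g_table, p, __ATOMIC_RELEASE);  // ...are published by the RELEASE store
}

void consumer() {
  // The ACQUIRE load synchronizes with the RELEASE store: if it observes `p`,
  // it also observes every write that happened before that store.
  Payload* p = __atomic_load_n(&g_table, __ATOMIC_ACQUIRE);
  if (p != nullptr) {
    std::printf("%d\n", p->value);  // prints 42, never an uninitialized value
  }
}
```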
diff --git a/ddprof-lib/src/main/cpp/codeCache.cpp b/ddprof-lib/src/main/cpp/codeCache.cpp
index 5fb17d964..14796d701 100644
--- a/ddprof-lib/src/main/cpp/codeCache.cpp
+++ b/ddprof-lib/src/main/cpp/codeCache.cpp
@@ -6,13 +6,15 @@
 #include "codeCache.h"
 #include "dwarf_dd.h"
 #include "os_dd.h"
+
 #include
 #include
 #include
 #include
 
 char *NativeFunc::create(const char *name, short lib_index) {
-  NativeFunc *f = (NativeFunc *)malloc(sizeof(NativeFunc) + 1 + strlen(name));
+  size_t size = align_up(sizeof(NativeFunc) + 1 + strlen(name), sizeof(NativeFunc*));
+  NativeFunc *f = (NativeFunc *)aligned_alloc(sizeof(NativeFunc*), size);
   f->_lib_index = lib_index;
   f->_mark = 0;
   // cppcheck-suppress memleak
diff --git a/ddprof-lib/src/main/cpp/codeCache.h b/ddprof-lib/src/main/cpp/codeCache.h
index 233da56c1..3398ad029 100644
--- a/ddprof-lib/src/main/cpp/codeCache.h
+++ b/ddprof-lib/src/main/cpp/codeCache.h
@@ -6,6 +6,8 @@
 #ifndef _CODECACHE_H
 #define _CODECACHE_H
 
+#include "utils.h"
+
 #include
 #include
 #include
@@ -62,7 +64,7 @@ class NativeFunc {
 
   static short libIndex(const char *name) {
     NativeFunc* func = from(name);
-    if (posix_memalign((void**)(&func), sizeof(NativeFunc*), sizeof(NativeFunc)) != 0) {
+    if (!is_aligned(func, sizeof(func))) {
       return -1;
     }
     return func->_lib_index;
diff --git a/ddprof-lib/src/main/cpp/flightRecorder.cpp b/ddprof-lib/src/main/cpp/flightRecorder.cpp
index ab39b2ca2..42c6023bd 100644
--- a/ddprof-lib/src/main/cpp/flightRecorder.cpp
+++ b/ddprof-lib/src/main/cpp/flightRecorder.cpp
@@ -40,8 +40,6 @@
 #include
 #include
 
-static SpinLock _rec_lock(0);
-
 static const char *const SETTING_RING[] = {NULL, "kernel", "user", "any"};
 static const char *const SETTING_CSTACK[] = {NULL, "no", "fp", "dwarf", "lbr"};
@@ -1484,62 +1482,57 @@ Error FlightRecorder::newRecording(bool reset) {
     return Error("Could not open Flight Recorder output file");
   }
 
-  // Given some of reads are not protected by _rec_lock,
-  // we want to publish _rec with full fence, so that read
-  // side cannot see partially initialized recording.
-  Recording* tmp = new Recording(fd, _args);
-  __atomic_store_n(&_rec, tmp, __ATOMIC_SEQ_CST);
+  _rec = new Recording(fd, _args);
   return Error::OK;
 }
 
 void FlightRecorder::stop() {
   ExclusiveLockGuard locker(&_rec_lock);
-  if (_rec != NULL) {
-    volatile Recording *tmp = _rec;
+  Recording* rec = _rec;
+  if (rec != nullptr) {
     // NULL first, deallocate later
-    __atomic_store_n(&_rec, nullptr, __ATOMIC_RELAXED);
-    delete tmp;
+    _rec = nullptr;
+    delete rec;
   }
 }
 
 Error FlightRecorder::dump(const char *filename, const int length) {
   ExclusiveLockGuard locker(&_rec_lock);
-  if (_rec != NULL) {
+  Recording* rec = _rec;
+  if (rec != nullptr) {
     if (_filename.length() != length ||
         strncmp(filename, _filename.c_str(), length) != 0) {
       // if the filename to dump the recording to is specified move the current
       // working file there
       int copy_fd = open(filename, O_CREAT | O_RDWR | O_TRUNC, 0644);
-      _rec->switchChunk(copy_fd);
+      rec->switchChunk(copy_fd);
       close(copy_fd);
-    } else {
-      return Error(
-          "Can not dump recording to itself. Provide a different file name!");
+      return Error::OK;
     }
-
-    return Error::OK;
-  } else {
-    return Error("No active recording");
+    return Error(
+        "Can not dump recording to itself. Provide a different file name!");
   }
+  return Error("No active recording");
 }
 
 void FlightRecorder::flush() {
   ExclusiveLockGuard locker(&_rec_lock);
-  if (_rec != NULL) {
-    jvmtiEnv *jvmti = VM::jvmti();
-    JNIEnv *env = VM::jni();
+  Recording* rec = _rec;
+  if (rec != nullptr) {
+    jvmtiEnv* jvmti = VM::jvmti();
+    JNIEnv* env = VM::jni();
 
-    jclass **classes = NULL;
+    jclass** classes = NULL;
     jint count = 0;
     // obtaining the class list will create local refs to all loaded classes,
     // effectively preventing them from being unloaded while flushing
     jvmtiError err = jvmti->GetLoadedClasses(&count, classes);
-    _rec->switchChunk(-1);
+    rec->switchChunk(-1);
     if (!err) {
       // deallocate all loaded classes
       for (int i = 0; i < count; i++) {
-        env->DeleteLocalRef((jobject)classes[i]);
-        jvmti->Deallocate((unsigned char *)classes[i]);
+        env->DeleteLocalRef((jobject) classes[i]);
+        jvmti->Deallocate((unsigned char*) classes[i]);
       }
     }
   }
@@ -1547,96 +1540,119 @@
 void FlightRecorder::wallClockEpoch(int lock_index, WallClockEpochEvent *event) {
-  if (_rec != NULL) {
-    Buffer *buf = _rec->buffer(lock_index);
-    _rec->recordWallClockEpoch(buf, event);
+  OptionalSharedLockGuard locker(&_rec_lock);
+  if (locker.ownsLock()) {
+    Recording* rec = _rec;
+    if (rec != nullptr) {
+      Buffer *buf = rec->buffer(lock_index);
+      rec->recordWallClockEpoch(buf, event);
+    }
   }
 }
 
 void FlightRecorder::recordTraceRoot(int lock_index, int tid, TraceRootEvent *event) {
-  if (_rec != NULL) {
-    Buffer *buf = _rec->buffer(lock_index);
-    _rec->recordTraceRoot(buf, tid, event);
+  OptionalSharedLockGuard locker(&_rec_lock);
+  if (locker.ownsLock()) {
+    Recording* rec = _rec;
+    if (rec != nullptr) {
+      Buffer *buf = rec->buffer(lock_index);
+      rec->recordTraceRoot(buf, tid, event);
+    }
   }
 }
 
 void FlightRecorder::recordQueueTime(int lock_index, int tid, QueueTimeEvent *event) {
-  if (_rec != NULL) {
-    Buffer *buf = _rec->buffer(lock_index);
-    _rec->recordQueueTime(buf, tid, event);
+  OptionalSharedLockGuard locker(&_rec_lock);
+  if (locker.ownsLock()) {
+    Recording* rec = _rec;
+    if (rec != nullptr) {
+      Buffer *buf = rec->buffer(lock_index);
+      rec->recordQueueTime(buf, tid, event);
+    }
   }
 }
 
 void FlightRecorder::recordDatadogSetting(int lock_index, int length,
                                           const char *name, const char *value,
                                           const char *unit) {
-  if (_rec != NULL) {
-    Buffer *buf = _rec->buffer(lock_index);
-    _rec->writeDatadogSetting(buf, length, name, value, unit);
+  OptionalSharedLockGuard locker(&_rec_lock);
+  if (locker.ownsLock()) {
+    Recording* rec = _rec;
+    if (rec != nullptr) {
+      Buffer *buf = rec->buffer(lock_index);
+      rec->writeDatadogSetting(buf, length, name, value, unit);
+    }
   }
 }
 
 void FlightRecorder::recordHeapUsage(int lock_index, long value, bool live) {
-  if (_rec != NULL) {
-    Buffer *buf = _rec->buffer(lock_index);
-    _rec->writeHeapUsage(buf, value, live);
+  OptionalSharedLockGuard locker(&_rec_lock);
+  if (locker.ownsLock()) {
+    Recording* rec = _rec;
+    if (rec != nullptr) {
+      Buffer *buf = rec->buffer(lock_index);
+      rec->writeHeapUsage(buf, value, live);
+    }
   }
 }
 
 void FlightRecorder::recordEvent(int lock_index, int tid, u64 call_trace_id,
                                  int event_type, Event *event) {
-  if (_rec != NULL) {
-    RecordingBuffer *buf = _rec->buffer(lock_index);
-    switch (event_type) {
-    case 0:
-      _rec->recordExecutionSample(buf, tid, call_trace_id,
+  OptionalSharedLockGuard locker(&_rec_lock);
+  if (locker.ownsLock()) {
+    Recording* rec = _rec;
+    if (rec != nullptr) {
+      RecordingBuffer *buf = rec->buffer(lock_index);
+      switch (event_type) {
+      case 0:
+        rec->recordExecutionSample(buf, tid, call_trace_id,
+                                   (ExecutionEvent *)event);
+        break;
+      case BCI_WALL:
+        rec->recordMethodSample(buf, tid, call_trace_id,
                                 (ExecutionEvent *)event);
-      break;
-    case BCI_WALL:
-      _rec->recordMethodSample(buf, tid, call_trace_id,
-                               (ExecutionEvent *)event);
-      break;
-    case BCI_ALLOC:
-      _rec->recordAllocation(buf, tid, call_trace_id, (AllocEvent *)event);
-      break;
-    case BCI_LIVENESS:
-      _rec->recordHeapLiveObject(buf, tid, call_trace_id,
-                                 (ObjectLivenessEvent *)event);
-      break;
-    case BCI_LOCK:
-      _rec->recordMonitorBlocked(buf, tid, call_trace_id, (LockEvent *)event);
-      break;
-    case BCI_PARK:
-      _rec->recordThreadPark(buf, tid, call_trace_id, (LockEvent *)event);
-      break;
-    }
-    _rec->flushIfNeeded(buf);
-    _rec->addThread(lock_index, tid);
+        break;
+      case BCI_ALLOC:
+        rec->recordAllocation(buf, tid, call_trace_id, (AllocEvent *)event);
+        break;
+      case BCI_LIVENESS:
+        rec->recordHeapLiveObject(buf, tid, call_trace_id,
+                                  (ObjectLivenessEvent *)event);
+        break;
+      case BCI_LOCK:
+        rec->recordMonitorBlocked(buf, tid, call_trace_id, (LockEvent *)event);
+        break;
+      case BCI_PARK:
+        rec->recordThreadPark(buf, tid, call_trace_id, (LockEvent *)event);
+        break;
+      }
+      rec->flushIfNeeded(buf);
+      rec->addThread(lock_index, tid);
+    }
   }
 }
 
 void FlightRecorder::recordLog(LogLevel level, const char *message, size_t len) {
-  if (!_rec_lock.tryLockShared()) {
-    // No active recording
-    return;
-  }
-
-  if (len > MAX_STRING_LENGTH)
-    len = MAX_STRING_LENGTH;
-  // cppcheck-suppress obsoleteFunctions
-  Buffer *buf = (Buffer *)alloca(len + 40);
-  buf->reset();
+  OptionalSharedLockGuard locker(&_rec_lock);
+  if (locker.ownsLock()) {
+    Recording* rec = _rec;
+    if (rec != nullptr) {
+      if (len > MAX_STRING_LENGTH)
+        len = MAX_STRING_LENGTH;
+      // cppcheck-suppress obsoleteFunctions
+      Buffer *buf = (Buffer *)alloca(len + 40);
+      buf->reset();
 
-  int start = buf->skip(5);
-  buf->putVar64(T_LOG);
-  buf->putVar64(TSC::ticks());
-  buf->putVar64(level);
-  buf->putUtf8(message, len);
-  buf->putVar32(start, buf->offset() - start);
-  _rec->flush(buf);
-
-  _rec_lock.unlockShared();
+      int start = buf->skip(5);
+      buf->putVar64(T_LOG);
+      buf->putVar64(TSC::ticks());
+      buf->putVar64(level);
+      buf->putUtf8(message, len);
+      buf->putVar32(start, buf->offset() - start);
+      rec->flush(buf);
+    }
+  }
 }
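All of the record* methods above now follow the same pattern: a best-effort shared lock plus a null check, while stop() holds the exclusive lock to null out and delete the recording. A condensed sketch of that pattern (hypothetical Resource/Holder types; OptionalSharedLockGuard and ExclusiveLockGuard come from spinLock.h later in this diff, and the exclusive guard is assumed to block until the lock is free):

```cpp
#include "spinLock.h"

struct Resource { void use() {} };  // stands in for Recording

struct Holder {                     // stands in for FlightRecorder
  SpinLock lock;                    // plays the role of _rec_lock
  Resource* res = nullptr;          // plays the role of _rec

  // Writer side (like FlightRecorder::stop()): take the exclusive lock,
  // null the pointer first, then delete.
  void stop() {
    ExclusiveLockGuard locker(&lock);
    Resource* r = res;
    res = nullptr;
    delete r;
  }

  // Reader side (like the record* methods): try the shared lock and bail out
  // if it is unavailable; re-check the pointer under the lock.
  void record() {
    OptionalSharedLockGuard locker(&lock);
    if (locker.ownsLock()) {
      Resource* r = res;
      if (r != nullptr) {
        r->use();  // r cannot be deleted here - stop() needs the exclusive lock
      }
    }
  }
};
```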
diff --git a/ddprof-lib/src/main/cpp/flightRecorder.h b/ddprof-lib/src/main/cpp/flightRecorder.h
index 7e119ad4f..c5bc067b4 100644
--- a/ddprof-lib/src/main/cpp/flightRecorder.h
+++ b/ddprof-lib/src/main/cpp/flightRecorder.h
@@ -291,7 +291,9 @@ class FlightRecorder {
 private:
   std::string _filename;
   Arguments _args;
-  Recording* volatile _rec;
+
+  SpinLock _rec_lock;
+  Recording* _rec;
 
   Error newRecording(bool reset);
diff --git a/ddprof-lib/src/main/cpp/linearAllocator.cpp b/ddprof-lib/src/main/cpp/linearAllocator.cpp
index 275829e61..a48a7edcf 100644
--- a/ddprof-lib/src/main/cpp/linearAllocator.cpp
+++ b/ddprof-lib/src/main/cpp/linearAllocator.cpp
@@ -17,6 +17,23 @@
 #include "linearAllocator.h"
 #include "counters.h"
 #include "os_dd.h"
+#include "common.h"
+#include
+
+// Enable ASan memory poisoning for better use-after-free detection
+#ifdef __has_feature
+  #if __has_feature(address_sanitizer)
+    #define ASAN_ENABLED 1
+  #endif
+#endif
+
+#ifdef __SANITIZE_ADDRESS__
+  #define ASAN_ENABLED 1
+#endif
+
+#ifdef ASAN_ENABLED
+  #include <sanitizer/asan_interface.h>
+#endif
 
 LinearAllocator::LinearAllocator(size_t chunk_size) {
   _chunk_size = chunk_size;
@@ -32,6 +49,27 @@ void LinearAllocator::clear() {
   if (_reserve->prev == _tail) {
     freeChunk(_reserve);
   }
+
+  // ASAN POISONING: Mark all allocated memory as poisoned BEFORE freeing chunks.
+  // This catches use-after-free even when memory isn't munmap'd (kept in _tail).
+  #ifdef ASAN_ENABLED
+  int chunk_count = 0;
+  size_t total_poisoned = 0;
+  for (Chunk *chunk = _tail; chunk != NULL; chunk = chunk->prev) {
+    // Poison from the start of usable data to the current offset
+    size_t used_size = chunk->offs - sizeof(Chunk);
+    if (used_size > 0) {
+      void* data_start = (char*)chunk + sizeof(Chunk);
+      ASAN_POISON_MEMORY_REGION(data_start, used_size);
+      chunk_count++;
+      total_poisoned += used_size;
+    }
+  }
+  if (chunk_count > 0) {
+    TEST_LOG("[LinearAllocator::clear] ASan poisoned %d chunks, %zu bytes total", chunk_count, total_poisoned);
+  }
+  #endif
+
   while (_tail->prev != NULL) {
     Chunk *current = _tail;
     _tail = _tail->prev;
@@ -39,6 +77,57 @@ void LinearAllocator::clear() {
   }
   _reserve = _tail;
   _tail->offs = sizeof(Chunk);
+
+  // DON'T UNPOISON HERE - let alloc() do it on demand.
+  // This ensures ASan can catch use-after-free bugs when code accesses
+  // memory that was cleared but not yet reallocated.
+}
+
+ChunkList LinearAllocator::detachChunks() {
+  // Capture current state before detaching
+  ChunkList result(_tail, _chunk_size);
+
+  // Handle the reserve chunk: if it is ahead of tail, it needs special handling
+  if (_reserve->prev == _tail) {
+    // Reserve is a separate chunk ahead of tail - it becomes part of the detached
+    // list; include it in the chain by making it the new head
+    result.head = _reserve;
+  }
+
+  // Allocate a fresh chunk for new allocations
+  Chunk* fresh = allocateChunk(NULL);
+  if (fresh != NULL) {
+    _tail = fresh;
+    _reserve = fresh;
+  } else {
+    // Allocation failed - restore the original state and return an empty list.
+    // This maintains the invariant that the allocator is always usable.
+    _reserve = _tail;
+    _tail->offs = sizeof(Chunk);
+    return ChunkList();
+  }
+
+  return result;
+}
+
+void LinearAllocator::freeChunks(ChunkList& chunks) {
+  if (chunks.head == nullptr || chunks.chunk_size == 0) {
+    return;
+  }
+
+  // Walk the chain and free each chunk
+  Chunk* current = chunks.head;
+  while (current != nullptr) {
+    Chunk* prev = current->prev;
+    OS::safeFree(current, chunks.chunk_size);
+    Counters::decrement(LINEAR_ALLOCATOR_BYTES, chunks.chunk_size);
+    Counters::decrement(LINEAR_ALLOCATOR_CHUNKS);
+    current = prev;
+  }
+
+  // Mark as freed to prevent a double free
+  chunks.head = nullptr;
+  chunks.chunk_size = 0;
 }
 
 void *LinearAllocator::alloc(size_t size) {
@@ -49,11 +138,20 @@ void *LinearAllocator::alloc(size_t size) {
          offs + size <= _chunk_size;
          offs = __atomic_load_n(&chunk->offs, __ATOMIC_ACQUIRE)) {
       if (__sync_bool_compare_and_swap(&chunk->offs, offs, offs + size)) {
+        void* allocated_ptr = (char *)chunk + offs;
+
+        // ASAN UNPOISONING: Unpoison ONLY the allocated region, on demand.
+        // This allows ASan to detect use-after-free of memory that was cleared
+        // but not yet reallocated.
+        #ifdef ASAN_ENABLED
+        ASAN_UNPOISON_MEMORY_REGION(allocated_ptr, size);
+        #endif
+
         if (_chunk_size / 2 - offs < size) {
           // Stepped over a middle of the chunk - it's time to prepare a new one
           reserveChunk(chunk);
         }
-        return (char *)chunk + offs;
+        return allocated_ptr;
       }
     }
   } while ((chunk = getNextChunk(chunk)) != NULL);
@@ -66,6 +164,16 @@ Chunk *LinearAllocator::allocateChunk(Chunk *current) {
   if (chunk != NULL) {
     chunk->prev = current;
     chunk->offs = sizeof(Chunk);
+
+    // ASAN UNPOISONING: New chunks from mmap are clean - unpoison them for use.
+    // mmap returns memory that ASan may track as unallocated, so we need to
+    // explicitly unpoison it to allow allocations.
+    #ifdef ASAN_ENABLED
+    size_t usable_size = _chunk_size - sizeof(Chunk);
+    void* data_start = (char*)chunk + sizeof(Chunk);
+    ASAN_UNPOISON_MEMORY_REGION(data_start, usable_size);
+    #endif
+
     Counters::increment(LINEAR_ALLOCATOR_BYTES, _chunk_size);
     Counters::increment(LINEAR_ALLOCATOR_CHUNKS);
   }
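A self-contained toy version of the poisoning idiom used above, for readers unfamiliar with the ASan interface (a recycled buffer rather than the real allocator; build with -fsanitize=address for the poisoning to take effect):

```cpp
#include <cstddef>

// Same feature-detection pattern as linearAllocator.cpp
#ifdef __has_feature
  #if __has_feature(address_sanitizer)
    #define SKETCH_ASAN 1
  #endif
#endif
#ifdef __SANITIZE_ADDRESS__
  #define SKETCH_ASAN 1
#endif

#ifdef SKETCH_ASAN
  #include <sanitizer/asan_interface.h>
#else
  #define ASAN_POISON_MEMORY_REGION(addr, size)   ((void)(addr), (void)(size))
  #define ASAN_UNPOISON_MEMORY_REGION(addr, size) ((void)(addr), (void)(size))
#endif

// Toy recycled buffer: clear() poisons the used region instead of freeing it,
// and take() unpoisons exactly the bytes handed back to the caller. Any access
// to cleared-but-not-yet-reallocated bytes is then reported by ASan.
struct RecycledBuffer {
  char data[4096];
  size_t used = 0;

  void* take(size_t n) {            // no bounds checking - illustration only
    void* p = data + used;
    used += n;
    ASAN_UNPOISON_MEMORY_REGION(p, n);
    return p;
  }

  void clear() {
    ASAN_POISON_MEMORY_REGION(data, used);
    used = 0;
  }
};
```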
diff --git a/ddprof-lib/src/main/cpp/linearAllocator.h b/ddprof-lib/src/main/cpp/linearAllocator.h
index a680d0738..8d201dd17 100644
--- a/ddprof-lib/src/main/cpp/linearAllocator.h
+++ b/ddprof-lib/src/main/cpp/linearAllocator.h
@@ -26,6 +26,19 @@ struct Chunk {
   char _padding[56];
 };
 
+/**
+ * Holds detached chunks from a LinearAllocator for deferred deallocation.
+ * Used to keep trace memory alive during processor execution while allowing
+ * the allocator to be reset for new allocations.
+ */
+struct ChunkList {
+  Chunk *head;
+  size_t chunk_size;
+
+  ChunkList() : head(nullptr), chunk_size(0) {}
+  ChunkList(Chunk *h, size_t sz) : head(h), chunk_size(sz) {}
+};
+
 class LinearAllocator {
 private:
   size_t _chunk_size;
@@ -43,6 +56,22 @@ class LinearAllocator {
 
   void clear();
 
+  /**
+   * Detaches all chunks from this allocator, returning them as a ChunkList.
+   * The allocator is reset to an empty state with a fresh chunk for new allocations.
+   * The detached chunks remain allocated and valid until freeChunks() is called.
+   *
+   * This enables deferred deallocation: reset the allocator immediately while
+   * keeping old data alive until it is no longer needed.
+   */
+  ChunkList detachChunks();
+
+  /**
+   * Frees all chunks in a previously detached ChunkList.
+   * Call this after processing is complete to deallocate the memory.
+   */
+  static void freeChunks(ChunkList& chunks);
+
   void *alloc(size_t size);
 };
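A minimal sketch of the detach/free lifecycle at the allocator level, assuming the API declared above (hypothetical caller; `recycle` is not part of the patch):

```cpp
#include "linearAllocator.h"

void recycle(LinearAllocator& allocator) {
  void* old_data = allocator.alloc(64);         // served from the current chunks
  ChunkList detached = allocator.detachChunks();

  void* new_data = allocator.alloc(64);         // served from the fresh chunk
  (void)new_data;

  // old_data is still valid here: its chunk is owned by `detached`, not freed.
  (void)old_data;

  LinearAllocator::freeChunks(detached);        // now old_data is gone
  LinearAllocator::freeChunks(detached);        // no-op: head was reset to nullptr
}
```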
diff --git a/ddprof-lib/src/main/cpp/safeAccess.h b/ddprof-lib/src/main/cpp/safeAccess.h
index 56e34006e..144b9289d 100644
--- a/ddprof-lib/src/main/cpp/safeAccess.h
+++ b/ddprof-lib/src/main/cpp/safeAccess.h
@@ -45,11 +45,11 @@ class SafeAccess {
 
   static bool handle_safefetch(int sig, void* context);
 
-  static inline void *load(void **ptr) {
-    return loadPtr(ptr, nullptr);
+  static inline void *load(void **ptr, void* default_value = nullptr) {
+    return loadPtr(ptr, default_value);
   }
 
-  static inline u32 load32(u32 *ptr, u32 default_value) {
+  static inline u32 load32(u32 *ptr, u32 default_value = 0) {
     int res = safefetch32_impl((int*)ptr, (int)default_value);
     return static_cast<u32>(res);
   }
diff --git a/ddprof-lib/src/main/cpp/spinLock.h b/ddprof-lib/src/main/cpp/spinLock.h
index 0b82fc55d..36df20c06 100644
--- a/ddprof-lib/src/main/cpp/spinLock.h
+++ b/ddprof-lib/src/main/cpp/spinLock.h
@@ -30,7 +30,7 @@ class alignas(DEFAULT_CACHE_LINE_SIZE) SpinLock {
   volatile int _lock;
   char _padding[DEFAULT_CACHE_LINE_SIZE - sizeof(_lock)];
 
 public:
-  constexpr SpinLock(int initial_state = 0) : _lock(initial_state), _padding() {
+  explicit constexpr SpinLock(int initial_state = 0) : _lock(initial_state), _padding() {
     static_assert(sizeof(SpinLock) == DEFAULT_CACHE_LINE_SIZE);
   }
 
@@ -48,8 +48,7 @@ class alignas(DEFAULT_CACHE_LINE_SIZE) SpinLock {
 
   bool tryLockShared() {
     int value;
-    // we use relaxed as the compare already offers the guarantees we need
-    while ((value = __atomic_load_n(&_lock, __ATOMIC_RELAXED)) <= 0) {
+    while ((value = __atomic_load_n(&_lock, __ATOMIC_ACQUIRE)) <= 0) {
       if (__sync_bool_compare_and_swap(&_lock, value, value - 1)) {
         return true;
       }
@@ -59,7 +58,7 @@ class alignas(DEFAULT_CACHE_LINE_SIZE) SpinLock {
 
   void lockShared() {
     int value;
-    while ((value = __atomic_load_n(&_lock, __ATOMIC_RELAXED)) > 0 ||
+    while ((value = __atomic_load_n(&_lock, __ATOMIC_ACQUIRE)) > 0 ||
            !__sync_bool_compare_and_swap(&_lock, value, value - 1)) {
       spinPause();
     }
@@ -86,6 +85,29 @@ class SharedLockGuard {
   SharedLockGuard& operator=(SharedLockGuard&&) = delete;
 };
 
+class OptionalSharedLockGuard {
+  SpinLock* _lock;
+public:
+  explicit OptionalSharedLockGuard(SpinLock* lock) : _lock(lock) {
+    if (!_lock->tryLockShared()) {
+      // Locking failed, so there is nothing to unlock later.
+      _lock = nullptr;
+    }
+  }
+  ~OptionalSharedLockGuard() {
+    if (_lock != nullptr) {
+      _lock->unlockShared();
+    }
+  }
+  bool ownsLock() const { return _lock != nullptr; }
+
+  // Non-copyable and non-movable
+  OptionalSharedLockGuard(const OptionalSharedLockGuard&) = delete;
+  OptionalSharedLockGuard& operator=(const OptionalSharedLockGuard&) = delete;
+  OptionalSharedLockGuard(OptionalSharedLockGuard&&) = delete;
+  OptionalSharedLockGuard& operator=(OptionalSharedLockGuard&&) = delete;
+};
+
 class ExclusiveLockGuard {
 private:
   SpinLock* _lock;
diff --git a/ddprof-lib/src/main/cpp/utils.h b/ddprof-lib/src/main/cpp/utils.h
new file mode 100644
index 000000000..47d58eb96
--- /dev/null
+++ b/ddprof-lib/src/main/cpp/utils.h
@@ -0,0 +1,35 @@
+#ifndef _UTILS_H
+#define _UTILS_H
+
+#include <assert.h>
+#include <stddef.h>
+#include <stdint.h>
+
+inline bool is_power_of_2(size_t size) {
+  return size > 0 && (size & (size - 1)) == 0;
+}
+
+template <typename T>
+inline bool is_aligned(const T* ptr, size_t alignment) noexcept {
+  assert(is_power_of_2(alignment));
+  // Convert the pointer to an integer type
+  auto iptr = reinterpret_cast<uintptr_t>(ptr);
+
+  // Check if the integer value is a multiple of the alignment
+  return (iptr & (alignment - 1)) == 0;
+}
+
+inline size_t align_down(size_t size, size_t alignment) noexcept {
+  assert(is_power_of_2(alignment));
+  return size & ~(alignment - 1);
+}
+
+inline size_t align_up(size_t size, size_t alignment) noexcept {
+  assert(is_power_of_2(alignment));
+  return align_down(size + alignment - 1, alignment);
+}
+
+
+
+
+#endif // _UTILS_H
\ No newline at end of file
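Worked examples for the new helpers, including the rounding that codeCache.cpp now performs before aligned_alloc (values assume 8-byte pointers; the sizes are hypothetical stand-ins):

```cpp
#include <cassert>
#include <cstdlib>

#include "utils.h"

int main() {
  static_assert(sizeof(void*) == 8, "the examples below assume 8-byte pointers");

  assert(align_up(13, 8) == 16);     // 13 rounded up to the next multiple of 8
  assert(align_up(16, 8) == 16);     // already-aligned values are unchanged
  assert(align_down(13, 8) == 8);

  int x = 0;
  assert(is_aligned(&x, alignof(int)));

  // Mirrors NativeFunc::create(): aligned_alloc requires the size to be a
  // multiple of the requested alignment, hence the align_up() call.
  size_t raw = 14;                             // stand-in for sizeof(NativeFunc) + 1 + strlen(name)
  size_t size = align_up(raw, sizeof(void*));  // 16
  void* f = std::aligned_alloc(sizeof(void*), size);
  assert(f != nullptr && is_aligned(f, sizeof(void*)));
  std::free(f);
  return 0;
}
```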
diff --git a/ddprof-lib/src/test/cpp/test_callTraceStorage.cpp b/ddprof-lib/src/test/cpp/test_callTraceStorage.cpp
index 70c532007..aed48352b 100644
--- a/ddprof-lib/src/test/cpp/test_callTraceStorage.cpp
+++ b/ddprof-lib/src/test/cpp/test_callTraceStorage.cpp
@@ -495,4 +495,134 @@ TEST_F(CallTraceStorageTest, HazardPointerSynchronizationDuringSwap) {
   EXPECT_GT(final_trace_count.load(), 0)
       << "Storage should still contain traces after synchronization test";
 }
+
+/**
+ * Reproducer test for the use-after-free bug in processTraces().
+ *
+ * BUG DESCRIPTION:
+ * In processTraces(), traces are collected from standby into _traces_buffer (raw pointers),
+ * then standby->clear() frees all memory, but processor(_traces_buffer) is called AFTER
+ * the clear, accessing freed memory.
+ *
+ * Timeline of the bug:
+ * 1. original_standby->collect(_traces_buffer, ...) - collects raw pointers from STANDBY
+ * 2. original_standby->clear() - FREES ALL MEMORY including CallTrace objects!
+ * 3. processor(_traces_buffer) - accesses freed memory (USE-AFTER-FREE)
+ *
+ * KEY INSIGHT: The bug only affects traces from STANDBY, not ACTIVE.
+ * - Active traces are cleared AFTER the processor runs (safe)
+ * - Standby traces are cleared BEFORE the processor runs (BUG!)
+ *
+ * To trigger the bug, we need traces IN STANDBY at the start of processTraces().
+ * Traces get into standby via the liveness preservation mechanism:
+ * 1. Register a liveness checker that marks traces as "live"
+ * 2. During processTraces(), live traces are copied to SCRATCH
+ * 3. After rotation, SCRATCH becomes the new STANDBY
+ * 4. The next processTraces() will have those preserved traces in STANDBY
+ * 5. Those traces are collected, then FREED, then ACCESSED → USE-AFTER-FREE!
+ *
+ * This test should FAIL (crash or ASan error) before the fix and PASS after.
+ */
+TEST_F(CallTraceStorageTest, UseAfterFreeInProcessTraces) {
+  // Create multiple traces with varying frame counts to increase memory footprint
+  const int NUM_TRACES = 100;
+  const int MAX_FRAMES = 20;
+
+  std::vector<u64> trace_ids;
+  trace_ids.reserve(NUM_TRACES);
+
+  // Create traces with multiple frames to use more memory
+  for (int i = 0; i < NUM_TRACES; i++) {
+    std::vector<ASGCT_CallFrame> frames(MAX_FRAMES);
+    for (int j = 0; j < MAX_FRAMES; j++) {
+      frames[j].bci = i * 1000 + j;
+      frames[j].method_id = (jmethodID)(0x10000 + i * 100 + j);
+    }
+
+    u64 trace_id = storage->put(MAX_FRAMES, frames.data(), false, 1);
+    ASSERT_GT(trace_id, 0) << "Failed to store trace " << i;
+    trace_ids.push_back(trace_id);
+  }
+
+  // CRITICAL: Register a liveness checker that preserves ALL traces.
+  // This causes traces to be copied to SCRATCH during processTraces().
+  // After rotation, SCRATCH becomes STANDBY, so the NEXT processTraces()
+  // will have these traces in STANDBY, where the bug manifests.
+  storage->registerLivenessChecker([&trace_ids](std::unordered_set<u64>& buffer) {
+    for (u64 id : trace_ids) {
+      buffer.insert(id);
+    }
+  });
+
+  // First processTraces: traces are in ACTIVE, get collected and preserved to SCRATCH.
+  // After rotation, SCRATCH becomes STANDBY (now containing the preserved traces).
+  int first_count = 0;
+  storage->processTraces([&first_count](const std::unordered_set<CallTrace*>& traces) {
+    first_count = traces.size();
+    printf("First processTraces: %d traces collected\n", first_count);
+  });
+  EXPECT_GT(first_count, NUM_TRACES) << "First processTraces should collect all traces";
+
+  // Second processTraces: THIS IS WHERE THE BUG OCCURS!
+  // 1. STANDBY now contains the preserved traces (from the first call's scratch)
+  // 2. Standby traces are collected into _traces_buffer (raw pointers)
+  // 3. original_standby->clear() - FREES the trace memory!
+  // 4. processor(_traces_buffer) - accesses FREED memory (USE-AFTER-FREE!)
+  storage->processTraces([&](const std::unordered_set<CallTrace*>& traces) {
+    int total_frames = 0;
+    int total_bci_sum = 0;
+    int trace_count = 0;
+
+    for (CallTrace* trace : traces) {
+      if (trace == nullptr) continue;
+      if (trace->trace_id == CallTraceStorage::DROPPED_TRACE_ID) continue;
+
+      trace_count++;
+      // Deep access to detect use-after-free: after standby->clear(), this
+      // memory would be FREED while we are still accessing it.
+      // ASan will catch this; without ASan, we might read garbage.
+      EXPECT_GE(trace->num_frames, 1) << "Corrupted num_frames (use-after-free?)";
+      EXPECT_LE(trace->num_frames, MAX_FRAMES) << "Corrupted num_frames (use-after-free?)";
+      EXPECT_FALSE(trace->truncated) << "Corrupted truncated flag (use-after-free?)";
+      EXPECT_GT(trace->trace_id, 0) << "Corrupted trace_id (use-after-free?)";
+
+      total_frames += trace->num_frames;
+
+      // Access every frame - this maximizes the chance of detecting corruption
+      for (int i = 0; i < trace->num_frames; i++) {
+        int bci = trace->frames[i].bci;
+        total_bci_sum += bci;
+        EXPECT_GE(bci, 0) << "Corrupted BCI at frame " << i << " (use-after-free?)";
+
+        jmethodID method = trace->frames[i].method_id;
+        EXPECT_NE(method, nullptr) << "Null method_id at frame " << i << " (use-after-free?)";
+      }
+    }
+
+    printf("Second processTraces: %d traces, %d total frames, bci_sum=%d\n",
+           trace_count, total_frames, total_bci_sum);
+
+    // This is the key assertion: we expect to find the preserved traces.
+    // If the bug exists, we are reading freed memory here!
+    EXPECT_GE(trace_count, NUM_TRACES) << "Should find preserved traces from standby";
+  });
+
+  // Third processTraces: traces should still be preserved (copied to the new scratch).
+  // This further exercises the use-after-free if the bug exists.
+  storage->processTraces([&](const std::unordered_set<CallTrace*>& traces) {
+    int trace_count = 0;
+    for (CallTrace* trace : traces) {
+      if (trace == nullptr) continue;
+      if (trace->trace_id == CallTraceStorage::DROPPED_TRACE_ID) continue;
+      trace_count++;
+
+      // Access all frame data
+      volatile int sum = 0;
+      for (int i = 0; i < trace->num_frames; i++) {
+        sum += trace->frames[i].bci;
+      }
+    }
+    printf("Third processTraces: %d traces\n", trace_count);
+    EXPECT_GE(trace_count, NUM_TRACES) << "Should still find preserved traces";
+  });
+}
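For reference, the ordering that this test exercises, condensed from processTraces() (names as in the patch; summary only, not additional test code):

```cpp
// BEFORE (use-after-free): standby memory is freed before the processor runs.
//   original_standby->collect(_traces_buffer, ...);   // raw CallTrace* pointers
//   original_standby->clear();                        // frees the backing chunks
//   processor(_traces_buffer);                        // reads freed memory
//
// AFTER (this patch): the table is reset, but its chunks outlive the processor.
//   original_standby->collect(_traces_buffer, ...);
//   ChunkList standby_chunks = original_standby->clearTableOnly();  // detach, don't free
//   /* rotate tables */
//   processor(_traces_buffer);                        // chunks still alive
//   LinearAllocator::freeChunks(standby_chunks);      // freed only after processing
```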