24 changes: 16 additions & 8 deletions ddprof-lib/src/main/cpp/callTraceHashTable.cpp
@@ -102,24 +102,32 @@ CallTraceHashTable::~CallTraceHashTable() {
}


void CallTraceHashTable::clear() {
// Wait for all hazard pointers to clear before deallocation to prevent races
ChunkList CallTraceHashTable::clearTableOnly() {
// Wait for all hazard pointers to clear before detaching chunks
HazardPointer::waitForAllHazardPointersToClear();

// Clear previous chain pointers to prevent traversal during deallocation
for (LongHashTable *table = _table; table != nullptr; table = table->prev()) {
LongHashTable *prev_table = table->prev();
if (prev_table != nullptr) {
table->setPrev(nullptr); // Clear link before deallocation
}
}
// Now safe to deallocate all memory
_allocator.clear();
// Reinitialize with fresh table

// Detach chunks for deferred deallocation - keeps trace memory alive
ChunkList detached_chunks = _allocator.detachChunks();

// Reinitialize with fresh table (using the new chunk from detachChunks)
_table = LongHashTable::allocate(nullptr, INITIAL_CAPACITY, &_allocator);
_overflow = 0;

return detached_chunks;
}

void CallTraceHashTable::clear() {
// Clear table and immediately free chunks (original behavior)
ChunkList chunks = clearTableOnly();
LinearAllocator::freeChunks(chunks);
}

// Adaptation of MurmurHash64A by Austin Appleby
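
A minimal sketch of the call pattern this split enables, assuming a `CallTraceHashTable table` whose collected traces are still being read downstream; `consumeTraces` is a hypothetical consumer, not part of this change:

```cpp
// Sketch only: deferred deallocation with the new clearTableOnly()/freeChunks() pair.
std::unordered_set<CallTrace *> traces;
table.collect(traces);                        // gather raw CallTrace* pointers

ChunkList detached = table.clearTableOnly();  // table is immediately reusable,
                                              // but the collected pointers stay valid

consumeTraces(traces);                        // hypothetical consumer; must finish
                                              // before the chunks are released

LinearAllocator::freeChunks(detached);        // deferred deallocation completes here
```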
13 changes: 13 additions & 0 deletions ddprof-lib/src/main/cpp/callTraceHashTable.h
@@ -79,6 +79,19 @@ class CallTraceHashTable {
~CallTraceHashTable();

void clear();

/**
* Resets the hash table structure but defers memory deallocation.
* Returns a ChunkList containing the detached memory chunks.
* The caller must call LinearAllocator::freeChunks() on the returned
* ChunkList after processing is complete.
*
* This is used to fix use-after-free in processTraces(): the table
* structure is reset immediately (allowing rotation), but trace memory
* remains valid until the processor finishes accessing it.
*/
ChunkList clearTableOnly();

void collect(std::unordered_set<CallTrace *> &traces, std::function<void(CallTrace*)> trace_hook = nullptr);

u64 put(int num_frames, ASGCT_CallFrame *frames, bool truncated, u64 weight);
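
The ownership rule stated in the comment above (the caller must eventually pass the returned ChunkList to LinearAllocator::freeChunks()) can also be expressed as a small RAII guard. This is purely illustrative and not part of the change:

```cpp
// Illustrative sketch only. Frees the detached chunks when the guard goes out
// of scope, so an early return in the processing code cannot skip the
// LinearAllocator::freeChunks() call required by the contract above.
class DetachedChunksGuard {
 public:
  explicit DetachedChunksGuard(ChunkList chunks) : _chunks(chunks) {}
  ~DetachedChunksGuard() { LinearAllocator::freeChunks(_chunks); }

  DetachedChunksGuard(const DetachedChunksGuard &) = delete;
  DetachedChunksGuard &operator=(const DetachedChunksGuard &) = delete;

 private:
  ChunkList _chunks;
};

// Usage sketch:
//   DetachedChunksGuard guard(table.clearTableOnly());
//   ... read the collected CallTrace* pointers ...
//   // chunks freed here, when 'guard' is destroyed
```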
40 changes: 25 additions & 15 deletions ddprof-lib/src/main/cpp/callTraceStorage.cpp
@@ -371,10 +371,11 @@ CallTraceStorage::CallTraceStorage() : _active_storage(nullptr), _standby_storag

CallTraceStorage::~CallTraceStorage() {
// Atomically invalidate storage pointers to prevent new put() operations
// Relaxed ordering is sufficient since no concurrent readers exist during destruction
CallTraceHashTable* active = const_cast<CallTraceHashTable*>(__atomic_exchange_n(&_active_storage, nullptr, __ATOMIC_RELAXED));
CallTraceHashTable* standby = const_cast<CallTraceHashTable*>(__atomic_exchange_n(&_standby_storage, nullptr, __ATOMIC_RELAXED));
CallTraceHashTable* scratch = const_cast<CallTraceHashTable*>(__atomic_exchange_n(&_scratch_storage, nullptr, __ATOMIC_RELAXED));
// ACQ_REL ordering: RELEASE ensures nullptr is visible to put()'s ACQUIRE load,
// ACQUIRE ensures we see the latest pointer value for subsequent deletion
CallTraceHashTable* active = const_cast<CallTraceHashTable*>(__atomic_exchange_n(&_active_storage, nullptr, __ATOMIC_ACQ_REL));
CallTraceHashTable* standby = const_cast<CallTraceHashTable*>(__atomic_exchange_n(&_standby_storage, nullptr, __ATOMIC_ACQ_REL));
CallTraceHashTable* scratch = const_cast<CallTraceHashTable*>(__atomic_exchange_n(&_scratch_storage, nullptr, __ATOMIC_ACQ_REL));

// Wait for any ongoing hazard pointer usage to complete and delete each unique table
// Note: In triple-buffering, all three pointers should be unique, but check anyway
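
A stripped-down sketch of the reader side that the ordering comment assumes; the real put() is not shown in this diff, so the shape below is an assumption for illustration only:

```cpp
// Hypothetical reader side (the real CallTraceStorage::put() is not in this diff).
// The ACQUIRE load pairs with the destructor's ACQ_REL exchange: once nullptr is
// published, a sampling thread observes it and drops the sample instead of
// touching a table that is about to be deleted. Hazard pointers (waited on above)
// cover threads that loaded the pointer just before the exchange.
CallTraceHashTable *table = const_cast<CallTraceHashTable *>(
    __atomic_load_n(&_active_storage, __ATOMIC_ACQUIRE));
if (table == nullptr) {
  return 0;  // storage is shutting down; drop this sample
}
u64 id = table->put(num_frames, frames, truncated, weight);
```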
@@ -472,12 +473,13 @@ void CallTraceStorage::processTraces(std::function<void(const std::unordered_set
}
}

// PHASE 2: 8-Step Triple-Buffer Rotation
// PHASE 2: 10-Step Triple-Buffer Rotation

// Load storage pointers - relaxed ordering sufficient in single-threaded processTraces context
CallTraceHashTable* original_active = const_cast<CallTraceHashTable*>(__atomic_load_n(&_active_storage, __ATOMIC_RELAXED));
CallTraceHashTable* original_standby = const_cast<CallTraceHashTable*>(__atomic_load_n(&_standby_storage, __ATOMIC_RELAXED));
CallTraceHashTable* original_scratch = const_cast<CallTraceHashTable*>(__atomic_load_n(&_scratch_storage, __ATOMIC_RELAXED));
// Load storage pointers - ACQUIRE ordering synchronizes with RELEASE stores from
// previous processTraces() calls and constructor initialization
CallTraceHashTable* original_active = const_cast<CallTraceHashTable*>(__atomic_load_n(&_active_storage, __ATOMIC_ACQUIRE));
CallTraceHashTable* original_standby = const_cast<CallTraceHashTable*>(__atomic_load_n(&_standby_storage, __ATOMIC_ACQUIRE));
CallTraceHashTable* original_scratch = const_cast<CallTraceHashTable*>(__atomic_load_n(&_scratch_storage, __ATOMIC_ACQUIRE));

// Clear process collection for reuse (no malloc/free)
_traces_buffer.clear();
@@ -492,8 +494,11 @@ void CallTraceStorage::processTraces(std::function<void(const std::unordered_set
}
});

// Step 3: Clear original_standby after collection -> it will become the new active
original_standby->clear();
// Step 3: Clear standby table structure but DEFER memory deallocation
// The standby traces are now in _traces_buffer as raw pointers.
// We must keep the underlying memory alive until processor() finishes.
// clearTableOnly() resets the table for reuse but returns the chunks for later freeing.
ChunkList standby_chunks = original_standby->clearTableOnly();

{
// Critical section for table swap operations - disallow signals to interrupt
@@ -531,7 +536,11 @@ void CallTraceStorage::processTraces(std::function<void(const std::unordered_set

processor(_traces_buffer);

// Step 9: Clear the original active area (now scratch)
// Step 9: NOW safe to free standby chunks - processor is done accessing those traces
// This completes the deferred deallocation that prevents use-after-free
LinearAllocator::freeChunks(standby_chunks);

// Step 10: Clear the original active area (now scratch)
original_active->clear();

// Triple-buffer rotation maintains trace continuity with thread-safe malloc-free operations:
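
A usage sketch of the contract this ordering gives callers, assuming the processor callback receives the collected set of CallTrace* (the full parameter type is cut off in this view) and `serialize` is a hypothetical consumer:

```cpp
// The pointers handed to the callback stay valid for the duration of the call:
// the standby chunks that back them are only freed in Step 9, after the
// callback returns.
storage.processTraces([&](const std::unordered_set<CallTrace *> &traces) {
  for (CallTrace *trace : traces) {
    serialize(trace);  // hypothetical consumer; must not retain the pointer
  }
});
// Do NOT keep CallTrace* pointers past this point - their backing chunks were
// released by LinearAllocator::freeChunks() in Step 9.
```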
@@ -546,9 +555,10 @@ void CallTraceStorage::clear() {
// Mark critical section during clear operation for consistency
CriticalSection cs;

// Load current table pointers - relaxed ordering sufficient within critical section
CallTraceHashTable* active = const_cast<CallTraceHashTable*>(__atomic_load_n(&_active_storage, __ATOMIC_RELAXED));
CallTraceHashTable* standby = const_cast<CallTraceHashTable*>(__atomic_load_n(&_standby_storage, __ATOMIC_RELAXED));
// Load current table pointers - ACQUIRE ordering synchronizes with RELEASE stores
// from processTraces() rotation and constructor initialization
CallTraceHashTable* active = const_cast<CallTraceHashTable*>(__atomic_load_n(&_active_storage, __ATOMIC_ACQUIRE));
CallTraceHashTable* standby = const_cast<CallTraceHashTable*>(__atomic_load_n(&_standby_storage, __ATOMIC_ACQUIRE));

// Direct clear operations with critical section protection
if (active) {
110 changes: 109 additions & 1 deletion ddprof-lib/src/main/cpp/linearAllocator.cpp
@@ -17,6 +17,23 @@
#include "linearAllocator.h"
#include "counters.h"
#include "os_dd.h"
#include "common.h"
#include <stdio.h>

// Enable ASan memory poisoning for better use-after-free detection
#ifdef __has_feature
#if __has_feature(address_sanitizer)
#define ASAN_ENABLED 1
#endif
#endif

#ifdef __SANITIZE_ADDRESS__
#define ASAN_ENABLED 1
#endif

#ifdef ASAN_ENABLED
#include <sanitizer/asan_interface.h>
#endif

LinearAllocator::LinearAllocator(size_t chunk_size) {
_chunk_size = chunk_size;
@@ -32,13 +49,85 @@ void LinearAllocator::clear() {
if (_reserve->prev == _tail) {
freeChunk(_reserve);
}

// ASAN POISONING: Mark all allocated memory as poisoned BEFORE freeing chunks
// This catches use-after-free even when memory isn't munmap'd (kept in _tail)
#ifdef ASAN_ENABLED
int chunk_count = 0;
size_t total_poisoned = 0;
for (Chunk *chunk = _tail; chunk != NULL; chunk = chunk->prev) {
// Poison from the start of usable data to the current offset
size_t used_size = chunk->offs - sizeof(Chunk);
if (used_size > 0) {
void* data_start = (char*)chunk + sizeof(Chunk);
ASAN_POISON_MEMORY_REGION(data_start, used_size);
chunk_count++;
total_poisoned += used_size;
}
}
if (chunk_count > 0) {
TEST_LOG("[LinearAllocator::clear] ASan poisoned %d chunks, %zu bytes total", chunk_count, total_poisoned);
}
#endif

while (_tail->prev != NULL) {
Chunk *current = _tail;
_tail = _tail->prev;
freeChunk(current);
}
_reserve = _tail;
_tail->offs = sizeof(Chunk);

// DON'T UNPOISON HERE - let alloc() do it on-demand!
// This ensures ASan can catch use-after-free bugs when code accesses
// memory that was cleared but not yet reallocated.
}

ChunkList LinearAllocator::detachChunks() {
// Capture current state before detaching
ChunkList result(_tail, _chunk_size);

// Handle reserve chunk: if it's ahead of tail, it needs special handling
if (_reserve->prev == _tail) {
// Reserve is a separate chunk ahead of tail - it becomes part of detached list
// We need to include it in the chain by making it the new head
result.head = _reserve;
}

// Allocate a fresh chunk for new allocations
Chunk* fresh = allocateChunk(NULL);
if (fresh != NULL) {
_tail = fresh;
_reserve = fresh;
} else {
// Allocation failed - restore original state and return empty list
// This maintains the invariant that the allocator is always usable
_reserve = _tail;
_tail->offs = sizeof(Chunk);
return ChunkList();
}

return result;
}

void LinearAllocator::freeChunks(ChunkList& chunks) {
if (chunks.head == nullptr || chunks.chunk_size == 0) {
return;
}

// Walk the chain and free each chunk
Chunk* current = chunks.head;
while (current != nullptr) {
Chunk* prev = current->prev;
OS::safeFree(current, chunks.chunk_size);
Counters::decrement(LINEAR_ALLOCATOR_BYTES, chunks.chunk_size);
Counters::decrement(LINEAR_ALLOCATOR_CHUNKS);
current = prev;
}

// Mark as freed to prevent double-free
chunks.head = nullptr;
chunks.chunk_size = 0;
}

void *LinearAllocator::alloc(size_t size) {
@@ -49,11 +138,20 @@
offs + size <= _chunk_size;
offs = __atomic_load_n(&chunk->offs, __ATOMIC_ACQUIRE)) {
if (__sync_bool_compare_and_swap(&chunk->offs, offs, offs + size)) {
void* allocated_ptr = (char *)chunk + offs;

// ASAN UNPOISONING: Unpoison ONLY the allocated region on-demand
// This allows ASan to detect use-after-free of memory that was cleared
// but not yet reallocated
#ifdef ASAN_ENABLED
ASAN_UNPOISON_MEMORY_REGION(allocated_ptr, size);
#endif

if (_chunk_size / 2 - offs < size) {
// Stepped over a middle of the chunk - it's time to prepare a new one
reserveChunk(chunk);
}
return (char *)chunk + offs;
return allocated_ptr;
}
}
} while ((chunk = getNextChunk(chunk)) != NULL);
@@ -66,6 +164,16 @@
if (chunk != NULL) {
chunk->prev = current;
chunk->offs = sizeof(Chunk);

// ASAN UNPOISONING: New chunks from mmap are clean, unpoison them for use
// mmap returns memory that ASan may track as unallocated, so we need to
// explicitly unpoison it to allow allocations
#ifdef ASAN_ENABLED
size_t usable_size = _chunk_size - sizeof(Chunk);
void* data_start = (char*)chunk + sizeof(Chunk);
ASAN_UNPOISON_MEMORY_REGION(data_start, usable_size);
#endif

Counters::increment(LINEAR_ALLOCATOR_BYTES, _chunk_size);
Counters::increment(LINEAR_ALLOCATOR_CHUNKS);
}
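
The poison-on-clear / unpoison-on-alloc discipline above can be exercised in isolation with the same sanitizer interface. A standalone sketch, not part of the change; build with -fsanitize=address to observe the behavior:

```cpp
// Mirrors the discipline in clear() and alloc(): after "clearing", the data is
// still mapped but poisoned, so a stray read is reported by ASan even though
// nothing was munmap'd. Unpoisoning only the handed-out region is what lets
// alloc() reuse the memory safely.
#include <sanitizer/asan_interface.h>
#include <stdio.h>
#include <stdlib.h>

int main() {
  char *buf = (char *)malloc(4096);
  buf[0] = 'a';
  printf("live: %c\n", buf[0]);            // fine: memory is live and unpoisoned

  ASAN_POISON_MEMORY_REGION(buf, 4096);    // "clear": contents are logically gone
  // printf("%c\n", buf[0]);               // would be flagged as use-after-poison

  ASAN_UNPOISON_MEMORY_REGION(buf, 64);    // "alloc": only the region handed back
  printf("reused: %c\n", buf[0]);          // to a caller becomes readable again

  ASAN_UNPOISON_MEMORY_REGION(buf, 4096);  // not strictly required; unpoison
  free(buf);                               // before free() to keep the demo tidy
  return 0;
}
```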
29 changes: 29 additions & 0 deletions ddprof-lib/src/main/cpp/linearAllocator.h
@@ -26,6 +26,19 @@ struct Chunk {
char _padding[56];
};

/**
* Holds detached chunks from a LinearAllocator for deferred deallocation.
* Used to keep trace memory alive during processor execution while allowing
* the allocator to be reset for new allocations.
*/
struct ChunkList {
Chunk *head;
size_t chunk_size;

ChunkList() : head(nullptr), chunk_size(0) {}
ChunkList(Chunk *h, size_t sz) : head(h), chunk_size(sz) {}
};

class LinearAllocator {
private:
size_t _chunk_size;
@@ -43,6 +56,22 @@

void clear();

/**
* Detaches all chunks from this allocator, returning them as a ChunkList.
* The allocator is reset to an empty state with a fresh chunk for new allocations.
* The detached chunks remain allocated and valid until freeChunks() is called.
*
* This enables deferred deallocation: reset the allocator immediately while
* keeping old data alive until it's no longer needed.
*/
ChunkList detachChunks();

/**
* Frees all chunks in a previously detached ChunkList.
* Call this after processing is complete to deallocate the memory.
*/
static void freeChunks(ChunkList& chunks);

void *alloc(size_t size);
};
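
A minimal end-to-end sketch of the new API, assuming a chunk size comfortably larger than the allocations and with error handling omitted; `use` stands in for whatever still reads the detached data:

```cpp
#include "linearAllocator.h"
#include <string.h>

void use(const char *data);  // hypothetical consumer, defined elsewhere

void example() {
  LinearAllocator allocator(1024 * 1024);

  char *msg = (char *)allocator.alloc(16);
  strcpy(msg, "still alive");

  // Reset the allocator for new allocations while keeping the old chunks.
  ChunkList old_chunks = allocator.detachChunks();

  // 'msg' points into the detached chunks, so it is still valid here -
  // this is what processTraces() relies on between Steps 3 and 9.
  use(msg);

  // Once nothing references the old data, complete the deferred free.
  LinearAllocator::freeChunks(old_chunks);

  // The allocator itself was usable again immediately after detachChunks().
  char *fresh = (char *)allocator.alloc(16);
  (void)fresh;
}
```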
