24 changes: 16 additions & 8 deletions ddprof-lib/src/main/cpp/callTraceHashTable.cpp
@@ -102,24 +102,32 @@ CallTraceHashTable::~CallTraceHashTable() {
}


void CallTraceHashTable::clear() {
// Wait for all hazard pointers to clear before deallocation to prevent races
ChunkList CallTraceHashTable::clearTableOnly() {
// Wait for all hazard pointers to clear before detaching chunks
HazardPointer::waitForAllHazardPointersToClear();

// Clear previous chain pointers to prevent traversal during deallocation
for (LongHashTable *table = _table; table != nullptr; table = table->prev()) {
LongHashTable *prev_table = table->prev();
if (prev_table != nullptr) {
table->setPrev(nullptr); // Clear link before deallocation
}
}
// Now safe to deallocate all memory
_allocator.clear();
// Reinitialize with fresh table

// Detach chunks for deferred deallocation - keeps trace memory alive
ChunkList detached_chunks = _allocator.detachChunks();

// Reinitialize with fresh table (using the new chunk from detachChunks)
_table = LongHashTable::allocate(nullptr, INITIAL_CAPACITY, &_allocator);
_overflow = 0;

return detached_chunks;
}

void CallTraceHashTable::clear() {
// Clear table and immediately free chunks (original behavior)
ChunkList chunks = clearTableOnly();
LinearAllocator::freeChunks(chunks);
}
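
For reference, a minimal sketch of the deferred-deallocation contract introduced here, assuming only the clearTableOnly(), ChunkList, and LinearAllocator::freeChunks() interfaces shown in this PR; the function name and the consume step are hypothetical, not code from this change:

    // Sketch only: 'consume' stands for whatever caller-side work still dereferences
    // the CallTrace pointers collected from 'table' before the reset.
    void drainAndFree(CallTraceHashTable& table, const std::function<void()>& consume) {
      ChunkList chunks = table.clearTableOnly(); // table is immediately reusable for new entries
      consume();                                 // old traces stay valid: their chunks are not freed yet
      LinearAllocator::freeChunks(chunks);       // deferred deallocation happens only here
    }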

// Adaptation of MurmurHash64A by Austin Appleby
13 changes: 13 additions & 0 deletions ddprof-lib/src/main/cpp/callTraceHashTable.h
@@ -79,6 +79,19 @@ class CallTraceHashTable {
~CallTraceHashTable();

void clear();

/**
* Resets the hash table structure but defers memory deallocation.
* Returns a ChunkList containing the detached memory chunks.
* The caller must call LinearAllocator::freeChunks() on the returned
* ChunkList after processing is complete.
*
* This is used to fix a use-after-free in processTraces(): the table
* structure is reset immediately (allowing rotation), but trace memory
* remains valid until the processor finishes accessing it.
*/
ChunkList clearTableOnly();

void collect(std::unordered_set<CallTrace *> &traces, std::function<void(CallTrace*)> trace_hook = nullptr);

u64 put(int num_frames, ASGCT_CallFrame *frames, bool truncated, u64 weight);
40 changes: 25 additions & 15 deletions ddprof-lib/src/main/cpp/callTraceStorage.cpp
@@ -371,10 +371,11 @@ CallTraceStorage::CallTraceStorage() : _active_storage(nullptr), _standby_storag

CallTraceStorage::~CallTraceStorage() {
// Atomically invalidate storage pointers to prevent new put() operations
// Relaxed ordering is sufficient since no concurrent readers exist during destruction
CallTraceHashTable* active = const_cast<CallTraceHashTable*>(__atomic_exchange_n(&_active_storage, nullptr, __ATOMIC_RELAXED));
CallTraceHashTable* standby = const_cast<CallTraceHashTable*>(__atomic_exchange_n(&_standby_storage, nullptr, __ATOMIC_RELAXED));
CallTraceHashTable* scratch = const_cast<CallTraceHashTable*>(__atomic_exchange_n(&_scratch_storage, nullptr, __ATOMIC_RELAXED));
// ACQ_REL ordering: RELEASE ensures nullptr is visible to put()'s ACQUIRE load,
// ACQUIRE ensures we see the latest pointer value for subsequent deletion
CallTraceHashTable* active = const_cast<CallTraceHashTable*>(__atomic_exchange_n(&_active_storage, nullptr, __ATOMIC_ACQ_REL));
CallTraceHashTable* standby = const_cast<CallTraceHashTable*>(__atomic_exchange_n(&_standby_storage, nullptr, __ATOMIC_ACQ_REL));
CallTraceHashTable* scratch = const_cast<CallTraceHashTable*>(__atomic_exchange_n(&_scratch_storage, nullptr, __ATOMIC_ACQ_REL));

// Wait for any ongoing hazard pointer usage to complete and delete each unique table
// Note: In triple-buffering, all three pointers should be unique, but check anyway
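
The acquire/release pairing the new comment relies on, as a minimal sketch; the shape of put()'s load and its null-handling policy are assumptions, not code shown in this PR:

    // Writer side (destructor, this diff): publish nullptr with release semantics so a
    // reader that observes the nullptr also observes every write made before the exchange.
    CallTraceHashTable* active = const_cast<CallTraceHashTable*>(
        __atomic_exchange_n(&_active_storage, nullptr, __ATOMIC_ACQ_REL));

    // Reader side (assumed shape of put()): an acquire load pairs with the release above,
    // so put() either sees a fully initialized table or the published nullptr and bails out.
    CallTraceHashTable* table = const_cast<CallTraceHashTable*>(
        __atomic_load_n(&_active_storage, __ATOMIC_ACQUIRE));
    if (table == nullptr) {
      return 0; // storage is being torn down; drop this sample (assumed policy)
    }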
@@ -472,12 +473,13 @@ void CallTraceStorage::processTraces(std::function<void(const std::unordered_set
}
}

// PHASE 2: 8-Step Triple-Buffer Rotation
// PHASE 2: 10-Step Triple-Buffer Rotation

// Load storage pointers - relaxed ordering sufficient in single-threaded processTraces context
CallTraceHashTable* original_active = const_cast<CallTraceHashTable*>(__atomic_load_n(&_active_storage, __ATOMIC_RELAXED));
CallTraceHashTable* original_standby = const_cast<CallTraceHashTable*>(__atomic_load_n(&_standby_storage, __ATOMIC_RELAXED));
CallTraceHashTable* original_scratch = const_cast<CallTraceHashTable*>(__atomic_load_n(&_scratch_storage, __ATOMIC_RELAXED));
// Load storage pointers - ACQUIRE ordering synchronizes with RELEASE stores from
// previous processTraces() calls and constructor initialization
CallTraceHashTable* original_active = const_cast<CallTraceHashTable*>(__atomic_load_n(&_active_storage, __ATOMIC_ACQUIRE));
CallTraceHashTable* original_standby = const_cast<CallTraceHashTable*>(__atomic_load_n(&_standby_storage, __ATOMIC_ACQUIRE));
CallTraceHashTable* original_scratch = const_cast<CallTraceHashTable*>(__atomic_load_n(&_scratch_storage, __ATOMIC_ACQUIRE));

// Clear process collection for reuse (no malloc/free)
_traces_buffer.clear();
@@ -492,8 +494,11 @@ void CallTraceStorage::processTraces(std::function<void(const std::unordered_set
}
});

// Step 3: Clear original_standby after collection -> it will become the new active
original_standby->clear();
// Step 3: Clear standby table structure but DEFER memory deallocation
// The standby traces are now in _traces_buffer as raw pointers.
// We must keep the underlying memory alive until processor() finishes.
// clearTableOnly() resets the table for reuse but returns the chunks for later freeing.
ChunkList standby_chunks = original_standby->clearTableOnly();

{
// Critical section for table swap operations - disallow signals to interrupt
@@ -531,7 +536,11 @@ void CallTraceStorage::processTraces(std::function<void(const std::unordered_set

processor(_traces_buffer);

// Step 9: Clear the original active area (now scratch)
// Step 9: NOW safe to free standby chunks - processor is done accessing those traces
// This completes the deferred deallocation that prevents use-after-free
LinearAllocator::freeChunks(standby_chunks);

// Step 10: Clear the original active area (now scratch)
original_active->clear();

// Triple-buffer rotation maintains trace continuity with thread-safe malloc-free operations:
@@ -546,9 +555,10 @@ void CallTraceStorage::clear() {
// Mark critical section during clear operation for consistency
CriticalSection cs;

// Load current table pointers - relaxed ordering sufficient within critical section
CallTraceHashTable* active = const_cast<CallTraceHashTable*>(__atomic_load_n(&_active_storage, __ATOMIC_RELAXED));
CallTraceHashTable* standby = const_cast<CallTraceHashTable*>(__atomic_load_n(&_standby_storage, __ATOMIC_RELAXED));
// Load current table pointers - ACQUIRE ordering synchronizes with RELEASE stores
// from processTraces() rotation and constructor initialization
CallTraceHashTable* active = const_cast<CallTraceHashTable*>(__atomic_load_n(&_active_storage, __ATOMIC_ACQUIRE));
CallTraceHashTable* standby = const_cast<CallTraceHashTable*>(__atomic_load_n(&_standby_storage, __ATOMIC_ACQUIRE));

// Direct clear operations with critical section protection
if (active) {
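
Read end to end, the deferred-free ordering that this change introduces in processTraces() reduces to the following sketch; collectInto() and rotatePointers() are shorthand for steps 2 and 4-8 above, not real functions in this codebase:

    // Condensed ordering of processTraces() after this change.
    collectInto(_traces_buffer, original_standby);                  // Step 2: raw CallTrace* still point into standby's chunks
    ChunkList standby_chunks = original_standby->clearTableOnly();  // Step 3: reset the table, keep its chunks alive
    { CriticalSection cs; rotatePointers(); }                       // Steps 4-8: swap active/standby/scratch under the critical section
    processor(_traces_buffer);                                      // traces remain backed by standby_chunks, so no use-after-free
    LinearAllocator::freeChunks(standby_chunks);                    // Step 9: deferred free, only after the processor returns
    original_active->clear();                                       // Step 10: the old active table becomes the new scratch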
4 changes: 3 additions & 1 deletion ddprof-lib/src/main/cpp/codeCache.cpp
@@ -6,13 +6,15 @@
#include "codeCache.h"
#include "dwarf_dd.h"
#include "os_dd.h"

#include <stdint.h>
#include <stdlib.h>
#include <string.h>
#include <sys/mman.h>

char *NativeFunc::create(const char *name, short lib_index) {
NativeFunc *f = (NativeFunc *)malloc(sizeof(NativeFunc) + 1 + strlen(name));
size_t size = align_up(sizeof(NativeFunc) + 1 + strlen(name), sizeof(NativeFunc*));
NativeFunc *f = (NativeFunc *)aligned_alloc(sizeof(NativeFunc*), size);
f->_lib_index = lib_index;
f->_mark = 0;
// cppcheck-suppress memleak
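
aligned_alloc() requires the requested size to be an integral multiple of the alignment, which is why the size is rounded up before the call. align_up() here and is_aligned() in the codeCache.h change below come from the newly included utils.h; a plausible shape for those helpers, stated as an assumption rather than the actual utils.h contents:

    // Assumed helpers (utils.h is not part of this diff): round a size up to a
    // power-of-two alignment, and test an address for that alignment.
    static inline size_t align_up(size_t value, size_t alignment) {
      return (value + alignment - 1) & ~(alignment - 1); // alignment must be a power of two
    }
    static inline bool is_aligned(const void* ptr, size_t alignment) {
      return (reinterpret_cast<uintptr_t>(ptr) & (alignment - 1)) == 0;
    }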
4 changes: 3 additions & 1 deletion ddprof-lib/src/main/cpp/codeCache.h
@@ -6,6 +6,8 @@
#ifndef _CODECACHE_H
#define _CODECACHE_H

#include "utils.h"

#include <jvmti.h>
#include <stdlib.h>
#include <string.h>
@@ -62,7 +64,7 @@ class NativeFunc {

static short libIndex(const char *name) {
NativeFunc* func = from(name);
if (posix_memalign((void**)(&func), sizeof(NativeFunc*), sizeof(NativeFunc)) != 0) {
if (!is_aligned(func, sizeof(func))) {
return -1;
}
return func->_lib_index;