diff --git a/ddprof-lib/src/main/cpp/callTraceHashTable.cpp b/ddprof-lib/src/main/cpp/callTraceHashTable.cpp
index a0aa4979f..931da8626 100644
--- a/ddprof-lib/src/main/cpp/callTraceHashTable.cpp
+++ b/ddprof-lib/src/main/cpp/callTraceHashTable.cpp
@@ -102,10 +102,10 @@ CallTraceHashTable::~CallTraceHashTable() {
 }
 
-void CallTraceHashTable::clear() {
-  // Wait for all hazard pointers to clear before deallocation to prevent races
+ChunkList CallTraceHashTable::clearTableOnly() {
+  // Wait for all hazard pointers to clear before detaching chunks
   HazardPointer::waitForAllHazardPointersToClear();
-
+
   // Clear previous chain pointers to prevent traversal during deallocation
   for (LongHashTable *table = _table; table != nullptr; table = table->prev()) {
     LongHashTable *prev_table = table->prev();
@@ -113,13 +113,21 @@ void CallTraceHashTable::clear() {
       table->setPrev(nullptr); // Clear link before deallocation
     }
   }
-
-  // Now safe to deallocate all memory
-  _allocator.clear();
-
-  // Reinitialize with fresh table
+
+  // Detach chunks for deferred deallocation - keeps trace memory alive
+  ChunkList detached_chunks = _allocator.detachChunks();
+
+  // Reinitialize with a fresh table (using the new chunk from detachChunks)
   _table = LongHashTable::allocate(nullptr, INITIAL_CAPACITY, &_allocator);
   _overflow = 0;
+
+  return detached_chunks;
+}
+
+void CallTraceHashTable::clear() {
+  // Clear the table and immediately free the chunks (original behavior)
+  ChunkList chunks = clearTableOnly();
+  LinearAllocator::freeChunks(chunks);
 }
 
 // Adaptation of MurmurHash64A by Austin Appleby
diff --git a/ddprof-lib/src/main/cpp/callTraceHashTable.h b/ddprof-lib/src/main/cpp/callTraceHashTable.h
index 5232ded7e..368dd1f6e 100644
--- a/ddprof-lib/src/main/cpp/callTraceHashTable.h
+++ b/ddprof-lib/src/main/cpp/callTraceHashTable.h
@@ -79,6 +79,19 @@ class CallTraceHashTable {
   ~CallTraceHashTable();
 
   void clear();
+
+  /**
+   * Resets the hash table structure but defers memory deallocation.
+   * Returns a ChunkList containing the detached memory chunks.
+   * The caller must call LinearAllocator::freeChunks() on the returned
+   * ChunkList after processing is complete.
+   *
+   * This is used to fix a use-after-free in processTraces(): the table
+   * structure is reset immediately (allowing rotation), but trace memory
+   * remains valid until the processor finishes accessing it.
+   */
+  ChunkList clearTableOnly();
+
   void collect(std::unordered_set<CallTrace *> &traces, std::function<void(CallTrace *)> trace_hook = nullptr);
 
   u64 put(int num_frames, ASGCT_CallFrame *frames, bool truncated, u64 weight);
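As a quick illustration of the contract documented above, a minimal usage sketch (hypothetical caller code, not part of the patch; it assumes the CallTraceHashTable, ChunkList and LinearAllocator APIs introduced in this diff):

```cpp
#include <functional>
#include <unordered_set>

#include "callTraceHashTable.h"
#include "linearAllocator.h"

// Hypothetical caller showing the clearTableOnly()/freeChunks() contract:
// the table is reset immediately, but CallTrace pointers previously obtained
// from it stay valid until freeChunks() is called.
void drainAndProcess(CallTraceHashTable& table,
                     const std::unordered_set<CallTrace*>& collected,
                     const std::function<void(const std::unordered_set<CallTrace*>&)>& processor) {
  // Reset the table structure; the backing chunks are detached, not freed.
  ChunkList chunks = table.clearTableOnly();

  // The table can already accept new put() calls here while the old traces
  // are still being processed.
  processor(collected);

  // Only after the processor has finished is it safe to release the memory.
  LinearAllocator::freeChunks(chunks);
}
```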
diff --git a/ddprof-lib/src/main/cpp/callTraceStorage.cpp b/ddprof-lib/src/main/cpp/callTraceStorage.cpp
index 586d6848a..4838cbee3 100644
--- a/ddprof-lib/src/main/cpp/callTraceStorage.cpp
+++ b/ddprof-lib/src/main/cpp/callTraceStorage.cpp
@@ -371,10 +371,11 @@ CallTraceStorage::CallTraceStorage() : _active_storage(nullptr), _standby_storag
 CallTraceStorage::~CallTraceStorage() {
   // Atomically invalidate storage pointers to prevent new put() operations
-  // Relaxed ordering is sufficient since no concurrent readers exist during destruction
-  CallTraceHashTable* active = const_cast<CallTraceHashTable*>(__atomic_exchange_n(&_active_storage, nullptr, __ATOMIC_RELAXED));
-  CallTraceHashTable* standby = const_cast<CallTraceHashTable*>(__atomic_exchange_n(&_standby_storage, nullptr, __ATOMIC_RELAXED));
-  CallTraceHashTable* scratch = const_cast<CallTraceHashTable*>(__atomic_exchange_n(&_scratch_storage, nullptr, __ATOMIC_RELAXED));
+  // ACQ_REL ordering: RELEASE ensures the nullptr is visible to put()'s ACQUIRE load,
+  // ACQUIRE ensures we see the latest pointer value for the subsequent deletion
+  CallTraceHashTable* active = const_cast<CallTraceHashTable*>(__atomic_exchange_n(&_active_storage, nullptr, __ATOMIC_ACQ_REL));
+  CallTraceHashTable* standby = const_cast<CallTraceHashTable*>(__atomic_exchange_n(&_standby_storage, nullptr, __ATOMIC_ACQ_REL));
+  CallTraceHashTable* scratch = const_cast<CallTraceHashTable*>(__atomic_exchange_n(&_scratch_storage, nullptr, __ATOMIC_ACQ_REL));
 
   // Wait for any ongoing hazard pointer usage to complete and delete each unique table
   // Note: In triple-buffering, all three pointers should be unique, but check anyway
@@ -472,12 +473,13 @@ void CallTraceStorage::processTraces(std::function<void(const std::unordered_set<CallTrace *> &)> processor) {
-  CallTraceHashTable* original_active = const_cast<CallTraceHashTable*>(__atomic_load_n(&_active_storage, __ATOMIC_RELAXED));
-  CallTraceHashTable* original_standby = const_cast<CallTraceHashTable*>(__atomic_load_n(&_standby_storage, __ATOMIC_RELAXED));
-  CallTraceHashTable* original_scratch = const_cast<CallTraceHashTable*>(__atomic_load_n(&_scratch_storage, __ATOMIC_RELAXED));
+  // Load storage pointers - ACQUIRE ordering synchronizes with RELEASE stores from
+  // previous processTraces() calls and constructor initialization
+  CallTraceHashTable* original_active = const_cast<CallTraceHashTable*>(__atomic_load_n(&_active_storage, __ATOMIC_ACQUIRE));
+  CallTraceHashTable* original_standby = const_cast<CallTraceHashTable*>(__atomic_load_n(&_standby_storage, __ATOMIC_ACQUIRE));
+  CallTraceHashTable* original_scratch = const_cast<CallTraceHashTable*>(__atomic_load_n(&_scratch_storage, __ATOMIC_ACQUIRE));
 
   // Clear process collection for reuse (no malloc/free)
   _traces_buffer.clear();
@@ -492,8 +494,11 @@ void CallTraceStorage::processTraces(std::function<void(const std::unordered_set<CallTrace *> &)> processor) {
 
-  // ... -> it will become the new active
-  original_standby->clear();
+  // Step 3: Clear the standby table structure but DEFER memory deallocation.
+  // The standby traces are now in _traces_buffer as raw pointers, so the
+  // underlying memory must stay alive until processor() finishes.
+  // clearTableOnly() resets the table for reuse but returns the chunks for later freeing.
+  ChunkList standby_chunks = original_standby->clearTableOnly();
 
   {
     // Critical section for table swap operations - disallow signals to interrupt
@@ -531,7 +536,11 @@ void CallTraceStorage::processTraces(std::function<void(const std::unordered_set<CallTrace *> &)> processor) {
   processor(_traces_buffer);
 
+  // The processor has finished with _traces_buffer, so it is now safe to
+  // free the detached standby chunks that were kept alive during processing.
+  LinearAllocator::freeChunks(standby_chunks);
+
   original_active->clear();
 
   // Triple-buffer rotation maintains trace continuity with thread-safe malloc-free operations:
@@ -546,9 +555,10 @@ void CallTraceStorage::clear() {
   // Mark critical section during clear operation for consistency
   CriticalSection cs;
 
-  // Load current table pointers - relaxed ordering sufficient within critical section
-  CallTraceHashTable* active = const_cast<CallTraceHashTable*>(__atomic_load_n(&_active_storage, __ATOMIC_RELAXED));
-  CallTraceHashTable* standby = const_cast<CallTraceHashTable*>(__atomic_load_n(&_standby_storage, __ATOMIC_RELAXED));
+  // Load current table pointers - ACQUIRE ordering synchronizes with RELEASE stores
+  // from processTraces() rotation and constructor initialization
+  CallTraceHashTable* active = const_cast<CallTraceHashTable*>(__atomic_load_n(&_active_storage, __ATOMIC_ACQUIRE));
+  CallTraceHashTable* standby = const_cast<CallTraceHashTable*>(__atomic_load_n(&_standby_storage, __ATOMIC_ACQUIRE));
 
   // Direct clear operations with critical section protection
   if (active) {
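The ordering comments above rely on the standard release/acquire pairing. A minimal, self-contained sketch of that pairing with the same __atomic builtins (hypothetical names; `g_table` stands in for _active_storage):

```cpp
#include <cstdio>

struct Payload { int value; };

static Payload* g_table = nullptr;

void publisher(Payload* p) {
  p->value = 42;                                    // writes made before the store...
  __atomic_store_n(&g_table, p, __ATOMIC_RELEASE);  // ...are published by the RELEASE store
}

void consumer() {
  // The ACQUIRE load synchronizes with the RELEASE store: if it observes `p`,
  // it also observes every write that happened before that store.
  Payload* p = __atomic_load_n(&g_table, __ATOMIC_ACQUIRE);
  if (p != nullptr) {
    std::printf("%d\n", p->value);  // prints 42, never an uninitialized value
  }
}
```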
diff --git a/ddprof-lib/src/main/cpp/codeCache.cpp b/ddprof-lib/src/main/cpp/codeCache.cpp
index 5fb17d964..14796d701 100644
--- a/ddprof-lib/src/main/cpp/codeCache.cpp
+++ b/ddprof-lib/src/main/cpp/codeCache.cpp
@@ -6,13 +6,15 @@
 #include "codeCache.h"
 #include "dwarf_dd.h"
 #include "os_dd.h"
+
 #include
 #include
 #include
 #include
 
 char *NativeFunc::create(const char *name, short lib_index) {
-  NativeFunc *f = (NativeFunc *)malloc(sizeof(NativeFunc) + 1 + strlen(name));
+  size_t size = align_up(sizeof(NativeFunc) + 1 + strlen(name), sizeof(NativeFunc*));
+  NativeFunc *f = (NativeFunc *)aligned_alloc(sizeof(NativeFunc*), size);
   f->_lib_index = lib_index;
   f->_mark = 0;
   // cppcheck-suppress memleak
diff --git a/ddprof-lib/src/main/cpp/codeCache.h b/ddprof-lib/src/main/cpp/codeCache.h
index 233da56c1..3398ad029 100644
--- a/ddprof-lib/src/main/cpp/codeCache.h
+++ b/ddprof-lib/src/main/cpp/codeCache.h
@@ -6,6 +6,8 @@
 #ifndef _CODECACHE_H
 #define _CODECACHE_H
 
+#include "utils.h"
+
 #include
 #include
 #include
@@ -62,7 +64,7 @@ class NativeFunc {
 
   static short libIndex(const char *name) {
     NativeFunc* func = from(name);
-    if (posix_memalign((void**)(&func), sizeof(NativeFunc*), sizeof(NativeFunc)) != 0) {
+    if (!is_aligned(func, sizeof(func))) {
       return -1;
     }
     return func->_lib_index;
diff --git a/ddprof-lib/src/main/cpp/flightRecorder.cpp b/ddprof-lib/src/main/cpp/flightRecorder.cpp
index ab39b2ca2..42c6023bd 100644
--- a/ddprof-lib/src/main/cpp/flightRecorder.cpp
+++ b/ddprof-lib/src/main/cpp/flightRecorder.cpp
@@ -40,8 +40,6 @@
 #include
 #include
 
-static SpinLock _rec_lock(0);
-
 static const char *const SETTING_RING[] = {NULL, "kernel", "user", "any"};
 static const char *const SETTING_CSTACK[] = {NULL, "no", "fp", "dwarf", "lbr"};
@@ -1484,62 +1482,57 @@ Error FlightRecorder::newRecording(bool reset) {
     return Error("Could not open Flight Recorder output file");
   }
 
-  // Given some of reads are not protected by _rec_lock,
-  // we want to publish _rec with full fence, so that read
-  // side cannot see partially initialized recording.
-  Recording* tmp = new Recording(fd, _args);
-  __atomic_store_n(&_rec, tmp, __ATOMIC_SEQ_CST);
+  _rec = new Recording(fd, _args);
   return Error::OK;
 }
 
 void FlightRecorder::stop() {
   ExclusiveLockGuard locker(&_rec_lock);
-  if (_rec != NULL) {
-    volatile Recording *tmp = _rec;
+  Recording* rec = _rec;
+  if (rec != nullptr) {
     // NULL first, deallocate later
-    __atomic_store_n(&_rec, nullptr, __ATOMIC_RELAXED);
-    delete tmp;
+    _rec = nullptr;
+    delete rec;
   }
 }
 
 Error FlightRecorder::dump(const char *filename, const int length) {
   ExclusiveLockGuard locker(&_rec_lock);
-  if (_rec != NULL) {
+  Recording* rec = _rec;
+  if (rec != nullptr) {
     if (_filename.length() != length ||
         strncmp(filename, _filename.c_str(), length) != 0) {
       // if the filename to dump the recording to is specified move the current
       // working file there
       int copy_fd = open(filename, O_CREAT | O_RDWR | O_TRUNC, 0644);
-      _rec->switchChunk(copy_fd);
+      rec->switchChunk(copy_fd);
       close(copy_fd);
-    } else {
-      return Error(
-          "Can not dump recording to itself. Provide a different file name!");
+      return Error::OK;
     }
-
-    return Error::OK;
-  } else {
-    return Error("No active recording");
+    return Error(
+        "Can not dump recording to itself. Provide a different file name!");
   }
+  return Error("No active recording");
 }
 
 void FlightRecorder::flush() {
   ExclusiveLockGuard locker(&_rec_lock);
-  if (_rec != NULL) {
-    jvmtiEnv *jvmti = VM::jvmti();
-    JNIEnv *env = VM::jni();
+  Recording* rec = _rec;
+  if (rec != nullptr) {
+    jvmtiEnv* jvmti = VM::jvmti();
+    JNIEnv* env = VM::jni();
 
-    jclass **classes = NULL;
+    jclass** classes = NULL;
     jint count = 0;
     // obtaining the class list will create local refs to all loaded classes,
     // effectively preventing them from being unloaded while flushing
     jvmtiError err = jvmti->GetLoadedClasses(&count, classes);
-    _rec->switchChunk(-1);
+    rec->switchChunk(-1);
     if (!err) {
       // deallocate all loaded classes
       for (int i = 0; i < count; i++) {
-        env->DeleteLocalRef((jobject)classes[i]);
-        jvmti->Deallocate((unsigned char *)classes[i]);
+        env->DeleteLocalRef((jobject) classes[i]);
+        jvmti->Deallocate((unsigned char*) classes[i]);
       }
     }
   }
@@ -1547,96 +1540,119 @@
 void FlightRecorder::wallClockEpoch(int lock_index, WallClockEpochEvent *event) {
-  if (_rec != NULL) {
-    Buffer *buf = _rec->buffer(lock_index);
-    _rec->recordWallClockEpoch(buf, event);
+  OptionalSharedLockGuard locker(&_rec_lock);
+  if (locker.ownsLock()) {
+    Recording* rec = _rec;
+    if (rec != nullptr) {
+      Buffer *buf = rec->buffer(lock_index);
+      rec->recordWallClockEpoch(buf, event);
+    }
   }
 }
 
 void FlightRecorder::recordTraceRoot(int lock_index, int tid, TraceRootEvent *event) {
-  if (_rec != NULL) {
-    Buffer *buf = _rec->buffer(lock_index);
-    _rec->recordTraceRoot(buf, tid, event);
+  OptionalSharedLockGuard locker(&_rec_lock);
+  if (locker.ownsLock()) {
+    Recording* rec = _rec;
+    if (rec != nullptr) {
+      Buffer *buf = rec->buffer(lock_index);
+      rec->recordTraceRoot(buf, tid, event);
+    }
   }
 }
 
 void FlightRecorder::recordQueueTime(int lock_index, int tid, QueueTimeEvent *event) {
-  if (_rec != NULL) {
-    Buffer *buf = _rec->buffer(lock_index);
-    _rec->recordQueueTime(buf, tid, event);
+  OptionalSharedLockGuard locker(&_rec_lock);
+  if (locker.ownsLock()) {
+    Recording* rec = _rec;
+    if (rec != nullptr) {
+      Buffer *buf = rec->buffer(lock_index);
+      rec->recordQueueTime(buf, tid, event);
+    }
   }
 }
 
 void FlightRecorder::recordDatadogSetting(int lock_index, int length,
                                           const char *name, const char *value,
                                           const char *unit) {
-  if (_rec != NULL) {
-    Buffer *buf = _rec->buffer(lock_index);
-    _rec->writeDatadogSetting(buf, length, name, value, unit);
+  OptionalSharedLockGuard locker(&_rec_lock);
+  if (locker.ownsLock()) {
+    Recording* rec = _rec;
+    if (rec != nullptr) {
+      Buffer *buf = rec->buffer(lock_index);
+      rec->writeDatadogSetting(buf, length, name, value, unit);
+    }
   }
 }
 
 void FlightRecorder::recordHeapUsage(int lock_index, long value, bool live) {
-  if (_rec != NULL) {
-    Buffer *buf = _rec->buffer(lock_index);
-    _rec->writeHeapUsage(buf, value, live);
+  OptionalSharedLockGuard locker(&_rec_lock);
+  if (locker.ownsLock()) {
+    Recording* rec = _rec;
+    if (rec != nullptr) {
+      Buffer *buf = rec->buffer(lock_index);
+      rec->writeHeapUsage(buf, value, live);
+    }
   }
 }
 
 void FlightRecorder::recordEvent(int lock_index, int tid, u64 call_trace_id,
                                  int event_type, Event *event) {
-  if (_rec != NULL) {
-    RecordingBuffer *buf = _rec->buffer(lock_index);
-    switch (event_type) {
-    case 0:
-      _rec->recordExecutionSample(buf, tid, call_trace_id,
+  OptionalSharedLockGuard locker(&_rec_lock);
+  if (locker.ownsLock()) {
+    Recording* rec = _rec;
+    if (rec != nullptr) {
+      RecordingBuffer *buf = rec->buffer(lock_index);
+      switch (event_type) {
+      case 0:
+        rec->recordExecutionSample(buf, tid, call_trace_id,
+                                   (ExecutionEvent *)event);
+        break;
+      case BCI_WALL:
+        rec->recordMethodSample(buf, tid, call_trace_id,
                                 (ExecutionEvent *)event);
-      break;
-    case BCI_WALL:
-      _rec->recordMethodSample(buf, tid, call_trace_id,
-                               (ExecutionEvent *)event);
-      break;
-    case BCI_ALLOC:
-      _rec->recordAllocation(buf, tid, call_trace_id, (AllocEvent *)event);
-      break;
-    case BCI_LIVENESS:
-      _rec->recordHeapLiveObject(buf, tid, call_trace_id,
-                                 (ObjectLivenessEvent *)event);
-      break;
-    case BCI_LOCK:
-      _rec->recordMonitorBlocked(buf, tid, call_trace_id, (LockEvent *)event);
-      break;
-    case BCI_PARK:
-      _rec->recordThreadPark(buf, tid, call_trace_id, (LockEvent *)event);
-      break;
-    }
-    _rec->flushIfNeeded(buf);
-    _rec->addThread(lock_index, tid);
+        break;
+      case BCI_ALLOC:
+        rec->recordAllocation(buf, tid, call_trace_id, (AllocEvent *)event);
+        break;
+      case BCI_LIVENESS:
+        rec->recordHeapLiveObject(buf, tid, call_trace_id,
+                                  (ObjectLivenessEvent *)event);
+        break;
+      case BCI_LOCK:
+        rec->recordMonitorBlocked(buf, tid, call_trace_id, (LockEvent *)event);
+        break;
+      case BCI_PARK:
+        rec->recordThreadPark(buf, tid, call_trace_id, (LockEvent *)event);
+        break;
+      }
+      rec->flushIfNeeded(buf);
+      rec->addThread(lock_index, tid);
+    }
   }
 }
 
 void FlightRecorder::recordLog(LogLevel level, const char *message, size_t len) {
-  if (!_rec_lock.tryLockShared()) {
-    // No active recording
-    return;
-  }
-
-  if (len > MAX_STRING_LENGTH)
-    len = MAX_STRING_LENGTH;
-  // cppcheck-suppress obsoleteFunctions
-  Buffer *buf = (Buffer *)alloca(len + 40);
-  buf->reset();
+  OptionalSharedLockGuard locker(&_rec_lock);
+  if (locker.ownsLock()) {
+    Recording* rec = _rec;
+    if (rec != nullptr) {
+      if (len > MAX_STRING_LENGTH)
+        len = MAX_STRING_LENGTH;
+      // cppcheck-suppress obsoleteFunctions
+      Buffer *buf = (Buffer *)alloca(len + 40);
+      buf->reset();
 
-  int start = buf->skip(5);
-  buf->putVar64(T_LOG);
-  buf->putVar64(TSC::ticks());
-  buf->putVar64(level);
-  buf->putUtf8(message, len);
-  buf->putVar32(start, buf->offset() - start);
-  _rec->flush(buf);
-
-  _rec_lock.unlockShared();
+      int start = buf->skip(5);
+      buf->putVar64(T_LOG);
+      buf->putVar64(TSC::ticks());
+      buf->putVar64(level);
+      buf->putUtf8(message, len);
+      buf->putVar32(start, buf->offset() - start);
+      rec->flush(buf);
+    }
+  }
 }
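All of the record* methods above now follow the same pattern: a best-effort shared lock plus a null check, while stop() holds the exclusive lock to null out and delete the recording. A condensed sketch of that pattern (hypothetical Resource/Holder types; OptionalSharedLockGuard and ExclusiveLockGuard come from spinLock.h later in this diff, and the exclusive guard is assumed to block until the lock is free):

```cpp
#include "spinLock.h"

struct Resource { void use() {} };  // stands in for Recording

struct Holder {                     // stands in for FlightRecorder
  SpinLock lock;                    // plays the role of _rec_lock
  Resource* res = nullptr;          // plays the role of _rec

  // Writer side (like FlightRecorder::stop()): take the exclusive lock,
  // null the pointer first, then delete.
  void stop() {
    ExclusiveLockGuard locker(&lock);
    Resource* r = res;
    res = nullptr;
    delete r;
  }

  // Reader side (like the record* methods): try the shared lock and bail out
  // if it is unavailable; re-check the pointer under the lock.
  void record() {
    OptionalSharedLockGuard locker(&lock);
    if (locker.ownsLock()) {
      Resource* r = res;
      if (r != nullptr) {
        r->use();  // r cannot be deleted here - stop() needs the exclusive lock
      }
    }
  }
};
```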
diff --git a/ddprof-lib/src/main/cpp/flightRecorder.h b/ddprof-lib/src/main/cpp/flightRecorder.h
index 7e119ad4f..c5bc067b4 100644
--- a/ddprof-lib/src/main/cpp/flightRecorder.h
+++ b/ddprof-lib/src/main/cpp/flightRecorder.h
@@ -291,7 +291,9 @@ class FlightRecorder {
 private:
   std::string _filename;
   Arguments _args;
-  Recording* volatile _rec;
+
+  SpinLock _rec_lock;
+  Recording* _rec;
 
   Error newRecording(bool reset);
diff --git a/ddprof-lib/src/main/cpp/linearAllocator.cpp b/ddprof-lib/src/main/cpp/linearAllocator.cpp
index 275829e61..a48a7edcf 100644
--- a/ddprof-lib/src/main/cpp/linearAllocator.cpp
+++ b/ddprof-lib/src/main/cpp/linearAllocator.cpp
@@ -17,6 +17,23 @@
 #include "linearAllocator.h"
 #include "counters.h"
 #include "os_dd.h"
+#include "common.h"
+#include
+
+// Enable ASan memory poisoning for better use-after-free detection
+#ifdef __has_feature
+  #if __has_feature(address_sanitizer)
+    #define ASAN_ENABLED 1
+  #endif
+#endif
+
+#ifdef __SANITIZE_ADDRESS__
+  #define ASAN_ENABLED 1
+#endif
+
+#ifdef ASAN_ENABLED
+  #include <sanitizer/asan_interface.h>
+#endif
 
 LinearAllocator::LinearAllocator(size_t chunk_size) {
   _chunk_size = chunk_size;
@@ -32,6 +49,27 @@ void LinearAllocator::clear() {
   if (_reserve->prev == _tail) {
     freeChunk(_reserve);
   }
+
+  // ASAN POISONING: Mark all allocated memory as poisoned BEFORE freeing chunks.
+  // This catches use-after-free even when memory isn't munmap'd (kept in _tail).
+  #ifdef ASAN_ENABLED
+  int chunk_count = 0;
+  size_t total_poisoned = 0;
+  for (Chunk *chunk = _tail; chunk != NULL; chunk = chunk->prev) {
+    // Poison from the start of usable data to the current offset
+    size_t used_size = chunk->offs - sizeof(Chunk);
+    if (used_size > 0) {
+      void* data_start = (char*)chunk + sizeof(Chunk);
+      ASAN_POISON_MEMORY_REGION(data_start, used_size);
+      chunk_count++;
+      total_poisoned += used_size;
+    }
+  }
+  if (chunk_count > 0) {
+    TEST_LOG("[LinearAllocator::clear] ASan poisoned %d chunks, %zu bytes total", chunk_count, total_poisoned);
+  }
+  #endif
+
   while (_tail->prev != NULL) {
     Chunk *current = _tail;
     _tail = _tail->prev;
@@ -39,6 +77,57 @@ void LinearAllocator::clear() {
   }
   _reserve = _tail;
   _tail->offs = sizeof(Chunk);
+
+  // DON'T UNPOISON HERE - let alloc() do it on demand.
+  // This ensures ASan can catch use-after-free bugs when code accesses
+  // memory that was cleared but not yet reallocated.
+}
+
+ChunkList LinearAllocator::detachChunks() {
+  // Capture current state before detaching
+  ChunkList result(_tail, _chunk_size);
+
+  // Handle the reserve chunk: if it is ahead of tail, it needs special handling
+  if (_reserve->prev == _tail) {
+    // Reserve is a separate chunk ahead of tail - it becomes part of the detached
+    // list; include it in the chain by making it the new head
+    result.head = _reserve;
+  }
+
+  // Allocate a fresh chunk for new allocations
+  Chunk* fresh = allocateChunk(NULL);
+  if (fresh != NULL) {
+    _tail = fresh;
+    _reserve = fresh;
+  } else {
+    // Allocation failed - restore the original state and return an empty list.
+    // This maintains the invariant that the allocator is always usable.
+    _reserve = _tail;
+    _tail->offs = sizeof(Chunk);
+    return ChunkList();
+  }
+
+  return result;
+}
+
+void LinearAllocator::freeChunks(ChunkList& chunks) {
+  if (chunks.head == nullptr || chunks.chunk_size == 0) {
+    return;
+  }
+
+  // Walk the chain and free each chunk
+  Chunk* current = chunks.head;
+  while (current != nullptr) {
+    Chunk* prev = current->prev;
+    OS::safeFree(current, chunks.chunk_size);
+    Counters::decrement(LINEAR_ALLOCATOR_BYTES, chunks.chunk_size);
+    Counters::decrement(LINEAR_ALLOCATOR_CHUNKS);
+    current = prev;
+  }
+
+  // Mark as freed to prevent a double free
+  chunks.head = nullptr;
+  chunks.chunk_size = 0;
 }
 
 void *LinearAllocator::alloc(size_t size) {
@@ -49,11 +138,20 @@ void *LinearAllocator::alloc(size_t size) {
          offs + size <= _chunk_size;
          offs = __atomic_load_n(&chunk->offs, __ATOMIC_ACQUIRE)) {
       if (__sync_bool_compare_and_swap(&chunk->offs, offs, offs + size)) {
+        void* allocated_ptr = (char *)chunk + offs;
+
+        // ASAN UNPOISONING: Unpoison ONLY the allocated region, on demand.
+        // This allows ASan to detect use-after-free of memory that was cleared
+        // but not yet reallocated.
+        #ifdef ASAN_ENABLED
+        ASAN_UNPOISON_MEMORY_REGION(allocated_ptr, size);
+        #endif
+
         if (_chunk_size / 2 - offs < size) {
           // Stepped over a middle of the chunk - it's time to prepare a new one
           reserveChunk(chunk);
         }
-        return (char *)chunk + offs;
+        return allocated_ptr;
       }
     }
   } while ((chunk = getNextChunk(chunk)) != NULL);
@@ -66,6 +164,16 @@ Chunk *LinearAllocator::allocateChunk(Chunk *current) {
   if (chunk != NULL) {
     chunk->prev = current;
     chunk->offs = sizeof(Chunk);
+
+    // ASAN UNPOISONING: New chunks from mmap are clean - unpoison them for use.
+    // mmap returns memory that ASan may track as unallocated, so we need to
+    // explicitly unpoison it to allow allocations.
+    #ifdef ASAN_ENABLED
+    size_t usable_size = _chunk_size - sizeof(Chunk);
+    void* data_start = (char*)chunk + sizeof(Chunk);
+    ASAN_UNPOISON_MEMORY_REGION(data_start, usable_size);
+    #endif
+
     Counters::increment(LINEAR_ALLOCATOR_BYTES, _chunk_size);
     Counters::increment(LINEAR_ALLOCATOR_CHUNKS);
   }
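A self-contained toy version of the poisoning idiom used above, for readers unfamiliar with the ASan interface (a recycled buffer rather than the real allocator; build with -fsanitize=address for the poisoning to take effect):

```cpp
#include <cstddef>

// Same feature-detection pattern as linearAllocator.cpp
#ifdef __has_feature
  #if __has_feature(address_sanitizer)
    #define SKETCH_ASAN 1
  #endif
#endif
#ifdef __SANITIZE_ADDRESS__
  #define SKETCH_ASAN 1
#endif

#ifdef SKETCH_ASAN
  #include <sanitizer/asan_interface.h>
#else
  #define ASAN_POISON_MEMORY_REGION(addr, size)   ((void)(addr), (void)(size))
  #define ASAN_UNPOISON_MEMORY_REGION(addr, size) ((void)(addr), (void)(size))
#endif

// Toy recycled buffer: clear() poisons the used region instead of freeing it,
// and take() unpoisons exactly the bytes handed back to the caller. Any access
// to cleared-but-not-yet-reallocated bytes is then reported by ASan.
struct RecycledBuffer {
  char data[4096];
  size_t used = 0;

  void* take(size_t n) {            // no bounds checking - illustration only
    void* p = data + used;
    used += n;
    ASAN_UNPOISON_MEMORY_REGION(p, n);
    return p;
  }

  void clear() {
    ASAN_POISON_MEMORY_REGION(data, used);
    used = 0;
  }
};
```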
diff --git a/ddprof-lib/src/main/cpp/linearAllocator.h b/ddprof-lib/src/main/cpp/linearAllocator.h
index a680d0738..8d201dd17 100644
--- a/ddprof-lib/src/main/cpp/linearAllocator.h
+++ b/ddprof-lib/src/main/cpp/linearAllocator.h
@@ -26,6 +26,19 @@ struct Chunk {
   char _padding[56];
 };
 
+/**
+ * Holds detached chunks from a LinearAllocator for deferred deallocation.
+ * Used to keep trace memory alive during processor execution while allowing
+ * the allocator to be reset for new allocations.
+ */
+struct ChunkList {
+  Chunk *head;
+  size_t chunk_size;
+
+  ChunkList() : head(nullptr), chunk_size(0) {}
+  ChunkList(Chunk *h, size_t sz) : head(h), chunk_size(sz) {}
+};
+
 class LinearAllocator {
 private:
   size_t _chunk_size;
@@ -43,6 +56,22 @@ class LinearAllocator {
 
   void clear();
 
+  /**
+   * Detaches all chunks from this allocator, returning them as a ChunkList.
+   * The allocator is reset to an empty state with a fresh chunk for new allocations.
+   * The detached chunks remain allocated and valid until freeChunks() is called.
+   *
+   * This enables deferred deallocation: reset the allocator immediately while
+   * keeping old data alive until it is no longer needed.
+   */
+  ChunkList detachChunks();
+
+  /**
+   * Frees all chunks in a previously detached ChunkList.
+   * Call this after processing is complete to deallocate the memory.
+   */
+  static void freeChunks(ChunkList& chunks);
+
   void *alloc(size_t size);
 };
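A minimal sketch of the detach/free lifecycle at the allocator level, assuming the API declared above (hypothetical caller; `recycle` is not part of the patch):

```cpp
#include "linearAllocator.h"

void recycle(LinearAllocator& allocator) {
  void* old_data = allocator.alloc(64);         // served from the current chunks
  ChunkList detached = allocator.detachChunks();

  void* new_data = allocator.alloc(64);         // served from the fresh chunk
  (void)new_data;

  // old_data is still valid here: its chunk is owned by `detached`, not freed.
  (void)old_data;

  LinearAllocator::freeChunks(detached);        // now old_data is gone
  LinearAllocator::freeChunks(detached);        // no-op: head was reset to nullptr
}
```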
diff --git a/ddprof-lib/src/main/cpp/safeAccess.h b/ddprof-lib/src/main/cpp/safeAccess.h
index 56e34006e..144b9289d 100644
--- a/ddprof-lib/src/main/cpp/safeAccess.h
+++ b/ddprof-lib/src/main/cpp/safeAccess.h
@@ -45,11 +45,11 @@ class SafeAccess {
 
   static bool handle_safefetch(int sig, void* context);
 
-  static inline void *load(void **ptr) {
-    return loadPtr(ptr, nullptr);
+  static inline void *load(void **ptr, void* default_value = nullptr) {
+    return loadPtr(ptr, default_value);
   }
 
-  static inline u32 load32(u32 *ptr, u32 default_value) {
+  static inline u32 load32(u32 *ptr, u32 default_value = 0) {
     int res = safefetch32_impl((int*)ptr, (int)default_value);
     return static_cast<u32>(res);
   }
diff --git a/ddprof-lib/src/main/cpp/spinLock.h b/ddprof-lib/src/main/cpp/spinLock.h
index 0b82fc55d..36df20c06 100644
--- a/ddprof-lib/src/main/cpp/spinLock.h
+++ b/ddprof-lib/src/main/cpp/spinLock.h
@@ -30,7 +30,7 @@ class alignas(DEFAULT_CACHE_LINE_SIZE) SpinLock {
   volatile int _lock;
   char _padding[DEFAULT_CACHE_LINE_SIZE - sizeof(_lock)];
 
 public:
-  constexpr SpinLock(int initial_state = 0) : _lock(initial_state), _padding() {
+  explicit constexpr SpinLock(int initial_state = 0) : _lock(initial_state), _padding() {
     static_assert(sizeof(SpinLock) == DEFAULT_CACHE_LINE_SIZE);
   }
 
@@ -48,8 +48,7 @@ class alignas(DEFAULT_CACHE_LINE_SIZE) SpinLock {
 
   bool tryLockShared() {
     int value;
-    // we use relaxed as the compare already offers the guarantees we need
-    while ((value = __atomic_load_n(&_lock, __ATOMIC_RELAXED)) <= 0) {
+    while ((value = __atomic_load_n(&_lock, __ATOMIC_ACQUIRE)) <= 0) {
       if (__sync_bool_compare_and_swap(&_lock, value, value - 1)) {
         return true;
       }
@@ -59,7 +58,7 @@ class alignas(DEFAULT_CACHE_LINE_SIZE) SpinLock {
 
   void lockShared() {
     int value;
-    while ((value = __atomic_load_n(&_lock, __ATOMIC_RELAXED)) > 0 ||
+    while ((value = __atomic_load_n(&_lock, __ATOMIC_ACQUIRE)) > 0 ||
            !__sync_bool_compare_and_swap(&_lock, value, value - 1)) {
       spinPause();
     }
@@ -86,6 +85,29 @@ class SharedLockGuard {
   SharedLockGuard& operator=(SharedLockGuard&&) = delete;
 };
 
+class OptionalSharedLockGuard {
+  SpinLock* _lock;
+public:
+  explicit OptionalSharedLockGuard(SpinLock* lock) : _lock(lock) {
+    if (!_lock->tryLockShared()) {
+      // Locking failed, so there is nothing to unlock later.
+      _lock = nullptr;
+    }
+  }
+  ~OptionalSharedLockGuard() {
+    if (_lock != nullptr) {
+      _lock->unlockShared();
+    }
+  }
+  bool ownsLock() const { return _lock != nullptr; }
+
+  // Non-copyable and non-movable
+  OptionalSharedLockGuard(const OptionalSharedLockGuard&) = delete;
+  OptionalSharedLockGuard& operator=(const OptionalSharedLockGuard&) = delete;
+  OptionalSharedLockGuard(OptionalSharedLockGuard&&) = delete;
+  OptionalSharedLockGuard& operator=(OptionalSharedLockGuard&&) = delete;
+};
+
 class ExclusiveLockGuard {
 private:
   SpinLock* _lock;
diff --git a/ddprof-lib/src/main/cpp/utils.h b/ddprof-lib/src/main/cpp/utils.h
new file mode 100644
index 000000000..47d58eb96
--- /dev/null
+++ b/ddprof-lib/src/main/cpp/utils.h
@@ -0,0 +1,35 @@
+#ifndef _UTILS_H
+#define _UTILS_H
+
+#include <assert.h>
+#include <stddef.h>
+#include <stdint.h>
+
+inline bool is_power_of_2(size_t size) {
+  return size > 0 && (size & (size - 1)) == 0;
+}
+
+template <typename T>
+inline bool is_aligned(const T* ptr, size_t alignment) noexcept {
+  assert(is_power_of_2(alignment));
+  // Convert the pointer to an integer type
+  auto iptr = reinterpret_cast<uintptr_t>(ptr);
+
+  // Check if the integer value is a multiple of the alignment
+  return (iptr & (alignment - 1)) == 0;
+}
+
+inline size_t align_down(size_t size, size_t alignment) noexcept {
+  assert(is_power_of_2(alignment));
+  return size & ~(alignment - 1);
+}
+
+inline size_t align_up(size_t size, size_t alignment) noexcept {
+  assert(is_power_of_2(alignment));
+  return align_down(size + alignment - 1, alignment);
+}
+
+
+
+
+#endif // _UTILS_H
\ No newline at end of file
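Worked examples for the new helpers, including the rounding that codeCache.cpp now performs before aligned_alloc (values assume 8-byte pointers; the sizes are hypothetical stand-ins):

```cpp
#include <cassert>
#include <cstdlib>

#include "utils.h"

int main() {
  static_assert(sizeof(void*) == 8, "the examples below assume 8-byte pointers");

  assert(align_up(13, 8) == 16);     // 13 rounded up to the next multiple of 8
  assert(align_up(16, 8) == 16);     // already-aligned values are unchanged
  assert(align_down(13, 8) == 8);

  int x = 0;
  assert(is_aligned(&x, alignof(int)));

  // Mirrors NativeFunc::create(): aligned_alloc requires the size to be a
  // multiple of the requested alignment, hence the align_up() call.
  size_t raw = 14;                             // stand-in for sizeof(NativeFunc) + 1 + strlen(name)
  size_t size = align_up(raw, sizeof(void*));  // 16
  void* f = std::aligned_alloc(sizeof(void*), size);
  assert(f != nullptr && is_aligned(f, sizeof(void*)));
  std::free(f);
  return 0;
}
```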
diff --git a/ddprof-lib/src/test/cpp/test_callTraceStorage.cpp b/ddprof-lib/src/test/cpp/test_callTraceStorage.cpp
index 70c532007..aed48352b 100644
--- a/ddprof-lib/src/test/cpp/test_callTraceStorage.cpp
+++ b/ddprof-lib/src/test/cpp/test_callTraceStorage.cpp
@@ -495,4 +495,134 @@ TEST_F(CallTraceStorageTest, HazardPointerSynchronizationDuringSwap) {
   EXPECT_GT(final_trace_count.load(), 0)
       << "Storage should still contain traces after synchronization test";
 }
+
+/**
+ * Reproducer test for the use-after-free bug in processTraces().
+ *
+ * BUG DESCRIPTION:
+ * In processTraces(), traces are collected from standby into _traces_buffer (raw pointers),
+ * then standby->clear() frees all memory, but processor(_traces_buffer) is called AFTER
+ * the clear, accessing freed memory.
+ *
+ * Timeline of the bug:
+ * 1. original_standby->collect(_traces_buffer, ...) - collects raw pointers from STANDBY
+ * 2. original_standby->clear() - FREES ALL MEMORY including CallTrace objects!
+ * 3. processor(_traces_buffer) - accesses freed memory (USE-AFTER-FREE)
+ *
+ * KEY INSIGHT: The bug only affects traces from STANDBY, not ACTIVE.
+ * - Active traces are cleared AFTER the processor runs (safe)
+ * - Standby traces are cleared BEFORE the processor runs (BUG!)
+ *
+ * To trigger the bug, we need traces IN STANDBY at the start of processTraces().
+ * Traces get into standby via the liveness preservation mechanism:
+ * 1. Register a liveness checker that marks traces as "live"
+ * 2. During processTraces(), live traces are copied to SCRATCH
+ * 3. After rotation, SCRATCH becomes the new STANDBY
+ * 4. The next processTraces() will have those preserved traces in STANDBY
+ * 5. Those traces are collected, then FREED, then ACCESSED → USE-AFTER-FREE!
+ *
+ * This test should FAIL (crash or ASan error) before the fix and PASS after.
+ */
+TEST_F(CallTraceStorageTest, UseAfterFreeInProcessTraces) {
+  // Create multiple traces with varying frame counts to increase memory footprint
+  const int NUM_TRACES = 100;
+  const int MAX_FRAMES = 20;
+
+  std::vector<u64> trace_ids;
+  trace_ids.reserve(NUM_TRACES);
+
+  // Create traces with multiple frames to use more memory
+  for (int i = 0; i < NUM_TRACES; i++) {
+    std::vector<ASGCT_CallFrame> frames(MAX_FRAMES);
+    for (int j = 0; j < MAX_FRAMES; j++) {
+      frames[j].bci = i * 1000 + j;
+      frames[j].method_id = (jmethodID)(0x10000 + i * 100 + j);
+    }
+
+    u64 trace_id = storage->put(MAX_FRAMES, frames.data(), false, 1);
+    ASSERT_GT(trace_id, 0) << "Failed to store trace " << i;
+    trace_ids.push_back(trace_id);
+  }
+
+  // CRITICAL: Register a liveness checker that preserves ALL traces.
+  // This causes traces to be copied to SCRATCH during processTraces().
+  // After rotation, SCRATCH becomes STANDBY, so the NEXT processTraces()
+  // will have these traces in STANDBY, where the bug manifests.
+  storage->registerLivenessChecker([&trace_ids](std::unordered_set<u64>& buffer) {
+    for (u64 id : trace_ids) {
+      buffer.insert(id);
+    }
+  });
+
+  // First processTraces: traces are in ACTIVE, get collected and preserved to SCRATCH.
+  // After rotation, SCRATCH becomes STANDBY (now containing the preserved traces).
+  int first_count = 0;
+  storage->processTraces([&first_count](const std::unordered_set<CallTrace*>& traces) {
+    first_count = traces.size();
+    printf("First processTraces: %d traces collected\n", first_count);
+  });
+  EXPECT_GT(first_count, NUM_TRACES) << "First processTraces should collect all traces";
+
+  // Second processTraces: THIS IS WHERE THE BUG OCCURS!
+  // 1. STANDBY now contains the preserved traces (from the first call's scratch)
+  // 2. Standby traces are collected into _traces_buffer (raw pointers)
+  // 3. original_standby->clear() - FREES the trace memory!
+  // 4. processor(_traces_buffer) - accesses FREED memory (USE-AFTER-FREE!)
+  storage->processTraces([&](const std::unordered_set<CallTrace*>& traces) {
+    int total_frames = 0;
+    int total_bci_sum = 0;
+    int trace_count = 0;
+
+    for (CallTrace* trace : traces) {
+      if (trace == nullptr) continue;
+      if (trace->trace_id == CallTraceStorage::DROPPED_TRACE_ID) continue;
+
+      trace_count++;
+      // Deep access to detect use-after-free: after standby->clear(), this
+      // memory would be FREED while we are still accessing it.
+      // ASan will catch this; without ASan, we might read garbage.
+      EXPECT_GE(trace->num_frames, 1) << "Corrupted num_frames (use-after-free?)";
+      EXPECT_LE(trace->num_frames, MAX_FRAMES) << "Corrupted num_frames (use-after-free?)";
+      EXPECT_FALSE(trace->truncated) << "Corrupted truncated flag (use-after-free?)";
+      EXPECT_GT(trace->trace_id, 0) << "Corrupted trace_id (use-after-free?)";
+
+      total_frames += trace->num_frames;
+
+      // Access every frame - this maximizes the chance of detecting corruption
+      for (int i = 0; i < trace->num_frames; i++) {
+        int bci = trace->frames[i].bci;
+        total_bci_sum += bci;
+        EXPECT_GE(bci, 0) << "Corrupted BCI at frame " << i << " (use-after-free?)";
+
+        jmethodID method = trace->frames[i].method_id;
+        EXPECT_NE(method, nullptr) << "Null method_id at frame " << i << " (use-after-free?)";
+      }
+    }
+
+    printf("Second processTraces: %d traces, %d total frames, bci_sum=%d\n",
+           trace_count, total_frames, total_bci_sum);
+
+    // This is the key assertion: we expect to find the preserved traces.
+    // If the bug exists, we are reading freed memory here!
+    EXPECT_GE(trace_count, NUM_TRACES) << "Should find preserved traces from standby";
+  });
+
+  // Third processTraces: traces should still be preserved (copied to the new scratch).
+  // This further exercises the use-after-free if the bug exists.
+  storage->processTraces([&](const std::unordered_set<CallTrace*>& traces) {
+    int trace_count = 0;
+    for (CallTrace* trace : traces) {
+      if (trace == nullptr) continue;
+      if (trace->trace_id == CallTraceStorage::DROPPED_TRACE_ID) continue;
+      trace_count++;
+
+      // Access all frame data
+      volatile int sum = 0;
+      for (int i = 0; i < trace->num_frames; i++) {
+        sum += trace->frames[i].bci;
+      }
+    }
+    printf("Third processTraces: %d traces\n", trace_count);
+    EXPECT_GE(trace_count, NUM_TRACES) << "Should still find preserved traces";
+  });
+}
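For reference, the ordering that this test exercises, condensed from processTraces() (names as in the patch; summary only, not additional test code):

```cpp
// BEFORE (use-after-free): standby memory is freed before the processor runs.
//   original_standby->collect(_traces_buffer, ...);   // raw CallTrace* pointers
//   original_standby->clear();                        // frees the backing chunks
//   processor(_traces_buffer);                        // reads freed memory
//
// AFTER (this patch): the table is reset, but its chunks outlive the processor.
//   original_standby->collect(_traces_buffer, ...);
//   ChunkList standby_chunks = original_standby->clearTableOnly();  // detach, don't free
//   /* rotate tables */
//   processor(_traces_buffer);                        // chunks still alive
//   LinearAllocator::freeChunks(standby_chunks);      // freed only after processing
```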