From aec33f3c1bef3b40805b055648120aecc06f685c Mon Sep 17 00:00:00 2001 From: Zhengyu Gu Date: Mon, 15 Sep 2025 13:02:21 -0400 Subject: [PATCH 1/6] Relax memory constraint on counter --- ddprof-lib/src/main/cpp/arch_dd.h | 6 ++++++ ddprof-lib/src/main/cpp/counters.h | 6 +++--- 2 files changed, 9 insertions(+), 3 deletions(-) diff --git a/ddprof-lib/src/main/cpp/arch_dd.h b/ddprof-lib/src/main/cpp/arch_dd.h index 8378fc6f2..6b801d426 100644 --- a/ddprof-lib/src/main/cpp/arch_dd.h +++ b/ddprof-lib/src/main/cpp/arch_dd.h @@ -14,6 +14,12 @@ static inline long long atomicInc(volatile long long &var, return __sync_fetch_and_add(&var, increment); } + +static inline long long atomicIncRelaxed(volatile long long &var, + long long increment = 1) { + return __atomic_fetch_add(&var, increment, __ATOMIC_RELAXED); +} + static inline u64 loadAcquire(volatile u64 &var) { return __atomic_load_n(&var, __ATOMIC_ACQUIRE); } diff --git a/ddprof-lib/src/main/cpp/counters.h b/ddprof-lib/src/main/cpp/counters.h index 1df8fe3f5..b276f029d 100644 --- a/ddprof-lib/src/main/cpp/counters.h +++ b/ddprof-lib/src/main/cpp/counters.h @@ -124,9 +124,9 @@ class Counters { static void increment(CounterId counter, long long delta = 1, int offset = 0) { #ifdef COUNTERS - atomicInc(Counters::instance() - ._counters[address(static_cast(counter) + offset)], - delta); + atomicIncRelaxed(Counters::instance() + ._counters[address(static_cast(counter) + offset)], + delta); #endif // COUNTERS } From ddc9eb00aa6124617d0f9e377d6a859c1855f6bb Mon Sep 17 00:00:00 2001 From: Zhengyu Gu Date: Tue, 16 Sep 2025 08:22:10 -0400 Subject: [PATCH 2/6] test --- .gitlab-ci.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 025512c76..53cd08afd 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -6,7 +6,7 @@ trigger_internal_build: - when: always allow_failure: false variables: - DOWNSTREAM_BRANCH: "main" + DOWNSTREAM_BRANCH: "zgu/remove_legacy_flag" UPSTREAM_PROJECT: ${CI_PROJECT_PATH} UPSTREAM_PROJECT_NAME: ${CI_PROJECT_NAME} UPSTREAM_BRANCH: ${CI_COMMIT_BRANCH} From e61aa4f94e6d5056c8a697a8ce30c79607cd2804 Mon Sep 17 00:00:00 2001 From: Zhengyu Gu Date: Tue, 16 Sep 2025 11:57:40 -0400 Subject: [PATCH 3/6] Revert test --- .gitlab-ci.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 53cd08afd..025512c76 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -6,7 +6,7 @@ trigger_internal_build: - when: always allow_failure: false variables: - DOWNSTREAM_BRANCH: "zgu/remove_legacy_flag" + DOWNSTREAM_BRANCH: "main" UPSTREAM_PROJECT: ${CI_PROJECT_PATH} UPSTREAM_PROJECT_NAME: ${CI_PROJECT_NAME} UPSTREAM_BRANCH: ${CI_COMMIT_BRANCH} From 14c815824ac92e1331a9792cc1d2438b74b18cd6 Mon Sep 17 00:00:00 2001 From: Zhengyu Gu Date: Tue, 16 Sep 2025 14:32:18 -0400 Subject: [PATCH 4/6] Fixed heap --- .gitlab-ci.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 025512c76..f03fee2b3 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -6,7 +6,7 @@ trigger_internal_build: - when: always allow_failure: false variables: - DOWNSTREAM_BRANCH: "main" + DOWNSTREAM_BRANCH: "zgu/fix-heap-size" UPSTREAM_PROJECT: ${CI_PROJECT_PATH} UPSTREAM_PROJECT_NAME: ${CI_PROJECT_NAME} UPSTREAM_BRANCH: ${CI_COMMIT_BRANCH} From 55a2844b88fcfdbf5b8cb4c8b39177962fbaf0b4 Mon Sep 17 00:00:00 2001 From: Zhengyu Gu Date: Wed, 17 Sep 2025 10:43:23 -0400 Subject: [PATCH 5/6] additional counters --- .gitlab-ci.yml | 2 +- ddprof-lib/src/main/cpp/arch_dd.h | 27 ++++++++++++------- .../src/main/cpp/callTraceHashTable.cpp | 4 +-- ddprof-lib/src/main/cpp/callTraceHashTable.h | 2 +- ddprof-lib/src/main/cpp/livenessTracker.cpp | 8 +++--- ddprof-lib/src/main/cpp/profiler.cpp | 18 ++++++------- ddprof-lib/src/main/cpp/profiler.h | 2 +- 7 files changed, 35 insertions(+), 28 deletions(-) diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index f03fee2b3..025512c76 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -6,7 +6,7 @@ trigger_internal_build: - when: always allow_failure: false variables: - DOWNSTREAM_BRANCH: "zgu/fix-heap-size" + DOWNSTREAM_BRANCH: "main" UPSTREAM_PROJECT: ${CI_PROJECT_PATH} UPSTREAM_PROJECT_NAME: ${CI_PROJECT_NAME} UPSTREAM_BRANCH: ${CI_COMMIT_BRANCH} diff --git a/ddprof-lib/src/main/cpp/arch_dd.h b/ddprof-lib/src/main/cpp/arch_dd.h index 6b801d426..8bddcdd2c 100644 --- a/ddprof-lib/src/main/cpp/arch_dd.h +++ b/ddprof-lib/src/main/cpp/arch_dd.h @@ -14,25 +14,32 @@ static inline long long atomicInc(volatile long long &var, return __sync_fetch_and_add(&var, increment); } - -static inline long long atomicIncRelaxed(volatile long long &var, - long long increment = 1) { +template +static inline long long atomicIncRelaxed(volatile T &var, + T increment = 1) { return __atomic_fetch_add(&var, increment, __ATOMIC_RELAXED); } -static inline u64 loadAcquire(volatile u64 &var) { - return __atomic_load_n(&var, __ATOMIC_ACQUIRE); +// Atomic load/store (unordered) +template +static inline T load(volatile T& var) { + return __atomic_load_n(&var, __ATOMIC_RELAXED); } -static inline size_t loadAcquire(volatile size_t &var) { - return __atomic_load_n(&var, __ATOMIC_ACQUIRE); +template +static inline void store(volatile T& var, T value) { + return __atomic_store_n(&var, value, __ATOMIC_RELAXED); } -static inline void storeRelease(volatile long long &var, long long value) { - return __atomic_store_n(&var, value, __ATOMIC_RELEASE); + +// Atomic load-acquire/release-store +template +static inline T loadAcquire(volatile T& var) { + return __atomic_load_n(&var, __ATOMIC_ACQUIRE); } -static inline void storeRelease(volatile size_t &var, size_t value) { +template +static inline void storeRelease(volatile T& var, T value) { return __atomic_store_n(&var, value, __ATOMIC_RELEASE); } diff --git a/ddprof-lib/src/main/cpp/callTraceHashTable.cpp b/ddprof-lib/src/main/cpp/callTraceHashTable.cpp index 1c32150bc..c10f4fd98 100644 --- a/ddprof-lib/src/main/cpp/callTraceHashTable.cpp +++ b/ddprof-lib/src/main/cpp/callTraceHashTable.cpp @@ -301,7 +301,7 @@ u64 CallTraceHashTable::put(int num_frames, ASGCT_CallFrame *frames, if (++step >= capacity) { // Very unlikely case of a table overflow - atomicInc(_overflow); + atomicIncRelaxed(_overflow); return OVERFLOW_TRACE_ID; } // Improved version of linear probing @@ -359,7 +359,7 @@ void CallTraceHashTable::collectAndCopySelective(std::unordered_set traces.insert(&_overflow_trace); if (trace_ids_to_preserve.find(OVERFLOW_TRACE_ID) != trace_ids_to_preserve.end()) { // Copy overflow trace to target - it's a static trace so just increment overflow counter - atomicInc(target->_overflow); + atomicIncRelaxed(target->_overflow); } } } diff --git a/ddprof-lib/src/main/cpp/callTraceHashTable.h b/ddprof-lib/src/main/cpp/callTraceHashTable.h index 5dceb65b4..2541d12fb 100644 --- a/ddprof-lib/src/main/cpp/callTraceHashTable.h +++ b/ddprof-lib/src/main/cpp/callTraceHashTable.h @@ -52,7 +52,7 @@ class CallTraceHashTable { LinearAllocator _allocator; LongHashTable *_current_table; - u64 _overflow; + volatile u64 _overflow; u64 calcHash(int num_frames, ASGCT_CallFrame *frames, bool truncated); CallTrace *storeCallTrace(int num_frames, ASGCT_CallFrame *frames, diff --git a/ddprof-lib/src/main/cpp/livenessTracker.cpp b/ddprof-lib/src/main/cpp/livenessTracker.cpp index e085f3cdd..53eff94ee 100644 --- a/ddprof-lib/src/main/cpp/livenessTracker.cpp +++ b/ddprof-lib/src/main/cpp/livenessTracker.cpp @@ -26,8 +26,8 @@ constexpr int LivenessTracker::MAX_TRACKING_TABLE_SIZE; constexpr int LivenessTracker::MIN_SAMPLING_INTERVAL; void LivenessTracker::cleanup_table(bool forced) { - u64 current = loadAcquire(_last_gc_epoch); - u64 target_gc_epoch = loadAcquire(_gc_epoch); + u64 current = load(_last_gc_epoch); + u64 target_gc_epoch = load(_gc_epoch); if ((target_gc_epoch == _last_gc_epoch || !__sync_bool_compare_and_swap(&_last_gc_epoch, current, @@ -383,10 +383,10 @@ void LivenessTracker::onGC() { } // just increment the epoch - atomicInc(_gc_epoch, 1); + atomicIncRelaxed(_gc_epoch,u64(1)); if (!ddprof::HeapUsage::isLastGCUsageSupported()) { - storeRelease(_used_after_last_gc, ddprof::HeapUsage::get(false)._used); + store(_used_after_last_gc, ddprof::HeapUsage::get(false)._used); } } diff --git a/ddprof-lib/src/main/cpp/profiler.cpp b/ddprof-lib/src/main/cpp/profiler.cpp index e51b3dc70..727746367 100644 --- a/ddprof-lib/src/main/cpp/profiler.cpp +++ b/ddprof-lib/src/main/cpp/profiler.cpp @@ -559,7 +559,7 @@ int Profiler::getJavaTraceAsync(void *ucontext, ASGCT_CallFrame *frames, return 0; } - atomicInc(_failures[-trace.num_frames]); + atomicIncRelaxed(_failures[-trace.num_frames]); trace.frames->bci = BCI_ERROR; trace.frames->method_id = (jmethodID)err_string; return trace.frames - frames + 1; @@ -607,14 +607,14 @@ void Profiler::fillFrameTypes(ASGCT_CallFrame *frames, int num_frames, } u64 Profiler::recordJVMTISample(u64 counter, int tid, jthread thread, jint event_type, Event *event, bool deferred) { - atomicInc(_total_samples); + atomicIncRelaxed(_total_samples); u32 lock_index = getLockIndex(tid); if (!_locks[lock_index].tryLock() && !_locks[lock_index = (lock_index + 1) % CONCURRENCY_LEVEL].tryLock() && !_locks[lock_index = (lock_index + 2) % CONCURRENCY_LEVEL].tryLock()) { // Too many concurrent signals already - atomicInc(_failures[-ticks_skipped]); + atomicIncRelaxed(_failures[-ticks_skipped]); return 0; } @@ -654,14 +654,14 @@ u64 Profiler::recordJVMTISample(u64 counter, int tid, jthread thread, jint event } void Profiler::recordDeferredSample(int tid, u64 call_trace_id, jint event_type, Event *event) { - atomicInc(_total_samples); + atomicIncRelaxed(_total_samples); u32 lock_index = getLockIndex(tid); if (!_locks[lock_index].tryLock() && !_locks[lock_index = (lock_index + 1) % CONCURRENCY_LEVEL].tryLock() && !_locks[lock_index = (lock_index + 2) % CONCURRENCY_LEVEL].tryLock()) { // Too many concurrent signals already - atomicInc(_failures[-ticks_skipped]); + atomicIncRelaxed(_failures[-ticks_skipped]); return; } @@ -672,14 +672,14 @@ void Profiler::recordDeferredSample(int tid, u64 call_trace_id, jint event_type, void Profiler::recordSample(void *ucontext, u64 counter, int tid, jint event_type, u64 call_trace_id, Event *event) { - atomicInc(_total_samples); + atomicIncRelaxed(_total_samples); u32 lock_index = getLockIndex(tid); if (!_locks[lock_index].tryLock() && !_locks[lock_index = (lock_index + 1) % CONCURRENCY_LEVEL].tryLock() && !_locks[lock_index = (lock_index + 2) % CONCURRENCY_LEVEL].tryLock()) { // Too many concurrent signals already - atomicInc(_failures[-ticks_skipped]); + atomicIncRelaxed(_failures[-ticks_skipped]); if (event_type == BCI_CPU && _cpu_engine == &perf_events) { // Need to reset PerfEvents ring buffer, even though we discard the @@ -788,7 +788,7 @@ void Profiler::recordQueueTime(int tid, QueueTimeEvent *event) { void Profiler::recordExternalSample(u64 weight, int tid, int num_frames, ASGCT_CallFrame *frames, bool truncated, jint event_type, Event *event) { - atomicInc(_total_samples); + atomicIncRelaxed(_total_samples); u64 call_trace_id = _call_trace_storage.put(num_frames, frames, truncated, weight); @@ -798,7 +798,7 @@ void Profiler::recordExternalSample(u64 weight, int tid, int num_frames, !_locks[lock_index = (lock_index + 1) % CONCURRENCY_LEVEL].tryLock() && !_locks[lock_index = (lock_index + 2) % CONCURRENCY_LEVEL].tryLock()) { // Too many concurrent signals already - atomicInc(_failures[-ticks_skipped]); + atomicIncRelaxed(_failures[-ticks_skipped]); return; } diff --git a/ddprof-lib/src/main/cpp/profiler.h b/ddprof-lib/src/main/cpp/profiler.h index 1e8e9902f..09229d845 100644 --- a/ddprof-lib/src/main/cpp/profiler.h +++ b/ddprof-lib/src/main/cpp/profiler.h @@ -93,7 +93,7 @@ class Profiler { WaitableMutex _timer_lock; void *_timer_id; - u64 _total_samples; + volatile u64 _total_samples; u64 _failures[ASGCT_FAILURE_TYPES]; SpinLock _class_map_lock; From fd1af3b6d93bfe3d8009f2f0bc6af8143e5104e5 Mon Sep 17 00:00:00 2001 From: Zhengyu Gu Date: Wed, 17 Sep 2025 10:51:20 -0400 Subject: [PATCH 6/6] One more --- ddprof-lib/src/main/cpp/livenessTracker.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ddprof-lib/src/main/cpp/livenessTracker.cpp b/ddprof-lib/src/main/cpp/livenessTracker.cpp index 53eff94ee..1923c991f 100644 --- a/ddprof-lib/src/main/cpp/livenessTracker.cpp +++ b/ddprof-lib/src/main/cpp/livenessTracker.cpp @@ -30,8 +30,8 @@ void LivenessTracker::cleanup_table(bool forced) { u64 target_gc_epoch = load(_gc_epoch); if ((target_gc_epoch == _last_gc_epoch || - !__sync_bool_compare_and_swap(&_last_gc_epoch, current, - target_gc_epoch)) && + !__atomic_compare_exchange_n(&_last_gc_epoch, ¤t, + target_gc_epoch, false, __ATOMIC_RELAXED, __ATOMIC_RELAXED)) && !forced) { // if the last processed GC epoch hasn't changed, or if we failed to update // it, there's nothing to do