From 3d6f750c5ac5ba0297424e13535447293a86aabf Mon Sep 17 00:00:00 2001 From: Yongqiang YANG Date: Thu, 17 Nov 2022 11:15:27 +0000 Subject: [PATCH 01/12] [improvement](tcmalloc) add moderate mode and avoid oom with a lot of cache ReleaseToSystem aggressively when there is little free memory. --- be/src/common/config.h | 2 +- be/src/common/daemon.cpp | 39 ++++++++++++++++++++++++++++++--------- 2 files changed, 31 insertions(+), 10 deletions(-) diff --git a/be/src/common/config.h b/be/src/common/config.h index 554735b7c5400b..9a6750067a5ce1 100644 --- a/be/src/common/config.h +++ b/be/src/common/config.h @@ -48,7 +48,7 @@ CONF_String(priority_networks, ""); // memory mode // performance or compact -CONF_String(memory_mode, "performance"); +CONF_String(memory_mode, "moderate"); // process memory limit specified as number of bytes // ('[bB]?'), megabytes ('[mM]'), gigabytes ('[gG]'), diff --git a/be/src/common/daemon.cpp b/be/src/common/daemon.cpp index 8e3cc663da5236..f8427835bc3f25 100644 --- a/be/src/common/daemon.cpp +++ b/be/src/common/daemon.cpp @@ -74,28 +74,49 @@ void Daemon::tcmalloc_gc_thread() { size_t tc_use_memory_min = MemInfo::mem_limit(); if (config::memory_mode == std::string("performance")) { + // a higher tc_use_memory_min to use memory as tcmalloc cache as much as possible tc_use_memory_min = std::max(tc_use_memory_min / 10 * 9, tc_use_memory_min - (size_t)10 * 1024 * 1024 * 1024); + } else if (config::memory_mode == std::string("compact")) { + // a limited tc_use_memory_min to limit memory used as cache of tcmalloc + tc_use_memory_min = std::min((size_t)10 * 1024 * 1024 * 1024, tc_use_memory_min >> 1); } else { + // a moderate tc_use_memory_min tc_use_memory_min >>= 1; } - while (!_stop_background_threads_latch.wait_for(std::chrono::seconds(10))) { + int32_t interval_seconds = 2; + while (!_stop_background_threads_latch.wait_for(std::chrono::seconds(interval_seconds))) { size_t used_size = 0; - size_t alloc_size = 0; + size_t alloc_size = 0; + 
MallocExtension::instance()->GetNumericProperty("generic.total_physical_bytes", + &alloc_size); MallocExtension::instance()->GetNumericProperty("generic.current_allocated_bytes", &used_size); - MallocExtension::instance()->GetNumericProperty("tcmalloc.pageheap_free_bytes", &free_size); - size_t alloc_size = used_size + free_size; - LOG(INFO) << "tcmalloc.pageheap_free_bytes " << free_size - << ", generic.current_allocated_bytes " << used_size << ", tc_use_memory_min " + + LOG(INFO) << "generic.current_allocated_bytes " << used_size + << ", generic.total_physical_bytes " << alloc_size << ", tc_use_memory_min " << tc_use_memory_min; if (alloc_size > tc_use_memory_min) { - size_t max_free_size = alloc_size * 20 / 100; - if (free_size > max_free_size) { - MallocExtension::instance()->ReleaseToSystem(free_size - max_free_size); + // Limit size of cache of tcmalloc to avoid oom. + // alloc_size > mem_limit: release memory aggressively because we are reaching oom. + // alloc_size < mem_limit: limit cache size of tcmalloc under used_size * 20%. 
+ size_t max_free_size = 0; + + if (MemInfo::mem_limit() > alloc_size) { + max_free_size = MemInfo::mem_limit() - alloc_size; + interval_seconds = 2; + } else { + interval_seconds = 1; + } + + max_free_size = std::min(used_size * 20 / 100, max_free_size); + size_t free_size = alloc_size - used_size - max_free_size; + if (free_size > 0) { + LOG(INFO) << "try to release cache of tcmalloc, bytes " << free_size; + MallocExtension::instance()->ReleaseToSystem(free_size); } } } From c69fcebb320b370aa3c01172785ca2bcff522fdc Mon Sep 17 00:00:00 2001 From: Yongqiang YANG Date: Thu, 17 Nov 2022 11:20:14 +0000 Subject: [PATCH 02/12] format --- be/src/common/daemon.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/be/src/common/daemon.cpp b/be/src/common/daemon.cpp index f8427835bc3f25..56d048e6091333 100644 --- a/be/src/common/daemon.cpp +++ b/be/src/common/daemon.cpp @@ -100,7 +100,7 @@ void Daemon::tcmalloc_gc_thread() { << tc_use_memory_min; if (alloc_size > tc_use_memory_min) { - // Limit size of cache of tcmalloc to avoid oom. + // Limit size of cache of tcmalloc to avoid oom. // alloc_size > mem_limit: release memory aggressively because we are reaching oom. // alloc_size < mem_limit: limit cache size of tcmalloc under used_size * 20%. size_t max_free_size = 0; From bb1111ada506c6f5169c86d73f519a3710ffc95e Mon Sep 17 00:00:00 2001 From: Yongqiang YANG Date: Thu, 17 Nov 2022 13:02:17 +0000 Subject: [PATCH 03/12] simplify logic of gc memory of tcmalloc Control size of tcmalloc cache by release_rate and max_free_percent. 
--- be/src/common/daemon.cpp | 74 ++++++++++++++++++++-------------------- 1 file changed, 37 insertions(+), 37 deletions(-) diff --git a/be/src/common/daemon.cpp b/be/src/common/daemon.cpp index 56d048e6091333..014b94d2d3d031 100644 --- a/be/src/common/daemon.cpp +++ b/be/src/common/daemon.cpp @@ -72,52 +72,52 @@ void Daemon::tcmalloc_gc_thread() { #if !defined(ADDRESS_SANITIZER) && !defined(LEAK_SANITIZER) && !defined(THREAD_SANITIZER) && \ !defined(USE_JEMALLOC) - size_t tc_use_memory_min = MemInfo::mem_limit(); + // Limit size of tcmalloc cache via release_rate and max_free_percent. + // performance: release_rate = 1.0 and max_free_percent = 1000; + // compact: release_rate = 20.0 and max_free_percent = 20; + // moderate: release_rate = 5.0 and max_free_percent =40; + size_t max_free_percent = 40; + double release_rate = 5.0; if (config::memory_mode == std::string("performance")) { - // a higher tc_use_memory_min to use memory as tcmalloc cache as much as possible - tc_use_memory_min = std::max(tc_use_memory_min / 10 * 9, - tc_use_memory_min - (size_t)10 * 1024 * 1024 * 1024); + release_rate = 1.0; + max_free_percent = 1000; } else if (config::memory_mode == std::string("compact")) { - // a limited tc_use_memory_min to limit memory used as cache of tcmalloc - tc_use_memory_min = std::min((size_t)10 * 1024 * 1024 * 1024, tc_use_memory_min >> 1); - } else { - // a moderate tc_use_memory_min - tc_use_memory_min >>= 1; + release_rate = 20.0; + max_free_percent = 20; } + MallocExtension::instance()->SetMemoryReleaseRate(release_rate); int32_t interval_seconds = 2; while (!_stop_background_threads_latch.wait_for(std::chrono::seconds(interval_seconds))) { - size_t used_size = 0; - size_t alloc_size = 0; + size_t used_bytes = 0; + size_t alloc_bytes = 0; MallocExtension::instance()->GetNumericProperty("generic.total_physical_bytes", - &alloc_size); + &alloc_bytes); MallocExtension::instance()->GetNumericProperty("generic.current_allocated_bytes", - &used_size); - - 
LOG(INFO) << "generic.current_allocated_bytes " << used_size - << ", generic.total_physical_bytes " << alloc_size << ", tc_use_memory_min " - << tc_use_memory_min; - - if (alloc_size > tc_use_memory_min) { - // Limit size of cache of tcmalloc to avoid oom. - // alloc_size > mem_limit: release memory aggressively because we are reaching oom. - // alloc_size < mem_limit: limit cache size of tcmalloc under used_size * 20%. - size_t max_free_size = 0; - - if (MemInfo::mem_limit() > alloc_size) { - max_free_size = MemInfo::mem_limit() - alloc_size; - interval_seconds = 2; - } else { - interval_seconds = 1; - } - - max_free_size = std::min(used_size * 20 / 100, max_free_size); - size_t free_size = alloc_size - used_size - max_free_size; - if (free_size > 0) { - LOG(INFO) << "try to release cache of tcmalloc, bytes " << free_size; - MallocExtension::instance()->ReleaseToSystem(free_size); - } + &used_bytes); + + LOG(INFO) << "generic.current_allocated_bytes " << used_bytes + << ", generic.total_physical_bytes " << alloc_bytes << ", max_free_percent " + << max_free_percent << ", release_rate " << release_rate; + + size_t cached_bytes = alloc_bytes - used_bytes; + size_t to_free_bytes = cached_bytes - (used_bytes * max_free_percent / 100); + if (MemInfo::mem_limit() <= alloc_bytes) { + // We are reaching oom, so release cache aggressively. + // Ideally, we should reuse cache and not allocate from system any more, + // however, it is hard to set limit on cache of tcmalloc and doris + // use mmap in vectorized mode. 
+ to_free_bytes = cached_bytes; + MallocExtension::instance()->SetMemoryReleaseRate(100.0); + interval_seconds = 1; + } else if (interval_seconds == 1) { + MallocExtension::instance()->SetMemoryReleaseRate(release_rate); + interval_seconds = 2; + } + if (to_free_bytes > 0) { + LOG(INFO) << "try to release cache of tcmalloc, bytes " << to_free_bytes; + MallocExtension::instance()->ReleaseToSystem(to_free_bytes); } } #endif From 93b6d2a2b415499e6109b914ac773976b5ddf6e9 Mon Sep 17 00:00:00 2001 From: Yongqiang YANG Date: Thu, 17 Nov 2022 13:39:48 +0000 Subject: [PATCH 04/12] consider physical memory when gc tc memory --- be/src/common/daemon.cpp | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/be/src/common/daemon.cpp b/be/src/common/daemon.cpp index 014b94d2d3d031..d6d3b3117feb26 100644 --- a/be/src/common/daemon.cpp +++ b/be/src/common/daemon.cpp @@ -103,7 +103,10 @@ void Daemon::tcmalloc_gc_thread() { size_t cached_bytes = alloc_bytes - used_bytes; size_t to_free_bytes = cached_bytes - (used_bytes * max_free_percent / 100); - if (MemInfo::mem_limit() <= alloc_bytes) { + + size_t physical_limit = MemInfo::physical_mem() * 90 / 100; + physical_limit = std::min(physical_limit, MemInfo::physical_mem() - (size_t)3 * 1024 * 1024 * 1024); + if (MemInfo::mem_limit() <= alloc_bytes || physical_limit <= alloc_bytes) { // We are reaching oom, so release cache aggressively. 
// Ideally, we should reuse cache and not allocate from system any more, // however, it is hard to set limit on cache of tcmalloc and doris From 7298961b0ccd209e2dee7ecd11cd952e3a54fe79 Mon Sep 17 00:00:00 2001 From: Yongqiang YANG Date: Thu, 17 Nov 2022 15:44:07 +0000 Subject: [PATCH 05/12] optimize gc strategy --- be/src/common/daemon.cpp | 45 ++++++++++++++++++++++++++-------------- 1 file changed, 29 insertions(+), 16 deletions(-) diff --git a/be/src/common/daemon.cpp b/be/src/common/daemon.cpp index d6d3b3117feb26..05ca79d67b3294 100644 --- a/be/src/common/daemon.cpp +++ b/be/src/common/daemon.cpp @@ -72,18 +72,18 @@ void Daemon::tcmalloc_gc_thread() { #if !defined(ADDRESS_SANITIZER) && !defined(LEAK_SANITIZER) && !defined(THREAD_SANITIZER) && \ !defined(USE_JEMALLOC) - // Limit size of tcmalloc cache via release_rate and max_free_percent. - // performance: release_rate = 1.0 and max_free_percent = 1000; - // compact: release_rate = 20.0 and max_free_percent = 20; - // moderate: release_rate = 5.0 and max_free_percent =40; - size_t max_free_percent = 40; + // Limit size of tcmalloc cache via release_rate and max_cache_percent. 
+ // performance: release_rate = 1.0 and max_cache_percent = 1000; + // compact: release_rate = 20.0 and max_cache_percent = 20; + // moderate: release_rate = 5.0 and max_cache_percent =40; + int64_t max_cache_percent = 40; double release_rate = 5.0; if (config::memory_mode == std::string("performance")) { release_rate = 1.0; - max_free_percent = 1000; + max_cache_percent = 1000; } else if (config::memory_mode == std::string("compact")) { release_rate = 20.0; - max_free_percent = 20; + max_cache_percent = 20; } MallocExtension::instance()->SetMemoryReleaseRate(release_rate); @@ -91,29 +91,42 @@ void Daemon::tcmalloc_gc_thread() { while (!_stop_background_threads_latch.wait_for(std::chrono::seconds(interval_seconds))) { size_t used_bytes = 0; size_t alloc_bytes = 0; + double memory_pressure = 0; MallocExtension::instance()->GetNumericProperty("generic.total_physical_bytes", &alloc_bytes); MallocExtension::instance()->GetNumericProperty("generic.current_allocated_bytes", &used_bytes); - LOG(INFO) << "generic.current_allocated_bytes " << used_bytes - << ", generic.total_physical_bytes " << alloc_bytes << ", max_free_percent " - << max_free_percent << ", release_rate " << release_rate; + int64_t cached_bytes = alloc_bytes - used_bytes; + int64_t to_free_bytes = cached_bytes - (used_bytes * max_cache_percent / 100); + + int64_t physical_limit = MemInfo::physical_mem() * 90 / 100; + physical_limit = std::min(physical_limit, MemInfo::hard_mem_limit() * 90 / 100); + memory_pressure = (double)alloc_bytes / physical_limit; + memory_pressure = std::max(memory_pressure, (double) alloc_bytes / MemInfo::mem_limit() * 90 / 100); - size_t cached_bytes = alloc_bytes - used_bytes; - size_t to_free_bytes = cached_bytes - (used_bytes * max_free_percent / 100); + LOG(INFO) << "generic.current_allocated_bytes " << used_bytes + << ", generic.total_physical_bytes " << alloc_bytes << ", max_cache_percent " + << max_cache_percent << ", release_rate " << release_rate << ", memory_pressure " 
+ << memory_pressure; - size_t physical_limit = MemInfo::physical_mem() * 90 / 100; - physical_limit = std::min(physical_limit, MemInfo::physical_mem() - (size_t)3 * 1024 * 1024 * 1024); - if (MemInfo::mem_limit() <= alloc_bytes || physical_limit <= alloc_bytes) { + if (memory_pressure >= 0.95) { // We are reaching oom, so release cache aggressively. // Ideally, we should reuse cache and not allocate from system any more, // however, it is hard to set limit on cache of tcmalloc and doris // use mmap in vectorized mode. - to_free_bytes = cached_bytes; + int64_t min_free_bytes = std::max(alloc_bytes - MemInfo::mem_limit() *90 / 100, + alloc_bytes - physical_limit); + to_free_bytes = std::max(to_free_bytes, min_free_bytes); + to_free_bytes = std::max(to_free_bytes, cached_bytes * 80 / 100); MallocExtension::instance()->SetMemoryReleaseRate(100.0); interval_seconds = 1; + LOG(INFO) << "memory is under pressure, release bytes " << to_free_bytes; + } else if (memory_pressure >= 0.8) { + to_free_bytes = std::max(to_free_bytes, cached_bytes * 20 / 100); + MallocExtension::instance()->SetMemoryReleaseRate(50.0); + interval_seconds = 1; } else if (interval_seconds == 1) { MallocExtension::instance()->SetMemoryReleaseRate(release_rate); interval_seconds = 2; From 4fd62a7407aa336df5c00f7efb2458b5720c662f Mon Sep 17 00:00:00 2001 From: Yongqiang YANG Date: Fri, 18 Nov 2022 10:10:28 +0000 Subject: [PATCH 06/12] refine --- be/src/common/daemon.cpp | 73 +++++++++++++++++++++++----------------- 1 file changed, 42 insertions(+), 31 deletions(-) diff --git a/be/src/common/daemon.cpp b/be/src/common/daemon.cpp index 05ca79d67b3294..0f72a696acee7b 100644 --- a/be/src/common/daemon.cpp +++ b/be/src/common/daemon.cpp @@ -78,62 +78,73 @@ void Daemon::tcmalloc_gc_thread() { // moderate: release_rate = 5.0 and max_cache_percent =40; int64_t max_cache_percent = 40; double release_rate = 5.0; + double pressure_limit = 0.85; if (config::memory_mode == std::string("performance")) { release_rate 
= 1.0; max_cache_percent = 1000; + pressure_limit = 0.9; } else if (config::memory_mode == std::string("compact")) { release_rate = 20.0; max_cache_percent = 20; + pressure_limit = 0.8; } MallocExtension::instance()->SetMemoryReleaseRate(release_rate); - int32_t interval_seconds = 2; - while (!_stop_background_threads_latch.wait_for(std::chrono::seconds(interval_seconds))) { - size_t used_bytes = 0; - size_t alloc_bytes = 0; - double memory_pressure = 0; + size_t physical_limit_bytes = std::min(MemInfo::hard_mem_limit(), MemInfo::mem_limit()); + int last_seconds = 0; + + while (!_stop_background_threads_latch.wait_for(std::chrono::seconds(1))) { + size_t tc_used_bytes = 0; + size_t tc_alloc_bytes = 0; + size_t rss = PerfCounters::get_vm_rss(); MallocExtension::instance()->GetNumericProperty("generic.total_physical_bytes", - &alloc_bytes); + &tc_alloc_bytes); MallocExtension::instance()->GetNumericProperty("generic.current_allocated_bytes", - &used_bytes); + &tc_used_bytes); + int64_t tc_cached_bytes = tc_alloc_bytes - tc_used_bytes; + int64_t to_free_bytes = (int64_t)tc_cached_bytes - (tc_used_bytes * max_cache_percent / 100); - int64_t cached_bytes = alloc_bytes - used_bytes; - int64_t to_free_bytes = cached_bytes - (used_bytes * max_cache_percent / 100); - - int64_t physical_limit = MemInfo::physical_mem() * 90 / 100; - physical_limit = std::min(physical_limit, MemInfo::hard_mem_limit() * 90 / 100); - memory_pressure = (double)alloc_bytes / physical_limit; - memory_pressure = std::max(memory_pressure, (double) alloc_bytes / MemInfo::mem_limit() * 90 / 100); + double memory_pressure = 0; + int64_t alloc_bytes = std::max(rss, tc_alloc_bytes); + memory_pressure = (double)alloc_bytes / physical_limit_bytes; - LOG(INFO) << "generic.current_allocated_bytes " << used_bytes - << ", generic.total_physical_bytes " << alloc_bytes << ", max_cache_percent " - << max_cache_percent << ", release_rate " << release_rate << ", memory_pressure " - << memory_pressure; + LOG(INFO) << 
"generic.current_allocated_bytes " << tc_used_bytes + << ", generic.total_physical_bytes " << tc_alloc_bytes << ", rss " << rss + << ", max_cache_percent " << max_cache_percent << ", release_rate " << release_rate + << ", memory_pressure " << memory_pressure << ", physical_limit_bytes " + << physical_limit_bytes; - if (memory_pressure >= 0.95) { + if (memory_pressure > pressure_limit) { // We are reaching oom, so release cache aggressively. // Ideally, we should reuse cache and not allocate from system any more, // however, it is hard to set limit on cache of tcmalloc and doris // use mmap in vectorized mode. - int64_t min_free_bytes = std::max(alloc_bytes - MemInfo::mem_limit() *90 / 100, - alloc_bytes - physical_limit); + int64_t min_free_bytes = alloc_bytes - physical_limit_bytes + (int64_t)2 * 1024 * 1024 * 1024; to_free_bytes = std::max(to_free_bytes, min_free_bytes); - to_free_bytes = std::max(to_free_bytes, cached_bytes * 80 / 100); + LOG(INFO) << "release bytes " << to_free_bytes; MallocExtension::instance()->SetMemoryReleaseRate(100.0); - interval_seconds = 1; - LOG(INFO) << "memory is under pressure, release bytes " << to_free_bytes; - } else if (memory_pressure >= 0.8) { - to_free_bytes = std::max(to_free_bytes, cached_bytes * 20 / 100); + last_seconds = 5; + } else if (memory_pressure >= (pressure_limit - 0.05)) { + to_free_bytes = std::max(to_free_bytes, tc_cached_bytes * 5 / 100); MallocExtension::instance()->SetMemoryReleaseRate(50.0); - interval_seconds = 1; - } else if (interval_seconds == 1) { + last_seconds = 5; + } else if (memory_pressure >= 0.7 && release_rate < 10.0) { + MallocExtension::instance()->SetMemoryReleaseRate(10.0); + } else if (memory_pressure >= 0.6 && release_rate < 5.0) { + MallocExtension::instance()->SetMemoryReleaseRate(5.0); + } else { MallocExtension::instance()->SetMemoryReleaseRate(release_rate); - interval_seconds = 2; } if (to_free_bytes > 0) { - LOG(INFO) << "try to release cache of tcmalloc, bytes " << to_free_bytes; 
- MallocExtension::instance()->ReleaseToSystem(to_free_bytes); + last_seconds += 1; + if (last_seconds >= 30) { + LOG(INFO) << "try to release cache of tcmalloc, bytes " << to_free_bytes; + MallocExtension::instance()->ReleaseToSystem(to_free_bytes); + last_seconds = 0; + } + } else { + last_seconds = 0; } } #endif From f6e2b3774771621b3d711e31035ef940e02036cc Mon Sep 17 00:00:00 2001 From: Yongqiang YANG Date: Wed, 23 Nov 2022 03:29:32 +0000 Subject: [PATCH 07/12] refactor --- be/src/common/daemon.cpp | 113 ++++++++++++------ be/src/runtime/memory/mem_tracker_limiter.cpp | 1 + be/src/runtime/memory/mem_tracker_limiter.h | 6 + be/src/service/doris_main.cpp | 19 ++- 4 files changed, 89 insertions(+), 50 deletions(-) diff --git a/be/src/common/daemon.cpp b/be/src/common/daemon.cpp index 0f72a696acee7b..16a81243d02d51 100644 --- a/be/src/common/daemon.cpp +++ b/be/src/common/daemon.cpp @@ -76,24 +76,40 @@ void Daemon::tcmalloc_gc_thread() { // performance: release_rate = 1.0 and max_cache_percent = 1000; // compact: release_rate = 20.0 and max_cache_percent = 20; // moderate: release_rate = 5.0 and max_cache_percent =40; - int64_t max_cache_percent = 40; - double release_rate = 5.0; - double pressure_limit = 0.85; + // mem_pressure [0, 0.4], release_rate=1.0 + // mem_pressure [0.4, 0.5], release_rate=5.0 + // mem_pressure [0.5, 0.6], release_rate=10.0 + // mem_pressure [0.6, 0.7], release_rate=30.0 + // mem_pressure [0.7, 0.8], release_rate=30.0 + int64_t max_cache_percent = 60; + double release_rates[10] = { 1.0, 1.0, 1.0, 5.0, 5.0, 20.0, 50.0, 100.0, 500.0, 2000.0 }; + int64_t pressure_limit = 90; + bool is_performance_mode = false; + size_t physical_limit_bytes = std::min(MemInfo::hard_mem_limit(), MemInfo::mem_limit()); + if (config::memory_mode == std::string("performance")) { - release_rate = 1.0; - max_cache_percent = 1000; - pressure_limit = 0.9; + max_cache_percent = 100; + pressure_limit = 90; + is_performance_mode = true; + physical_limit_bytes = 
std::min(MemInfo::mem_limit(), MemInfo::physical_mem()); } else if (config::memory_mode == std::string("compact")) { - release_rate = 20.0; max_cache_percent = 20; - pressure_limit = 0.8; + pressure_limit = 80; } - MallocExtension::instance()->SetMemoryReleaseRate(release_rate); - size_t physical_limit_bytes = std::min(MemInfo::hard_mem_limit(), MemInfo::mem_limit()); - int last_seconds = 0; + int last_ms = 0; + const int kMaxLastMs = 30000; + const int kIntervalMs = 10; + size_t init_aggressive_decommit = 0; + size_t current_aggressive_decommit = 0; + size_t expected_aggressive_decommit = 0; + int64_t last_memory_pressure = 0; - while (!_stop_background_threads_latch.wait_for(std::chrono::seconds(1))) { + MallocExtension::instance()->GetNumericProperty("tcmalloc.aggressive_memory_decommit", + &init_aggressive_decommit); + current_aggressive_decommit = init_aggressive_decommit; + + while (!_stop_background_threads_latch.wait_for(std::chrono::milliseconds(kIntervalMs))) { size_t tc_used_bytes = 0; size_t tc_alloc_bytes = 0; size_t rss = PerfCounters::get_vm_rss(); @@ -105,46 +121,65 @@ void Daemon::tcmalloc_gc_thread() { int64_t tc_cached_bytes = tc_alloc_bytes - tc_used_bytes; int64_t to_free_bytes = (int64_t)tc_cached_bytes - (tc_used_bytes * max_cache_percent / 100); - double memory_pressure = 0; + int64_t memory_pressure = 0; int64_t alloc_bytes = std::max(rss, tc_alloc_bytes); - memory_pressure = (double)alloc_bytes / physical_limit_bytes; - - LOG(INFO) << "generic.current_allocated_bytes " << tc_used_bytes - << ", generic.total_physical_bytes " << tc_alloc_bytes << ", rss " << rss - << ", max_cache_percent " << max_cache_percent << ", release_rate " << release_rate - << ", memory_pressure " << memory_pressure << ", physical_limit_bytes " - << physical_limit_bytes; + memory_pressure = alloc_bytes * 100 / physical_limit_bytes; + expected_aggressive_decommit = init_aggressive_decommit; if (memory_pressure > pressure_limit) { // We are reaching oom, so release 
cache aggressively. // Ideally, we should reuse cache and not allocate from system any more, // however, it is hard to set limit on cache of tcmalloc and doris // use mmap in vectorized mode. - int64_t min_free_bytes = alloc_bytes - physical_limit_bytes + (int64_t)2 * 1024 * 1024 * 1024; - to_free_bytes = std::max(to_free_bytes, min_free_bytes); - LOG(INFO) << "release bytes " << to_free_bytes; - MallocExtension::instance()->SetMemoryReleaseRate(100.0); - last_seconds = 5; - } else if (memory_pressure >= (pressure_limit - 0.05)) { - to_free_bytes = std::max(to_free_bytes, tc_cached_bytes * 5 / 100); - MallocExtension::instance()->SetMemoryReleaseRate(50.0); - last_seconds = 5; - } else if (memory_pressure >= 0.7 && release_rate < 10.0) { - MallocExtension::instance()->SetMemoryReleaseRate(10.0); - } else if (memory_pressure >= 0.6 && release_rate < 5.0) { - MallocExtension::instance()->SetMemoryReleaseRate(5.0); + if (last_memory_pressure <= pressure_limit) { + int64_t min_free_bytes = alloc_bytes - physical_limit_bytes * 9 / 10; + to_free_bytes = std::max(to_free_bytes, min_free_bytes); + to_free_bytes = std::max(to_free_bytes, tc_cached_bytes * 30 / 100); + to_free_bytes = std::min(to_free_bytes, tc_cached_bytes); + expected_aggressive_decommit = 1; + } else { + // release rate is enough. 
+ to_free_bytes = 0; + } + last_ms = kMaxLastMs; + } else if (memory_pressure > (pressure_limit - 10)) { + if (last_memory_pressure <= (pressure_limit - 10)) { + to_free_bytes = std::max(to_free_bytes, tc_cached_bytes * 10 / 100); + } else { + to_free_bytes = 0; + } + } + + int release_rate_index = memory_pressure / 10; + double release_rate = 1.0; + if (release_rate_index >= sizeof(release_rates)) { + release_rate = 2000.0; } else { - MallocExtension::instance()->SetMemoryReleaseRate(release_rate); + release_rate = release_rates[release_rate_index]; } + MallocExtension::instance()->SetMemoryReleaseRate(release_rate); + + if ((current_aggressive_decommit != expected_aggressive_decommit) && !is_performance_mode) { + MallocExtension::instance()->SetNumericProperty("tcmalloc.aggressive_memory_decommit", + expected_aggressive_decommit); + current_aggressive_decommit = expected_aggressive_decommit; + } + + last_memory_pressure = memory_pressure; if (to_free_bytes > 0) { - last_seconds += 1; - if (last_seconds >= 30) { - LOG(INFO) << "try to release cache of tcmalloc, bytes " << to_free_bytes; + last_ms += kIntervalMs; + if (last_ms >= kMaxLastMs) { + LOG(INFO) << "generic.current_allocated_bytes " << tc_used_bytes + << ", generic.total_physical_bytes " << tc_alloc_bytes << ", rss " << rss + << ", max_cache_percent " << max_cache_percent << ", release_rate " << release_rate + << ", memory_pressure " << memory_pressure << ", physical_limit_bytes " + << physical_limit_bytes << ", to_free_bytes " << to_free_bytes + << ", current_aggressive_decommit " << current_aggressive_decommit; MallocExtension::instance()->ReleaseToSystem(to_free_bytes); - last_seconds = 0; + last_ms = 0; } } else { - last_seconds = 0; + last_ms = 0; } } #endif diff --git a/be/src/runtime/memory/mem_tracker_limiter.cpp b/be/src/runtime/memory/mem_tracker_limiter.cpp index 9d40acbdd5df88..c1ef640c7b5df7 100644 --- a/be/src/runtime/memory/mem_tracker_limiter.cpp +++ 
b/be/src/runtime/memory/mem_tracker_limiter.cpp @@ -41,6 +41,7 @@ struct TrackerLimiterGroup { static std::vector mem_tracker_limiter_pool(1000); std::atomic MemTrackerLimiter::_enable_print_log_process_usage {true}; +bool MemTrackerLimiter::_oom_avoidance {true}; MemTrackerLimiter::MemTrackerLimiter(Type type, const std::string& label, int64_t byte_limit, RuntimeProfile* profile) { diff --git a/be/src/runtime/memory/mem_tracker_limiter.h b/be/src/runtime/memory/mem_tracker_limiter.h index 3c8876f40847db..8685cdf9532ce4 100644 --- a/be/src/runtime/memory/mem_tracker_limiter.h +++ b/be/src/runtime/memory/mem_tracker_limiter.h @@ -79,6 +79,9 @@ class MemTrackerLimiter final : public MemTracker { ~MemTrackerLimiter(); static bool sys_mem_exceed_limit_check(int64_t bytes) { + if (!_oom_avoidance) { + return false; + } // Limit process memory usage using the actual physical memory of the process in `/proc/self/status`. // This is independent of the consumption value of the mem tracker, which counts the virtual memory // of the process malloc. @@ -109,6 +112,8 @@ class MemTrackerLimiter final : public MemTracker { // this tracker limiter. int64_t spare_capacity() const { return _limit - consumption(); } + static void disable_oom_avoidance() { _oom_avoidance = false; } + public: // If need to consume the tracker frequently, use it void cache_consume(int64_t bytes); @@ -208,6 +213,7 @@ class MemTrackerLimiter final : public MemTracker { // Avoid frequent printing. bool _enable_print_log_usage = false; static std::atomic _enable_print_log_process_usage; + static bool _oom_avoidance; // Iterator into mem_tracker_limiter_pool for this object. Stored to have O(1) remove. 
std::list::iterator _tracker_limiter_group_it; diff --git a/be/src/service/doris_main.cpp b/be/src/service/doris_main.cpp index f43d95eaa9a8dd..d9bae539049861 100644 --- a/be/src/service/doris_main.cpp +++ b/be/src/service/doris_main.cpp @@ -323,21 +323,18 @@ int main(int argc, char** argv) { #if !defined(__SANITIZE_ADDRESS__) && !defined(ADDRESS_SANITIZER) && !defined(LEAK_SANITIZER) && \ !defined(THREAD_SANITIZER) && !defined(USE_JEMALLOC) // Change the total TCMalloc thread cache size if necessary. - size_t total_thread_cache_bytes; - if (!MallocExtension::instance()->GetNumericProperty("tcmalloc.max_total_thread_cache_bytes", - &total_thread_cache_bytes)) { - fprintf(stderr, "Failed to get TCMalloc total thread cache size.\n"); - } const size_t kDefaultTotalThreadCacheBytes = 1024 * 1024 * 1024; - if (total_thread_cache_bytes < kDefaultTotalThreadCacheBytes) { - if (!MallocExtension::instance()->SetNumericProperty( - "tcmalloc.max_total_thread_cache_bytes", kDefaultTotalThreadCacheBytes)) { - fprintf(stderr, "Failed to change TCMalloc total thread cache size.\n"); - return -1; - } + if (!MallocExtension::instance()->SetNumericProperty( + "tcmalloc.max_total_thread_cache_bytes", kDefaultTotalThreadCacheBytes)) { + fprintf(stderr, "Failed to change TCMalloc total thread cache size.\n"); + return -1; } #endif + if (doris::config::memory_mode == std::string("performance")) { + doris::MemTrackerLimiter::disable_oom_avoidance(); + } + std::vector paths; auto olap_res = doris::parse_conf_store_paths(doris::config::storage_root_path, &paths); if (!olap_res) { From 8eddc101fc70d4564932c87d09193343c607e632 Mon Sep 17 00:00:00 2001 From: Yongqiang YANG Date: Wed, 23 Nov 2022 06:13:59 +0000 Subject: [PATCH 08/12] fix --- be/src/common/daemon.cpp | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/be/src/common/daemon.cpp b/be/src/common/daemon.cpp index 16a81243d02d51..6002d1ee5ed529 100644 --- a/be/src/common/daemon.cpp +++ b/be/src/common/daemon.cpp 
@@ -73,14 +73,7 @@ void Daemon::tcmalloc_gc_thread() { !defined(USE_JEMALLOC) // Limit size of tcmalloc cache via release_rate and max_cache_percent. - // performance: release_rate = 1.0 and max_cache_percent = 1000; - // compact: release_rate = 20.0 and max_cache_percent = 20; - // moderate: release_rate = 5.0 and max_cache_percent =40; - // mem_pressure [0, 0.4], release_rate=1.0 - // mem_pressure [0.4, 0.5], release_rate=5.0 - // mem_pressure [0.5, 0.6], release_rate=10.0 - // mem_pressure [0.6, 0.7], release_rate=30.0 - // mem_pressure [0.7, 0.8], release_rate=30.0 + // We adjust release_rate according to memory_pressure, which is usage percent of memory. int64_t max_cache_percent = 60; double release_rates[10] = { 1.0, 1.0, 1.0, 5.0, 5.0, 20.0, 50.0, 100.0, 500.0, 2000.0 }; int64_t pressure_limit = 90; From 34865ace95e706a9c41dbc8b13ff186ae1c33b1f Mon Sep 17 00:00:00 2001 From: Yongqiang YANG Date: Thu, 24 Nov 2022 06:29:58 +0000 Subject: [PATCH 09/12] format --- be/src/common/daemon.cpp | 26 ++++++++++++++------------ be/src/service/doris_main.cpp | 4 ++-- 2 files changed, 16 insertions(+), 14 deletions(-) diff --git a/be/src/common/daemon.cpp b/be/src/common/daemon.cpp index 6002d1ee5ed529..e19b09cc773f21 100644 --- a/be/src/common/daemon.cpp +++ b/be/src/common/daemon.cpp @@ -75,7 +75,7 @@ void Daemon::tcmalloc_gc_thread() { // Limit size of tcmalloc cache via release_rate and max_cache_percent. // We adjust release_rate according to memory_pressure, which is usage percent of memory. 
int64_t max_cache_percent = 60; - double release_rates[10] = { 1.0, 1.0, 1.0, 5.0, 5.0, 20.0, 50.0, 100.0, 500.0, 2000.0 }; + double release_rates[10] = {1.0, 1.0, 1.0, 5.0, 5.0, 20.0, 50.0, 100.0, 500.0, 2000.0}; int64_t pressure_limit = 90; bool is_performance_mode = false; size_t physical_limit_bytes = std::min(MemInfo::hard_mem_limit(), MemInfo::mem_limit()); @@ -96,7 +96,7 @@ void Daemon::tcmalloc_gc_thread() { size_t init_aggressive_decommit = 0; size_t current_aggressive_decommit = 0; size_t expected_aggressive_decommit = 0; - int64_t last_memory_pressure = 0; + int64_t last_memory_pressure = 0; MallocExtension::instance()->GetNumericProperty("tcmalloc.aggressive_memory_decommit", &init_aggressive_decommit); @@ -112,11 +112,12 @@ void Daemon::tcmalloc_gc_thread() { MallocExtension::instance()->GetNumericProperty("generic.current_allocated_bytes", &tc_used_bytes); int64_t tc_cached_bytes = tc_alloc_bytes - tc_used_bytes; - int64_t to_free_bytes = (int64_t)tc_cached_bytes - (tc_used_bytes * max_cache_percent / 100); + int64_t to_free_bytes = + (int64_t)tc_cached_bytes - (tc_used_bytes * max_cache_percent / 100); - int64_t memory_pressure = 0; + int64_t memory_pressure = 0; int64_t alloc_bytes = std::max(rss, tc_alloc_bytes); - memory_pressure = alloc_bytes * 100 / physical_limit_bytes; + memory_pressure = alloc_bytes * 100 / physical_limit_bytes; expected_aggressive_decommit = init_aggressive_decommit; if (memory_pressure > pressure_limit) { @@ -124,7 +125,7 @@ void Daemon::tcmalloc_gc_thread() { // Ideally, we should reuse cache and not allocate from system any more, // however, it is hard to set limit on cache of tcmalloc and doris // use mmap in vectorized mode. 
- if (last_memory_pressure <= pressure_limit) { + if (last_memory_pressure <= pressure_limit) { int64_t min_free_bytes = alloc_bytes - physical_limit_bytes * 9 / 10; to_free_bytes = std::max(to_free_bytes, min_free_bytes); to_free_bytes = std::max(to_free_bytes, tc_cached_bytes * 30 / 100); @@ -146,9 +147,9 @@ void Daemon::tcmalloc_gc_thread() { int release_rate_index = memory_pressure / 10; double release_rate = 1.0; if (release_rate_index >= sizeof(release_rates)) { - release_rate = 2000.0; + release_rate = 2000.0; } else { - release_rate = release_rates[release_rate_index]; + release_rate = release_rates[release_rate_index]; } MallocExtension::instance()->SetMemoryReleaseRate(release_rate); @@ -164,10 +165,11 @@ void Daemon::tcmalloc_gc_thread() { if (last_ms >= kMaxLastMs) { LOG(INFO) << "generic.current_allocated_bytes " << tc_used_bytes << ", generic.total_physical_bytes " << tc_alloc_bytes << ", rss " << rss - << ", max_cache_percent " << max_cache_percent << ", release_rate " << release_rate - << ", memory_pressure " << memory_pressure << ", physical_limit_bytes " - << physical_limit_bytes << ", to_free_bytes " << to_free_bytes - << ", current_aggressive_decommit " << current_aggressive_decommit; + << ", max_cache_percent " << max_cache_percent << ", release_rate " + << release_rate << ", memory_pressure " << memory_pressure + << ", physical_limit_bytes " << physical_limit_bytes << ", to_free_bytes " + << to_free_bytes << ", current_aggressive_decommit " + << current_aggressive_decommit; MallocExtension::instance()->ReleaseToSystem(to_free_bytes); last_ms = 0; } diff --git a/be/src/service/doris_main.cpp b/be/src/service/doris_main.cpp index d9bae539049861..793fd4d4c828f6 100644 --- a/be/src/service/doris_main.cpp +++ b/be/src/service/doris_main.cpp @@ -324,8 +324,8 @@ int main(int argc, char** argv) { !defined(THREAD_SANITIZER) && !defined(USE_JEMALLOC) // Change the total TCMalloc thread cache size if necessary. 
const size_t kDefaultTotalThreadCacheBytes = 1024 * 1024 * 1024; - if (!MallocExtension::instance()->SetNumericProperty( - "tcmalloc.max_total_thread_cache_bytes", kDefaultTotalThreadCacheBytes)) { + if (!MallocExtension::instance()->SetNumericProperty("tcmalloc.max_total_thread_cache_bytes", + kDefaultTotalThreadCacheBytes)) { fprintf(stderr, "Failed to change TCMalloc total thread cache size.\n"); return -1; } From 5229c1ccf17520d0fc14771f514bf20477d6f3b2 Mon Sep 17 00:00:00 2001 From: Yongqiang YANG Date: Thu, 24 Nov 2022 12:37:05 +0000 Subject: [PATCH 10/12] format --- be/src/common/daemon.cpp | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/be/src/common/daemon.cpp b/be/src/common/daemon.cpp index e19b09cc773f21..1112b7622800c1 100644 --- a/be/src/common/daemon.cpp +++ b/be/src/common/daemon.cpp @@ -164,12 +164,12 @@ void Daemon::tcmalloc_gc_thread() { last_ms += kIntervalMs; if (last_ms >= kMaxLastMs) { LOG(INFO) << "generic.current_allocated_bytes " << tc_used_bytes - << ", generic.total_physical_bytes " << tc_alloc_bytes << ", rss " << rss - << ", max_cache_percent " << max_cache_percent << ", release_rate " - << release_rate << ", memory_pressure " << memory_pressure - << ", physical_limit_bytes " << physical_limit_bytes << ", to_free_bytes " - << to_free_bytes << ", current_aggressive_decommit " - << current_aggressive_decommit; + << ", generic.total_physical_bytes " << tc_alloc_bytes << ", rss " << rss + << ", max_cache_percent " << max_cache_percent << ", release_rate " + << release_rate << ", memory_pressure " << memory_pressure + << ", physical_limit_bytes " << physical_limit_bytes << ", to_free_bytes " + << to_free_bytes << ", current_aggressive_decommit " + << current_aggressive_decommit; MallocExtension::instance()->ReleaseToSystem(to_free_bytes); last_ms = 0; } From 25d3f171fb89c2d5ae98aa1e4c7638ae71983505 Mon Sep 17 00:00:00 2001 From: Yongqiang YANG Date: Thu, 24 Nov 2022 13:42:47 +0000 Subject: [PATCH 11/12] format 
--- be/src/common/daemon.cpp | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/be/src/common/daemon.cpp b/be/src/common/daemon.cpp index 1112b7622800c1..fcf96e6ee23ff0 100644 --- a/be/src/common/daemon.cpp +++ b/be/src/common/daemon.cpp @@ -163,13 +163,13 @@ void Daemon::tcmalloc_gc_thread() { if (to_free_bytes > 0) { last_ms += kIntervalMs; if (last_ms >= kMaxLastMs) { - LOG(INFO) << "generic.current_allocated_bytes " << tc_used_bytes - << ", generic.total_physical_bytes " << tc_alloc_bytes << ", rss " << rss - << ", max_cache_percent " << max_cache_percent << ", release_rate " - << release_rate << ", memory_pressure " << memory_pressure - << ", physical_limit_bytes " << physical_limit_bytes << ", to_free_bytes " - << to_free_bytes << ", current_aggressive_decommit " - << current_aggressive_decommit; + LOG(INFO) << "generic.current_allocated_bytes " << tc_used_bytes + << ", generic.total_physical_bytes " << tc_alloc_bytes << ", rss " << rss + << ", max_cache_percent " << max_cache_percent << ", release_rate " + << release_rate << ", memory_pressure " << memory_pressure + << ", physical_limit_bytes " << physical_limit_bytes << ", to_free_bytes " + << to_free_bytes << ", current_aggressive_decommit " + << current_aggressive_decommit; MallocExtension::instance()->ReleaseToSystem(to_free_bytes); last_ms = 0; } From 3f3fa1bb254ab1204d298f3d02e8c0c2771e0c6b Mon Sep 17 00:00:00 2001 From: Yongqiang YANG Date: Mon, 28 Nov 2022 03:58:15 +0000 Subject: [PATCH 12/12] format --- be/src/common/daemon.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/be/src/common/daemon.cpp b/be/src/common/daemon.cpp index fcf96e6ee23ff0..9a828b3d7a6990 100644 --- a/be/src/common/daemon.cpp +++ b/be/src/common/daemon.cpp @@ -164,7 +164,7 @@ void Daemon::tcmalloc_gc_thread() { last_ms += kIntervalMs; if (last_ms >= kMaxLastMs) { LOG(INFO) << "generic.current_allocated_bytes " << tc_used_bytes - << ", generic.total_physical_bytes " << 
tc_alloc_bytes << ", rss " << rss + << ", generic.total_physical_bytes " << tc_alloc_bytes << ", rss " << rss << ", max_cache_percent " << max_cache_percent << ", release_rate " << release_rate << ", memory_pressure " << memory_pressure << ", physical_limit_bytes " << physical_limit_bytes << ", to_free_bytes "