From c6d083a2e6beba9319e07b983b597aef3889ac2d Mon Sep 17 00:00:00 2001 From: yiguolei Date: Tue, 11 Oct 2022 15:08:43 +0800 Subject: [PATCH 1/6] [improvement](memory) disable page cache and chunk allocator, optimize memory allocate size --- be/src/common/config.h | 14 +++-- be/src/runtime/exec_env_init.cpp | 14 ++--- be/src/runtime/mem_pool.cpp | 5 +- be/src/runtime/mem_pool.h | 6 +-- be/src/runtime/memory/chunk_allocator.cpp | 62 ++++++++++++----------- be/src/util/bit_util.h | 10 +++- be/src/vec/common/arena.h | 11 ++-- be/src/vec/common/pod_array.h | 31 ++++++++---- 8 files changed, 89 insertions(+), 64 deletions(-) diff --git a/be/src/common/config.h b/be/src/common/config.h index 021f5cd680107a..0b8ac4af9f0048 100644 --- a/be/src/common/config.h +++ b/be/src/common/config.h @@ -239,7 +239,7 @@ CONF_Int32(storage_page_cache_shard_size, "16"); // all storage page cache will be divided into data_page_cache and index_page_cache CONF_Int32(index_page_cache_percentage, "10"); // whether to disable page cache feature in storage -CONF_Bool(disable_storage_page_cache, "false"); +CONF_Bool(disable_storage_page_cache, "true"); CONF_Bool(enable_storage_vectorization, "true"); @@ -439,14 +439,20 @@ CONF_Bool(disable_mem_pools, "false"); // increase this variable can improve performance, // but will acquire more free memory which can not be used by other modules. CONF_mString(chunk_reserved_bytes_limit, "10%"); -// 1024, The minimum chunk allocator size (in bytes) -CONF_Int32(min_chunk_reserved_bytes, "1024"); + +// Whether using chunk allocator to cache memory chunk +CONF_Bool(disable_chunk_allocator, "true"); // Disable Chunk Allocator in Vectorized Allocator, this will reduce memory cache. // For high concurrent queries, using Chunk Allocator with vectorized Allocator can reduce the impact // of gperftools tcmalloc central lock. // Jemalloc or google tcmalloc have core cache, Chunk Allocator may no longer be needed after replacing // gperftools tcmalloc. 
-CONF_mBool(disable_chunk_allocator_in_vec, "false"); +CONF_mBool(disable_chunk_allocator_in_vec, "true"); + +// Both MemPool and vectorized engine's podarray allocator, vectorized engine's arena will try to allocate memory as power of two. +// But if the memory is very large then power of two is also very large. This config means if the allocated memory's size is larger +// than this limit then all allocators will not use RoundUpToPowerOfTwo to allocate memory. +CONF_mInt64(memory_linear_growth_threshold, "134217728"); // 128Mb // The probing algorithm of partitioned hash table. // Enable quadratic probing hash table diff --git a/be/src/runtime/exec_env_init.cpp b/be/src/runtime/exec_env_init.cpp index a55f5477782980..ca342f1f5e9911 100644 --- a/be/src/runtime/exec_env_init.cpp +++ b/be/src/runtime/exec_env_init.cpp @@ -185,6 +185,8 @@ Status ExecEnv::_init(const std::vector& store_paths) { } Status ExecEnv::_init_mem_tracker() { + LOG(INFO) << "Physical memory is: " + << PrettyPrinter::print(MemInfo::physical_mem(), TUnit::BYTES); // 1. init global memory limit. int64_t global_memory_limit_bytes = 0; bool is_percent = false; @@ -199,9 +201,7 @@ Status ExecEnv::_init_mem_tracker() { if (global_memory_limit_bytes > MemInfo::physical_mem()) { LOG(WARNING) << "Memory limit " << PrettyPrinter::print(global_memory_limit_bytes, TUnit::BYTES) - << " exceeds physical memory of " - << PrettyPrinter::print(MemInfo::physical_mem(), TUnit::BYTES) - << ". Using physical memory instead"; + << " exceeds physical memory, using physical memory instead"; global_memory_limit_bytes = MemInfo::physical_mem(); } _process_mem_tracker = @@ -308,12 +308,6 @@ Status ExecEnv::_init_mem_tracker() { RETURN_IF_ERROR(_tmp_file_mgr->init()); // 5. 
init chunk allocator - if (!BitUtil::IsPowerOf2(config::min_chunk_reserved_bytes)) { - ss << "Config min_chunk_reserved_bytes must be a power-of-two: " - << config::min_chunk_reserved_bytes; - return Status::InternalError(ss.str()); - } - int64_t chunk_reserved_bytes_limit = ParseUtil::parse_mem_spec(config::chunk_reserved_bytes_limit, global_memory_limit_bytes, MemInfo::physical_mem(), &is_percent); @@ -323,8 +317,6 @@ Status ExecEnv::_init_mem_tracker() { << config::chunk_reserved_bytes_limit; return Status::InternalError(ss.str()); } - chunk_reserved_bytes_limit = - BitUtil::RoundDown(chunk_reserved_bytes_limit, config::min_chunk_reserved_bytes); ChunkAllocator::init_instance(chunk_reserved_bytes_limit); LOG(INFO) << "Chunk allocator memory limit: " << PrettyPrinter::print(chunk_reserved_bytes_limit, TUnit::BYTES) diff --git a/be/src/runtime/mem_pool.cpp b/be/src/runtime/mem_pool.cpp index c2b709162c2929..f53dd4746c2a32 100644 --- a/be/src/runtime/mem_pool.cpp +++ b/be/src/runtime/mem_pool.cpp @@ -131,8 +131,9 @@ Status MemPool::find_chunk(size_t min_size, bool check_limits) { DCHECK_GE(next_chunk_size_, INITIAL_CHUNK_SIZE); chunk_size = std::max(min_size, next_chunk_size_); } - - chunk_size = BitUtil::RoundUpToPowerOfTwo(chunk_size); + if (chunk_size < config::memory_linear_growth_threshold) { + chunk_size = BitUtil::RoundUpToPowerOfTwo(chunk_size); + } if (check_limits && !thread_context()->_thread_mem_tracker_mgr->limiter_mem_tracker_raw()->check_limit( chunk_size)) { diff --git a/be/src/runtime/mem_pool.h b/be/src/runtime/mem_pool.h index 41240ab375834d..a1db3e6c5dbcdb 100644 --- a/be/src/runtime/mem_pool.h +++ b/be/src/runtime/mem_pool.h @@ -231,9 +231,9 @@ class MemPool { // I refers to https://github.com/mcgov/asan_alignment_example. 
ChunkInfo& info = chunks_[current_chunk_idx_]; - int64_t aligned_allocated_bytes = - BitUtil::RoundUpToPowerOf2(info.allocated_bytes + DEFAULT_PADDING_SIZE, alignment); - if (aligned_allocated_bytes + size <= info.chunk.size) { + int64_t aligned_allocated_bytes = BitUtil::RoundUpToMultiplyOfFactor( + info.allocated_bytes + DEFAULT_PADDING_SIZE, alignment); + if (aligned_allocated_bytes + size + DEFAULT_PADDING_SIZE <= info.chunk.size) { // Ensure the requested alignment is respected. int64_t padding = aligned_allocated_bytes - info.allocated_bytes; uint8_t* result = info.chunk.data + aligned_allocated_bytes; diff --git a/be/src/runtime/memory/chunk_allocator.cpp b/be/src/runtime/memory/chunk_allocator.cpp index 43acc79538182f..469e5ef7ab198e 100644 --- a/be/src/runtime/memory/chunk_allocator.cpp +++ b/be/src/runtime/memory/chunk_allocator.cpp @@ -154,35 +154,37 @@ ChunkAllocator::ChunkAllocator(size_t reserve_limit) Status ChunkAllocator::allocate(size_t size, Chunk* chunk) { CHECK((size > 0 && (size & (size - 1)) == 0)); - // fast path: allocate from current core arena - int core_id = CpuInfo::get_current_core(); - chunk->size = size; - chunk->core_id = core_id; - - if (_arenas[core_id]->pop_free_chunk(size, &chunk->data)) { - DCHECK_GE(_reserved_bytes, 0); - _reserved_bytes.fetch_sub(size); - chunk_pool_local_core_alloc_count->increment(1); - // transfer the memory ownership of allocate from ChunkAllocator::tracker to the tls tracker. - THREAD_MEM_TRACKER_TRANSFER_FROM(size, _mem_tracker.get()); - return Status::OK(); - } - // Second path: try to allocate from other core's arena - // When the reserved bytes is greater than the limit, the chunk is stolen from other arena. - // Otherwise, it is allocated from the system first, which can reserve enough memory as soon as possible. - // After that, allocate from current core arena as much as possible. 
- if (_reserved_bytes > _steal_arena_limit) { - ++core_id; - for (int i = 1; i < _arenas.size(); ++i, ++core_id) { - if (_arenas[core_id % _arenas.size()]->pop_free_chunk(size, &chunk->data)) { - DCHECK_GE(_reserved_bytes, 0); - _reserved_bytes.fetch_sub(size); - chunk_pool_other_core_alloc_count->increment(1); - // reset chunk's core_id to other - chunk->core_id = core_id % _arenas.size(); - // transfer the memory ownership of allocate from ChunkAllocator::tracker to the tls tracker. - THREAD_MEM_TRACKER_TRANSFER_FROM(size, _mem_tracker.get()); - return Status::OK(); + if (!config::disable_chunk_allocator) { + // fast path: allocate from current core arena + int core_id = CpuInfo::get_current_core(); + chunk->size = size; + chunk->core_id = core_id; + + if (_arenas[core_id]->pop_free_chunk(size, &chunk->data)) { + DCHECK_GE(_reserved_bytes, 0); + _reserved_bytes.fetch_sub(size); + chunk_pool_local_core_alloc_count->increment(1); + // transfer the memory ownership of allocate from ChunkAllocator::tracker to the tls tracker. + THREAD_MEM_TRACKER_TRANSFER_FROM(size, _mem_tracker.get()); + return Status::OK(); + } + // Second path: try to allocate from other core's arena + // When the reserved bytes is greater than the limit, the chunk is stolen from other arena. + // Otherwise, it is allocated from the system first, which can reserve enough memory as soon as possible. + // After that, allocate from current core arena as much as possible. + if (_reserved_bytes > _steal_arena_limit) { + ++core_id; + for (int i = 1; i < _arenas.size(); ++i, ++core_id) { + if (_arenas[core_id % _arenas.size()]->pop_free_chunk(size, &chunk->data)) { + DCHECK_GE(_reserved_bytes, 0); + _reserved_bytes.fetch_sub(size); + chunk_pool_other_core_alloc_count->increment(1); + // reset chunk's core_id to other + chunk->core_id = core_id % _arenas.size(); + // transfer the memory ownership of allocate from ChunkAllocator::tracker to the tls tracker. 
+ THREAD_MEM_TRACKER_TRANSFER_FROM(size, _mem_tracker.get()); + return Status::OK(); + } } } } @@ -204,7 +206,7 @@ Status ChunkAllocator::allocate(size_t size, Chunk* chunk) { void ChunkAllocator::free(const Chunk& chunk) { DCHECK(chunk.core_id != -1); CHECK((chunk.size & (chunk.size - 1)) == 0); - if (config::disable_mem_pools) { + if (config::disable_chunk_allocator) { SystemAllocator::free(chunk.data, chunk.size); return; } diff --git a/be/src/util/bit_util.h b/be/src/util/bit_util.h index 28534b139b6d9b..f68586df64754a 100644 --- a/be/src/util/bit_util.h +++ b/be/src/util/bit_util.h @@ -43,6 +43,8 @@ class BitUtil { return value / divisor + (value % divisor != 0); } + static inline size_t round_up_to_page_size(size_t s) { return (s + 4096 - 1) / 4096 * 4096; } + // Returns 'value' rounded up to the nearest multiple of 'factor' static inline int64_t round_up(int64_t value, int64_t factor) { return (value + (factor - 1)) / factor * factor; @@ -304,8 +306,12 @@ class BitUtil { } /// Returns 'value' rounded up to the nearest multiple of 'factor' when factor is - /// a power of two - static inline int64_t RoundUpToPowerOf2(int64_t value, int64_t factor) { + /// a power of two, for example + /// Factor has to be a power of two + /// factor = 16, value = 10 --> result = 16 + /// factor = 16, value = 17 --> result = 32 + /// factor = 16, value = 33 --> result = 48 + static inline int64_t RoundUpToMultiplyOfFactor(int64_t value, int64_t factor) { DCHECK((factor > 0) && ((factor & (factor - 1)) == 0)); return (value + (factor - 1)) & ~(factor - 1); } diff --git a/be/src/vec/common/arena.h b/be/src/vec/common/arena.h index e136bae1438fe3..8042d5618dd097 100644 --- a/be/src/vec/common/arena.h +++ b/be/src/vec/common/arena.h @@ -127,11 +127,16 @@ class Arena : private boost::noncopyable { public: Arena(size_t initial_size_ = 4096, size_t growth_factor_ = 2, - size_t linear_growth_threshold_ = 128 * 1024 * 1024) + size_t linear_growth_threshold_ = -1) : 
growth_factor(growth_factor_), - linear_growth_threshold(linear_growth_threshold_), head(new Chunk(initial_size_, nullptr)), - size_in_bytes(head->size()) {} + size_in_bytes(head->size()) { + if (linear_growth_threshold_ < 0) { + linear_growth_threshold = config::memory_linear_growth_threshold; + } else { + linear_growth_threshold = linear_growth_threshold_; + } + } ~Arena() { delete head; } diff --git a/be/src/vec/common/pod_array.h b/be/src/vec/common/pod_array.h index 0b979fd6a8d28f..2dcd0a1689fa40 100644 --- a/be/src/vec/common/pod_array.h +++ b/be/src/vec/common/pod_array.h @@ -30,6 +30,8 @@ #include #include +#include "common/config.h" +#include "util/bit_util.h" #include "vec/common/allocator.h" #include "vec/common/bit_helpers.h" #include "vec/common/memcpy_small.h" @@ -120,8 +122,16 @@ class PODArrayBase : private boost::noncopyable, } } + inline size_t round_up_memory_size(size_t required_capacity) { + if (required_capacity > config::memory_linear_growth_threshold) { + return BitUtil::round_up_to_page_size(required_capacity); + } else { + return round_up_to_power_of_two_or_zero(required_capacity); + } + } + void alloc_for_num_elements(size_t num_elements) { - alloc(round_up_to_power_of_two_or_zero(minimum_memory_for_elements(num_elements))); + alloc(round_up_memory_size(minimum_memory_for_elements(num_elements))); } template @@ -189,8 +199,10 @@ class PODArrayBase : private boost::noncopyable, realloc(std::max(integerRoundUp(initial_bytes, ELEMENT_SIZE), minimum_memory_for_elements(1)), std::forward(allocator_params)...); - } else + } else { + // There is still a power of 2 expansion here, this method is used in push back method realloc(allocated_bytes() * 2, std::forward(allocator_params)...); + } } #ifndef NDEBUG @@ -228,9 +240,10 @@ class PODArrayBase : private boost::noncopyable, template void reserve(size_t n, TAllocatorParams&&... 
allocator_params) { - if (n > capacity()) - realloc(round_up_to_power_of_two_or_zero(minimum_memory_for_elements(n)), + if (n > capacity()) { + realloc(round_up_memory_size(minimum_memory_for_elements(n)), std::forward(allocator_params)...); + } } template @@ -444,9 +457,10 @@ class PODArray : public PODArrayBase void insert_prepare(It1 from_begin, It2 from_end, TAllocatorParams&&... allocator_params) { size_t required_capacity = this->size() + (from_end - from_begin); - if (required_capacity > this->capacity()) - this->reserve(round_up_to_power_of_two_or_zero(required_capacity), - std::forward(allocator_params)...); + if (required_capacity > this->capacity()) { + // Reserve function will try to allocate power of two memory size, so that not need expand it here + this->reserve(required_capacity, std::forward(allocator_params)...); + } } /// Do not insert into the array a piece of itself. Because with the resize, the iterators on themselves can be invalidated. @@ -623,8 +637,7 @@ class PODArray : public PODArrayBase void assign(It1 from_begin, It2 from_end) { size_t required_capacity = from_end - from_begin; - if (required_capacity > this->capacity()) - this->reserve(round_up_to_power_of_two_or_zero(required_capacity)); + if (required_capacity > this->capacity()) this->reserve(required_capacity); size_t bytes_to_copy = this->byte_size(required_capacity); memcpy(this->c_start, reinterpret_cast(&*from_begin), bytes_to_copy); From 2ced248bafa7bb94e3aba1d802472fa6a245a6e0 Mon Sep 17 00:00:00 2001 From: yiguolei Date: Wed, 12 Oct 2022 15:56:14 +0800 Subject: [PATCH 2/6] fix bugs --- be/src/vec/common/pod_array.h | 86 ++++++++++++++++++----------------- 1 file changed, 44 insertions(+), 42 deletions(-) diff --git a/be/src/vec/common/pod_array.h b/be/src/vec/common/pod_array.h index 2dcd0a1689fa40..79c890a9b9d594 100644 --- a/be/src/vec/common/pod_array.h +++ b/be/src/vec/common/pod_array.h @@ -122,16 +122,9 @@ class PODArrayBase : private boost::noncopyable, } } - inline 
size_t round_up_memory_size(size_t required_capacity) { - if (required_capacity > config::memory_linear_growth_threshold) { - return BitUtil::round_up_to_page_size(required_capacity); - } else { - return round_up_to_power_of_two_or_zero(required_capacity); - } - } - + /// Not round up, keep the size just as the application pass in like std::vector void alloc_for_num_elements(size_t num_elements) { - alloc(round_up_memory_size(minimum_memory_for_elements(num_elements))); + alloc(minimum_memory_for_elements(num_elements)); } template @@ -191,6 +184,7 @@ class PODArrayBase : private boost::noncopyable, return (stack_threshold > 0) && (allocated_bytes() <= stack_threshold); } + /// This method is called by push back or emplace back, this is the same behaviour with std::vector template void reserve_for_next_size(TAllocatorParams&&... allocator_params) { if (size() == 0) { @@ -240,50 +234,54 @@ class PODArrayBase : private boost::noncopyable, template void reserve(size_t n, TAllocatorParams&&... allocator_params) { - if (n > capacity()) { - realloc(round_up_memory_size(minimum_memory_for_elements(n)), + if (n > capacity()) + realloc(minimum_memory_for_elements(n), std::forward(allocator_params)...); - } } +} - template - void resize(size_t n, TAllocatorParams&&... allocator_params) { - reserve(n, std::forward(allocator_params)...); - resize_assume_reserved(n); - } +template +void resize(size_t n, TAllocatorParams&&... allocator_params) { + reserve(n, std::forward(allocator_params)...); + resize_assume_reserved(n); +} - void resize_assume_reserved(const size_t n) { - c_end = c_start + byte_size(n); - reset_peak(); - } +void resize_assume_reserved(const size_t n) { + c_end = c_start + byte_size(n); + reset_peak(); +} - const char* raw_data() const { return c_start; } +const char* raw_data() const { + return c_start; +} - template - void push_back_raw(const char* ptr, TAllocatorParams&&... 
allocator_params) { - if (UNLIKELY(c_end == c_end_of_storage)) - reserve_for_next_size(std::forward(allocator_params)...); +template +void push_back_raw(const char* ptr, TAllocatorParams&&... allocator_params) { + if (UNLIKELY(c_end == c_end_of_storage)) + reserve_for_next_size(std::forward(allocator_params)...); - memcpy(c_end, ptr, ELEMENT_SIZE); - c_end += byte_size(1); - reset_peak(); - } + memcpy(c_end, ptr, ELEMENT_SIZE); + c_end += byte_size(1); + reset_peak(); +} - void protect() { +void protect() { #ifndef NDEBUG - protect_impl(PROT_READ); - mprotected = true; + protect_impl(PROT_READ); + mprotected = true; #endif - } +} - void unprotect() { +void unprotect() { #ifndef NDEBUG - if (mprotected) protect_impl(PROT_WRITE); - mprotected = false; + if (mprotected) protect_impl(PROT_WRITE); + mprotected = false; #endif - } +} - ~PODArrayBase() { dealloc(); } +~PODArrayBase() { + dealloc(); +} }; template size() + (from_end - from_begin); if (required_capacity > this->capacity()) { - // Reserve function will try to allocate power of two memory size, so that not need expand it here - this->reserve(required_capacity, std::forward(allocator_params)...); + // std::vector's insert method will expand if required capacity is larger than current
+ this->reserve(round_up_to_power_of_two_or_zero(required_capacity), + std::forward(allocator_params)...); } } @@ -637,7 +636,10 @@ class PODArray : public PODArrayBase void assign(It1 from_begin, It2 from_end) { size_t required_capacity = from_end - from_begin; - if (required_capacity > this->capacity()) this->reserve(required_capacity); + if (required_capacity > this->capacity()) { + // std::vector assign just expands the capacity to the required capacity
+ this->reserve(required_capacity); + } size_t bytes_to_copy = this->byte_size(required_capacity); memcpy(this->c_start, reinterpret_cast(&*from_begin), bytes_to_copy); From 5e1cc77a18e55070d4bd43a7479e132c4152c9de Mon Sep 17 00:00:00 2001 From: yiguolei Date: Wed, 12 Oct 2022
18:07:49 +0800 Subject: [PATCH 3/6] fix bugs --- be/src/vec/common/pod_array.h | 59 ++++++++++++++++------------------- 1 file changed, 27 insertions(+), 32 deletions(-) diff --git a/be/src/vec/common/pod_array.h b/be/src/vec/common/pod_array.h index 79c890a9b9d594..6d844340efc694 100644 --- a/be/src/vec/common/pod_array.h +++ b/be/src/vec/common/pod_array.h @@ -238,50 +238,45 @@ class PODArrayBase : private boost::noncopyable, realloc(minimum_memory_for_elements(n), std::forward(allocator_params)...); } -} -template -void resize(size_t n, TAllocatorParams&&... allocator_params) { - reserve(n, std::forward(allocator_params)...); - resize_assume_reserved(n); -} + template + void resize(size_t n, TAllocatorParams&&... allocator_params) { + reserve(n, std::forward(allocator_params)...); + resize_assume_reserved(n); + } -void resize_assume_reserved(const size_t n) { - c_end = c_start + byte_size(n); - reset_peak(); -} + void resize_assume_reserved(const size_t n) { + c_end = c_start + byte_size(n); + reset_peak(); + } -const char* raw_data() const { - return c_start; -} + const char* raw_data() const { return c_start; } -template -void push_back_raw(const char* ptr, TAllocatorParams&&... allocator_params) { - if (UNLIKELY(c_end == c_end_of_storage)) - reserve_for_next_size(std::forward(allocator_params)...); + template + void push_back_raw(const char* ptr, TAllocatorParams&&... 
allocator_params) { + if (UNLIKELY(c_end == c_end_of_storage)) + reserve_for_next_size(std::forward(allocator_params)...); - memcpy(c_end, ptr, ELEMENT_SIZE); - c_end += byte_size(1); - reset_peak(); -} + memcpy(c_end, ptr, ELEMENT_SIZE); + c_end += byte_size(1); + reset_peak(); + } -void protect() { + void protect() { #ifndef NDEBUG - protect_impl(PROT_READ); - mprotected = true; + protect_impl(PROT_READ); + mprotected = true; #endif -} + } -void unprotect() { + void unprotect() { #ifndef NDEBUG - if (mprotected) protect_impl(PROT_WRITE); - mprotected = false; + if (mprotected) protect_impl(PROT_WRITE); + mprotected = false; #endif -} + } -~PODArrayBase() { - dealloc(); -} + ~PODArrayBase() { dealloc(); } }; template Date: Thu, 13 Oct 2022 14:58:42 +0800 Subject: [PATCH 4/6] fix bugs --- be/src/runtime/memory/chunk_allocator.cpp | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/be/src/runtime/memory/chunk_allocator.cpp b/be/src/runtime/memory/chunk_allocator.cpp index 469e5ef7ab198e..09df7e23d4048e 100644 --- a/be/src/runtime/memory/chunk_allocator.cpp +++ b/be/src/runtime/memory/chunk_allocator.cpp @@ -154,12 +154,11 @@ ChunkAllocator::ChunkAllocator(size_t reserve_limit) Status ChunkAllocator::allocate(size_t size, Chunk* chunk) { CHECK((size > 0 && (size & (size - 1)) == 0)); + int core_id = CpuInfo::get_current_core(); + chunk->core_id = core_id; + chunk->size = size; if (!config::disable_chunk_allocator) { // fast path: allocate from current core arena - int core_id = CpuInfo::get_current_core(); - chunk->size = size; - chunk->core_id = core_id; - if (_arenas[core_id]->pop_free_chunk(size, &chunk->data)) { DCHECK_GE(_reserved_bytes, 0); _reserved_bytes.fetch_sub(size); From 30514964a9b39f0edb668909ac53a081af580713 Mon Sep 17 00:00:00 2001 From: yiguolei Date: Fri, 14 Oct 2022 14:10:44 +0800 Subject: [PATCH 5/6] fix bugs --- be/src/common/config.h | 3 +++ be/src/runtime/exec_env_init.cpp | 10 ++++++++-- be/src/runtime/mem_pool.cpp | 16 
++-------------- 3 files changed, 13 insertions(+), 16 deletions(-) diff --git a/be/src/common/config.h b/be/src/common/config.h index 0b8ac4af9f0048..4670e8a4b942e8 100644 --- a/be/src/common/config.h +++ b/be/src/common/config.h @@ -440,6 +440,9 @@ CONF_Bool(disable_mem_pools, "false"); // but will acquire more free memory which can not be used by other modules. CONF_mString(chunk_reserved_bytes_limit, "10%"); +// 1024, The minimum chunk allocator size (in bytes) +CONF_Int32(min_chunk_reserved_bytes, "1024"); + // Whether using chunk allocator to cache memory chunk CONF_Bool(disable_chunk_allocator, "true"); // Disable Chunk Allocator in Vectorized Allocator, this will reduce memory cache. diff --git a/be/src/runtime/exec_env_init.cpp b/be/src/runtime/exec_env_init.cpp index ca342f1f5e9911..68532cafef2bbe 100644 --- a/be/src/runtime/exec_env_init.cpp +++ b/be/src/runtime/exec_env_init.cpp @@ -185,8 +185,6 @@ Status ExecEnv::_init(const std::vector& store_paths) { } Status ExecEnv::_init_mem_tracker() { - LOG(INFO) << "Physical memory is: " - << PrettyPrinter::print(MemInfo::physical_mem(), TUnit::BYTES); // 1. init global memory limit. int64_t global_memory_limit_bytes = 0; bool is_percent = false; @@ -308,6 +306,12 @@ Status ExecEnv::_init_mem_tracker() { RETURN_IF_ERROR(_tmp_file_mgr->init()); // 5. 
init chunk allocator + if (!BitUtil::IsPowerOf2(config::min_chunk_reserved_bytes)) { + ss << "Config min_chunk_reserved_bytes must be a power-of-two: " + << config::min_chunk_reserved_bytes; + return Status::InternalError(ss.str()); + } + int64_t chunk_reserved_bytes_limit = ParseUtil::parse_mem_spec(config::chunk_reserved_bytes_limit, global_memory_limit_bytes, MemInfo::physical_mem(), &is_percent); @@ -317,6 +321,8 @@ Status ExecEnv::_init_mem_tracker() { << config::chunk_reserved_bytes_limit; return Status::InternalError(ss.str()); } + chunk_reserved_bytes_limit = + BitUtil::RoundDown(chunk_reserved_bytes_limit, config::min_chunk_reserved_bytes); ChunkAllocator::init_instance(chunk_reserved_bytes_limit); LOG(INFO) << "Chunk allocator memory limit: " << PrettyPrinter::print(chunk_reserved_bytes_limit, TUnit::BYTES) diff --git a/be/src/runtime/mem_pool.cpp b/be/src/runtime/mem_pool.cpp index f53dd4746c2a32..7e80e7e5b458ab 100644 --- a/be/src/runtime/mem_pool.cpp +++ b/be/src/runtime/mem_pool.cpp @@ -119,21 +119,9 @@ Status MemPool::find_chunk(size_t min_size, bool check_limits) { } // Didn't find a big enough free chunk - need to allocate new chunk. - size_t chunk_size = 0; DCHECK_LE(next_chunk_size_, MAX_CHUNK_SIZE); - - if (config::disable_mem_pools) { - // Disable pooling by sizing the chunk to fit only this allocation. - // Make sure the alignment guarantees are respected. - // This will generate too many small chunks. 
- chunk_size = std::max(min_size, alignof(max_align_t)); - } else { - DCHECK_GE(next_chunk_size_, INITIAL_CHUNK_SIZE); - chunk_size = std::max(min_size, next_chunk_size_); - } - if (chunk_size < config::memory_linear_growth_threshold) { - chunk_size = BitUtil::RoundUpToPowerOfTwo(chunk_size); - } + DCHECK_GE(next_chunk_size_, INITIAL_CHUNK_SIZE); + size_t chunk_size = BitUtil::RoundUpToPowerOfTwo(std::max(min_size, next_chunk_size_)); if (check_limits && !thread_context()->_thread_mem_tracker_mgr->limiter_mem_tracker_raw()->check_limit( chunk_size)) { From f8ad6c0eaf1b96549b16587401da34427f5fa440 Mon Sep 17 00:00:00 2001 From: yiguolei Date: Sat, 15 Oct 2022 10:29:52 +0800 Subject: [PATCH 6/6] fix bugs --- be/src/common/config.h | 3 --- be/src/runtime/exec_env_init.cpp | 11 ++--------- 2 files changed, 2 insertions(+), 12 deletions(-) diff --git a/be/src/common/config.h b/be/src/common/config.h index 4670e8a4b942e8..0b8ac4af9f0048 100644 --- a/be/src/common/config.h +++ b/be/src/common/config.h @@ -440,9 +440,6 @@ CONF_Bool(disable_mem_pools, "false"); // but will acquire more free memory which can not be used by other modules. CONF_mString(chunk_reserved_bytes_limit, "10%"); -// 1024, The minimum chunk allocator size (in bytes) -CONF_Int32(min_chunk_reserved_bytes, "1024"); - // Whether using chunk allocator to cache memory chunk CONF_Bool(disable_chunk_allocator, "true"); // Disable Chunk Allocator in Vectorized Allocator, this will reduce memory cache. diff --git a/be/src/runtime/exec_env_init.cpp b/be/src/runtime/exec_env_init.cpp index 68532cafef2bbe..327a0307e75f9b 100644 --- a/be/src/runtime/exec_env_init.cpp +++ b/be/src/runtime/exec_env_init.cpp @@ -305,13 +305,6 @@ Status ExecEnv::_init_mem_tracker() { RETURN_IF_ERROR(_disk_io_mgr->init(global_memory_limit_bytes)); RETURN_IF_ERROR(_tmp_file_mgr->init()); - // 5. 
init chunk allocator - if (!BitUtil::IsPowerOf2(config::min_chunk_reserved_bytes)) { - ss << "Config min_chunk_reserved_bytes must be a power-of-two: " - << config::min_chunk_reserved_bytes; - return Status::InternalError(ss.str()); - } - int64_t chunk_reserved_bytes_limit = ParseUtil::parse_mem_spec(config::chunk_reserved_bytes_limit, global_memory_limit_bytes, MemInfo::physical_mem(), &is_percent); @@ -321,8 +314,8 @@ Status ExecEnv::_init_mem_tracker() { << config::chunk_reserved_bytes_limit; return Status::InternalError(ss.str()); } - chunk_reserved_bytes_limit = - BitUtil::RoundDown(chunk_reserved_bytes_limit, config::min_chunk_reserved_bytes); + // Has to round to a multiple of the page size (4096 bytes); the chunk allocator will also check this
+ chunk_reserved_bytes_limit = BitUtil::RoundDown(chunk_reserved_bytes_limit, 4096); ChunkAllocator::init_instance(chunk_reserved_bytes_limit); LOG(INFO) << "Chunk allocator memory limit: " << PrettyPrinter::print(chunk_reserved_bytes_limit, TUnit::BYTES)