-
Notifications
You must be signed in to change notification settings - Fork 3.7k
[improvement](memory) disable page cache and chunk allocator, optimize memory allocate size #13285
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
c6d083a
2ced248
5e1cc77
afe8c8d
3051496
f8ad6c0
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -199,9 +199,7 @@ Status ExecEnv::_init_mem_tracker() { | |
| if (global_memory_limit_bytes > MemInfo::physical_mem()) { | ||
| LOG(WARNING) << "Memory limit " | ||
| << PrettyPrinter::print(global_memory_limit_bytes, TUnit::BYTES) | ||
| << " exceeds physical memory of " | ||
| << PrettyPrinter::print(MemInfo::physical_mem(), TUnit::BYTES) | ||
| << ". Using physical memory instead"; | ||
xinyiZzz marked this conversation as resolved.
Show resolved
Hide resolved
|
||
| << " exceeds physical memory, using physical memory instead"; | ||
| global_memory_limit_bytes = MemInfo::physical_mem(); | ||
| } | ||
| _process_mem_tracker = | ||
|
|
@@ -307,13 +305,6 @@ Status ExecEnv::_init_mem_tracker() { | |
| RETURN_IF_ERROR(_disk_io_mgr->init(global_memory_limit_bytes)); | ||
| RETURN_IF_ERROR(_tmp_file_mgr->init()); | ||
|
|
||
| // 5. init chunk allocator | ||
| if (!BitUtil::IsPowerOf2(config::min_chunk_reserved_bytes)) { | ||
| ss << "Config min_chunk_reserved_bytes must be a power-of-two: " | ||
| << config::min_chunk_reserved_bytes; | ||
| return Status::InternalError(ss.str()); | ||
| } | ||
|
|
||
| int64_t chunk_reserved_bytes_limit = | ||
| ParseUtil::parse_mem_spec(config::chunk_reserved_bytes_limit, global_memory_limit_bytes, | ||
| MemInfo::physical_mem(), &is_percent); | ||
|
|
@@ -323,8 +314,8 @@ Status ExecEnv::_init_mem_tracker() { | |
| << config::chunk_reserved_bytes_limit; | ||
| return Status::InternalError(ss.str()); | ||
| } | ||
| chunk_reserved_bytes_limit = | ||
| BitUtil::RoundDown(chunk_reserved_bytes_limit, config::min_chunk_reserved_bytes); | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The 4096 is the minimum chunk size currently allocated by the chunk allocator. A separate conf
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. yes ... I will move back min_chunk_reserved_bytes
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. can remove min_chunk_reserved_bytes, const 4096 is fine, the user will not modify it
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. OK, I remove it. |
||
| // Has to round to multiple of page size(4096 bytes), chunk allocator will also check this | ||
| chunk_reserved_bytes_limit = BitUtil::RoundDown(chunk_reserved_bytes_limit, 4096); | ||
| ChunkAllocator::init_instance(chunk_reserved_bytes_limit); | ||
| LOG(INFO) << "Chunk allocator memory limit: " | ||
| << PrettyPrinter::print(chunk_reserved_bytes_limit, TUnit::BYTES) | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -154,35 +154,36 @@ ChunkAllocator::ChunkAllocator(size_t reserve_limit) | |
| Status ChunkAllocator::allocate(size_t size, Chunk* chunk) { | ||
| CHECK((size > 0 && (size & (size - 1)) == 0)); | ||
|
|
||
| // fast path: allocate from current core arena | ||
| int core_id = CpuInfo::get_current_core(); | ||
| chunk->size = size; | ||
| chunk->core_id = core_id; | ||
|
|
||
| if (_arenas[core_id]->pop_free_chunk(size, &chunk->data)) { | ||
| DCHECK_GE(_reserved_bytes, 0); | ||
| _reserved_bytes.fetch_sub(size); | ||
| chunk_pool_local_core_alloc_count->increment(1); | ||
| // transfer the memory ownership of allocate from ChunkAllocator::tracker to the tls tracker. | ||
| THREAD_MEM_TRACKER_TRANSFER_FROM(size, _mem_tracker.get()); | ||
| return Status::OK(); | ||
| } | ||
| // Second path: try to allocate from other core's arena | ||
| // When the reserved bytes is greater than the limit, the chunk is stolen from other arena. | ||
| // Otherwise, it is allocated from the system first, which can reserve enough memory as soon as possible. | ||
| // After that, allocate from current core arena as much as possible. | ||
| if (_reserved_bytes > _steal_arena_limit) { | ||
| ++core_id; | ||
| for (int i = 1; i < _arenas.size(); ++i, ++core_id) { | ||
| if (_arenas[core_id % _arenas.size()]->pop_free_chunk(size, &chunk->data)) { | ||
| DCHECK_GE(_reserved_bytes, 0); | ||
| _reserved_bytes.fetch_sub(size); | ||
| chunk_pool_other_core_alloc_count->increment(1); | ||
| // reset chunk's core_id to other | ||
| chunk->core_id = core_id % _arenas.size(); | ||
| // transfer the memory ownership of allocate from ChunkAllocator::tracker to the tls tracker. | ||
| THREAD_MEM_TRACKER_TRANSFER_FROM(size, _mem_tracker.get()); | ||
| return Status::OK(); | ||
| chunk->size = size; | ||
| if (!config::disable_chunk_allocator) { | ||
| // fast path: allocate from current core arena | ||
| if (_arenas[core_id]->pop_free_chunk(size, &chunk->data)) { | ||
| DCHECK_GE(_reserved_bytes, 0); | ||
| _reserved_bytes.fetch_sub(size); | ||
| chunk_pool_local_core_alloc_count->increment(1); | ||
| // transfer the memory ownership of allocate from ChunkAllocator::tracker to the tls tracker. | ||
| THREAD_MEM_TRACKER_TRANSFER_FROM(size, _mem_tracker.get()); | ||
| return Status::OK(); | ||
| } | ||
| // Second path: try to allocate from other core's arena | ||
| // When the reserved bytes is greater than the limit, the chunk is stolen from other arena. | ||
| // Otherwise, it is allocated from the system first, which can reserve enough memory as soon as possible. | ||
| // After that, allocate from current core arena as much as possible. | ||
| if (_reserved_bytes > _steal_arena_limit) { | ||
| ++core_id; | ||
| for (int i = 1; i < _arenas.size(); ++i, ++core_id) { | ||
| if (_arenas[core_id % _arenas.size()]->pop_free_chunk(size, &chunk->data)) { | ||
| DCHECK_GE(_reserved_bytes, 0); | ||
| _reserved_bytes.fetch_sub(size); | ||
| chunk_pool_other_core_alloc_count->increment(1); | ||
| // reset chunk's core_id to other | ||
| chunk->core_id = core_id % _arenas.size(); | ||
| // transfer the memory ownership of allocate from ChunkAllocator::tracker to the tls tracker. | ||
| THREAD_MEM_TRACKER_TRANSFER_FROM(size, _mem_tracker.get()); | ||
| return Status::OK(); | ||
| } | ||
| } | ||
| } | ||
| } | ||
|
|
@@ -204,7 +205,7 @@ Status ChunkAllocator::allocate(size_t size, Chunk* chunk) { | |
| void ChunkAllocator::free(const Chunk& chunk) { | ||
| DCHECK(chunk.core_id != -1); | ||
| CHECK((chunk.size & (chunk.size - 1)) == 0); | ||
| if (config::disable_mem_pools) { | ||
| if (config::disable_chunk_allocator) { | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Is it better to move the condition into
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Not use this. I will remove disable_mem_pools in the future after non-vectorized engine is removed. It is easily confused with MemPool.
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Try removing |
||
| SystemAllocator::free(chunk.data, chunk.size); | ||
| return; | ||
| } | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -30,6 +30,8 @@ | |
| #include <cstddef> | ||
| #include <memory> | ||
|
|
||
| #include "common/config.h" | ||
| #include "util/bit_util.h" | ||
| #include "vec/common/allocator.h" | ||
| #include "vec/common/bit_helpers.h" | ||
| #include "vec/common/memcpy_small.h" | ||
|
|
@@ -120,8 +122,9 @@ class PODArrayBase : private boost::noncopyable, | |
| } | ||
| } | ||
|
|
||
| /// Not round up, keep the size just as the application pass in like std::vector | ||
| void alloc_for_num_elements(size_t num_elements) { | ||
| alloc(round_up_to_power_of_two_or_zero(minimum_memory_for_elements(num_elements))); | ||
| alloc(minimum_memory_for_elements(num_elements)); | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Allocating in powers of 2 has a positive impact on performance, if you wish to reduce memory usage, join
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I do not think this should be a config, because if it is a config, we do not know when to open the config since it is a macro. Actually, there are two types of memory allocation in PODArray:
For most cases, we should reserve or resize memory size before push back, then we could reduce memory reallocation or memory copy. |
||
| } | ||
|
|
||
| template <typename... TAllocatorParams> | ||
|
|
@@ -181,6 +184,7 @@ class PODArrayBase : private boost::noncopyable, | |
| return (stack_threshold > 0) && (allocated_bytes() <= stack_threshold); | ||
| } | ||
|
|
||
| /// This method is called by push back or emplace back, this is the same behaviour with std::vector | ||
| template <typename... TAllocatorParams> | ||
| void reserve_for_next_size(TAllocatorParams&&... allocator_params) { | ||
| if (size() == 0) { | ||
|
|
@@ -189,8 +193,10 @@ class PODArrayBase : private boost::noncopyable, | |
| realloc(std::max(integerRoundUp(initial_bytes, ELEMENT_SIZE), | ||
| minimum_memory_for_elements(1)), | ||
| std::forward<TAllocatorParams>(allocator_params)...); | ||
| } else | ||
| } else { | ||
| // There is still a power of 2 expansion here, this method is used in push back method | ||
| realloc(allocated_bytes() * 2, std::forward<TAllocatorParams>(allocator_params)...); | ||
| } | ||
| } | ||
|
|
||
| #ifndef NDEBUG | ||
|
|
@@ -229,7 +235,7 @@ class PODArrayBase : private boost::noncopyable, | |
| template <typename... TAllocatorParams> | ||
| void reserve(size_t n, TAllocatorParams&&... allocator_params) { | ||
| if (n > capacity()) | ||
| realloc(round_up_to_power_of_two_or_zero(minimum_memory_for_elements(n)), | ||
| realloc(minimum_memory_for_elements(n), | ||
xinyiZzz marked this conversation as resolved.
Show resolved
Hide resolved
|
||
| std::forward<TAllocatorParams>(allocator_params)...); | ||
| } | ||
|
|
||
|
|
@@ -444,9 +450,11 @@ class PODArray : public PODArrayBase<sizeof(T), initial_bytes, TAllocator, pad_r | |
| template <typename It1, typename It2, typename... TAllocatorParams> | ||
| void insert_prepare(It1 from_begin, It2 from_end, TAllocatorParams&&... allocator_params) { | ||
| size_t required_capacity = this->size() + (from_end - from_begin); | ||
| if (required_capacity > this->capacity()) | ||
| if (required_capacity > this->capacity()) { | ||
| // std::vector's insert method will expand if required capacity is larger than current | ||
| this->reserve(round_up_to_power_of_two_or_zero(required_capacity), | ||
| std::forward<TAllocatorParams>(allocator_params)...); | ||
| } | ||
| } | ||
|
|
||
| /// Do not insert into the array a piece of itself. Because with the resize, the iterators on themselves can be invalidated. | ||
|
|
@@ -623,8 +631,10 @@ class PODArray : public PODArrayBase<sizeof(T), initial_bytes, TAllocator, pad_r | |
| template <typename It1, typename It2> | ||
| void assign(It1 from_begin, It2 from_end) { | ||
| size_t required_capacity = from_end - from_begin; | ||
| if (required_capacity > this->capacity()) | ||
| this->reserve(round_up_to_power_of_two_or_zero(required_capacity)); | ||
| if (required_capacity > this->capacity()) { | ||
| // std::vector assign just expand the capacity to the required capacity | ||
| this->reserve(required_capacity); | ||
| } | ||
|
|
||
| size_t bytes_to_copy = this->byte_size(required_capacity); | ||
| memcpy(this->c_start, reinterpret_cast<const void*>(&*from_begin), bytes_to_copy); | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
disable_chunk_allocator_in_mem_pool
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
No, I will remove mempool after we removed non-vectorized engine. MemPool is used as MemPool.cpp, it is like an arena. The config disable_mem_pools is also very confusing. I will remove them.