diff --git a/be/src/agent/task_worker_pool.cpp b/be/src/agent/task_worker_pool.cpp index c7557ea44361e2..29219fc61dd17b 100644 --- a/be/src/agent/task_worker_pool.cpp +++ b/be/src/agent/task_worker_pool.cpp @@ -517,7 +517,7 @@ void TaskWorkerPool::_alter_tablet(const TAgentTaskRequest& agent_task_req, int6 string process_name; switch (task_type) { case TTaskType::ALTER: - process_name = "alter"; + process_name = "AlterTablet"; break; default: std::string task_name; diff --git a/be/src/common/config.h b/be/src/common/config.h index 3fdb5557544068..893c5bdcc4015c 100644 --- a/be/src/common/config.h +++ b/be/src/common/config.h @@ -604,10 +604,23 @@ CONF_mInt32(remote_storage_read_buffer_mb, "16"); // Default level of MemTracker to show in web page // now MemTracker support two level: -// RELEASE: 0 -// DEBUG: 1 +// OVERVIEW: 0 +// TASK: 1 +// INSTANCE: 2 +// VERBOSE: 3 // the level equal or lower than mem_tracker_level will show in web page -CONF_Int16(mem_tracker_level, "0"); +CONF_mInt16(mem_tracker_level, "0"); + +// The minimum length when TCMalloc Hook consumes/releases MemTracker, consume size +// smaller than this value will continue to accumulate. specified as number of bytes. +// Decreasing this value will increase the frequency of consume/release. +// Increasing this value will cause MemTracker statistics to be inaccurate. +CONF_mInt32(mem_tracker_consume_min_size_bytes, "2097152"); + +// When MemTracker is a negative value, it is considered that a memory leak has occurred, +// but the actual MemTracker records inaccurately will also cause a negative value, +// so this feature is in the experimental stage. +CONF_mBool(memory_leak_detection, "false"); // The version information of the tablet will be stored in the memory // in an adjacency graph data structure. 
diff --git a/be/src/common/daemon.cpp b/be/src/common/daemon.cpp index 044feda7cf6158..36e4f84e36c03f 100644 --- a/be/src/common/daemon.cpp +++ b/be/src/common/daemon.cpp @@ -95,17 +95,6 @@ void Daemon::memory_maintenance_thread() { if (env != nullptr) { BufferPool* buffer_pool = env->buffer_pool(); if (buffer_pool != nullptr) buffer_pool->Maintenance(); - - // The process limit as measured by our trackers may get out of sync with the - // process usage if memory is allocated or freed without updating a MemTracker. - // The metric is refreshed whenever memory is consumed or released via a MemTracker, - // so on a system with queries executing it will be refreshed frequently. However - // if the system is idle, we need to refresh the tracker occasionally since - // untracked memory may be allocated or freed, e.g. by background threads. - if (env->process_mem_tracker() != nullptr && - !env->process_mem_tracker()->is_consumption_metric_null()) { - env->process_mem_tracker()->RefreshConsumptionFromMetric(); - } } } } diff --git a/be/src/exec/analytic_eval_node.cpp b/be/src/exec/analytic_eval_node.cpp index df1b4cea275537..e4a20f2b998ac6 100644 --- a/be/src/exec/analytic_eval_node.cpp +++ b/be/src/exec/analytic_eval_node.cpp @@ -201,7 +201,7 @@ Status AnalyticEvalNode::open(RuntimeState* state) { "Failed to acquire initial read buffer for analytic function " "evaluation. 
Reducing query concurrency or increasing the memory limit may " "help this query to complete successfully."); - return mem_tracker()->MemLimitExceeded(state, msg, -1); + RETURN_LIMIT_EXCEEDED(mem_tracker(), state, msg); } DCHECK_EQ(_evaluators.size(), _fn_ctxs.size()); diff --git a/be/src/exec/base_scanner.cpp b/be/src/exec/base_scanner.cpp index ab004b17f0e2c1..5c277e36d1301b 100644 --- a/be/src/exec/base_scanner.cpp +++ b/be/src/exec/base_scanner.cpp @@ -42,7 +42,7 @@ BaseScanner::BaseScanner(RuntimeState* state, RuntimeProfile* profile, #if BE_TEST _mem_tracker(new MemTracker()), #else - _mem_tracker(MemTracker::CreateTracker( + _mem_tracker(MemTracker::create_tracker( -1, "BaseScanner:" + std::to_string(state->load_job_id()), state->instance_mem_tracker())), #endif diff --git a/be/src/exec/broker_scan_node.cpp b/be/src/exec/broker_scan_node.cpp index 11928480d85f3d..344ca3f95e2515 100644 --- a/be/src/exec/broker_scan_node.cpp +++ b/be/src/exec/broker_scan_node.cpp @@ -318,7 +318,7 @@ Status BrokerScanNode::scanner_scan(const TBrokerScanRange& scan_range, // 1. too many batches in queue, or // 2. at least one batch in queue and memory exceed limit. 
(_batch_queue.size() >= _max_buffered_batches || - (mem_tracker()->AnyLimitExceeded(MemLimit::HARD) && !_batch_queue.empty()))) { + (mem_tracker()->any_limit_exceeded() && !_batch_queue.empty()))) { _queue_writer_cond.wait_for(l, std::chrono::seconds(1)); } // Process already set failed, so we just return OK diff --git a/be/src/exec/broker_scanner.cpp b/be/src/exec/broker_scanner.cpp index 96e9e315c16588..e7b85a013fac6a 100644 --- a/be/src/exec/broker_scanner.cpp +++ b/be/src/exec/broker_scanner.cpp @@ -35,7 +35,6 @@ #include "exprs/expr.h" #include "runtime/descriptors.h" #include "runtime/exec_env.h" -#include "runtime/mem_tracker.h" #include "runtime/raw_value.h" #include "runtime/stream_load/load_stream_mgr.h" #include "runtime/stream_load/stream_load_pipe.h" diff --git a/be/src/exec/broker_scanner.h b/be/src/exec/broker_scanner.h index d6deb7d64f1cd6..b831539584b75d 100644 --- a/be/src/exec/broker_scanner.h +++ b/be/src/exec/broker_scanner.h @@ -46,7 +46,6 @@ class ExprContext; class TupleDescriptor; class TupleRow; class RowDescriptor; -class MemTracker; class RuntimeProfile; class StreamLoadPipe; diff --git a/be/src/exec/cross_join_node.cpp b/be/src/exec/cross_join_node.cpp index 8ef9b662f8fb65..e605c91f01c58f 100644 --- a/be/src/exec/cross_join_node.cpp +++ b/be/src/exec/cross_join_node.cpp @@ -64,7 +64,7 @@ Status CrossJoinNode::construct_build_side(RuntimeState* state) { RETURN_IF_ERROR(child(1)->get_next(state, batch, &eos)); // to prevent use too many memory - RETURN_IF_LIMIT_EXCEEDED(state, "Cross join, while getting next from the child 1."); + RETURN_IF_INSTANCE_LIMIT_EXCEEDED(state, "Cross join, while getting next from the child 1."); SCOPED_TIMER(_build_timer); _build_batches.add_row_batch(batch); diff --git a/be/src/exec/data_sink.cpp b/be/src/exec/data_sink.cpp index a0ef6336e54382..5a4eba9f1dd734 100644 --- a/be/src/exec/data_sink.cpp +++ b/be/src/exec/data_sink.cpp @@ -187,8 +187,8 @@ Status DataSink::init(const TDataSink& thrift_sink) { Status 
DataSink::prepare(RuntimeState* state) { _expr_mem_tracker = - MemTracker::CreateTracker(-1, _name + ":Expr:" + std::to_string(state->load_job_id()), - state->instance_mem_tracker()); + MemTracker::create_tracker(-1, _name + ":Expr:" + std::to_string(state->load_job_id()), + state->instance_mem_tracker()); return Status::OK(); } diff --git a/be/src/exec/es/es_scroll_parser.cpp b/be/src/exec/es/es_scroll_parser.cpp index 6e149e09d8d2dd..1f8ecabed608e8 100644 --- a/be/src/exec/es/es_scroll_parser.cpp +++ b/be/src/exec/es/es_scroll_parser.cpp @@ -352,11 +352,12 @@ Status ScrollParser::fill_tuple(const TupleDescriptor* tuple_desc, Tuple* tuple, // obj[FIELD_ID] must not be nullptr std::string _id = obj[FIELD_ID].GetString(); size_t len = _id.length(); - char* buffer = reinterpret_cast(tuple_pool->try_allocate_unaligned(len)); + Status rst; + char* buffer = reinterpret_cast(tuple_pool->try_allocate_unaligned(len, &rst)); if (UNLIKELY(buffer == nullptr)) { std::string details = strings::Substitute(ERROR_MEM_LIMIT_EXCEEDED, "MaterializeNextRow", len, "string slot"); - return tuple_pool->mem_tracker()->MemLimitExceeded(nullptr, details, len); + RETURN_LIMIT_EXCEEDED(tuple_pool->mem_tracker(), nullptr, details, len, rst); } memcpy(buffer, _id.data(), len); reinterpret_cast(slot)->ptr = buffer; @@ -410,11 +411,12 @@ Status ScrollParser::fill_tuple(const TupleDescriptor* tuple_desc, Tuple* tuple, } } size_t val_size = val.length(); - char* buffer = reinterpret_cast(tuple_pool->try_allocate_unaligned(val_size)); + Status rst; + char* buffer = reinterpret_cast(tuple_pool->try_allocate_unaligned(val_size, &rst)); if (UNLIKELY(buffer == nullptr)) { std::string details = strings::Substitute( ERROR_MEM_LIMIT_EXCEEDED, "MaterializeNextRow", val_size, "string slot"); - return tuple_pool->mem_tracker()->MemLimitExceeded(nullptr, details, val_size); + RETURN_LIMIT_EXCEEDED(tuple_pool->mem_tracker(), nullptr, details, val_size, rst); } memcpy(buffer, val.data(), val_size); 
reinterpret_cast(slot)->ptr = buffer; diff --git a/be/src/exec/es_http_scanner.cpp b/be/src/exec/es_http_scanner.cpp index fe3d67b80daf2d..9a914b90c2361a 100644 --- a/be/src/exec/es_http_scanner.cpp +++ b/be/src/exec/es_http_scanner.cpp @@ -47,7 +47,7 @@ EsHttpScanner::EsHttpScanner(RuntimeState* state, RuntimeProfile* profile, Tuple _mem_tracker(new MemTracker()), #else _mem_tracker( - MemTracker::CreateTracker(-1, "EsHttpScanner:" + std::to_string(state->load_job_id()), + MemTracker::create_tracker(-1, "EsHttpScanner:" + std::to_string(state->load_job_id()), state->instance_mem_tracker())), #endif _mem_pool(_mem_tracker.get()), diff --git a/be/src/exec/es_scan_node.cpp b/be/src/exec/es_scan_node.cpp index fad266993beb74..4ba08bf554c626 100644 --- a/be/src/exec/es_scan_node.cpp +++ b/be/src/exec/es_scan_node.cpp @@ -771,11 +771,12 @@ Status EsScanNode::materialize_row(MemPool* tuple_pool, Tuple* tuple, } const string& val = col.string_vals[val_idx]; size_t val_size = val.size(); - char* buffer = reinterpret_cast(tuple_pool->try_allocate_unaligned(val_size)); + Status rst; + char* buffer = reinterpret_cast(tuple_pool->try_allocate_unaligned(val_size, &rst)); if (UNLIKELY(buffer == nullptr)) { std::string details = strings::Substitute( ERROR_MEM_LIMIT_EXCEEDED, "MaterializeNextRow", val_size, "string slot"); - return tuple_pool->mem_tracker()->MemLimitExceeded(nullptr, details, val_size); + RETURN_LIMIT_EXCEEDED(tuple_pool->mem_tracker(), nullptr, details, val_size, rst); } memcpy(buffer, val.data(), val_size); reinterpret_cast(slot)->ptr = buffer; diff --git a/be/src/exec/except_node.cpp b/be/src/exec/except_node.cpp index d79aceb9c8b5b4..2217d6afdb1fb8 100644 --- a/be/src/exec/except_node.cpp +++ b/be/src/exec/except_node.cpp @@ -63,7 +63,7 @@ Status ExceptNode::open(RuntimeState* state) { while (!eos) { RETURN_IF_CANCELLED(state); RETURN_IF_ERROR(child(i)->get_next(state, _probe_batch.get(), &eos)); - RETURN_IF_LIMIT_EXCEEDED(state, " Except , while probing the 
hash table."); + RETURN_IF_INSTANCE_LIMIT_EXCEEDED(state, " Except , while probing the hash table."); for (int j = 0; j < _probe_batch->num_rows(); ++j) { _hash_tbl_iterator = _hash_tbl->find(_probe_batch->get_row(j)); if (_hash_tbl_iterator != _hash_tbl->end()) { diff --git a/be/src/exec/exec_node.cpp b/be/src/exec/exec_node.cpp index 06ded1db1cba49..8367908bb624c0 100644 --- a/be/src/exec/exec_node.cpp +++ b/be/src/exec/exec_node.cpp @@ -201,12 +201,11 @@ Status ExecNode::prepare(RuntimeState* state) { std::bind(&RuntimeProfile::units_per_second, _rows_returned_counter, runtime_profile()->total_time_counter()), ""); - _mem_tracker = MemTracker::CreateTracker(_runtime_profile.get(), -1, - "ExecNode:" + _runtime_profile->name(), - state->instance_mem_tracker()); - _expr_mem_tracker = MemTracker::CreateTracker(-1, "ExecNode:Exprs:" + _runtime_profile->name(), - _mem_tracker); - _expr_mem_pool.reset(new MemPool(_expr_mem_tracker.get())); + _mem_tracker = MemTracker::create_tracker(-1, "ExecNode:" + _runtime_profile->name(), + state->instance_mem_tracker(), + MemTrackerLevel::VERBOSE, _runtime_profile.get()); + _expr_mem_tracker = MemTracker::create_tracker(-1, "ExecNode:Exprs:" + _runtime_profile->name(), + _mem_tracker); if (_vconjunct_ctx_ptr) { RETURN_IF_ERROR((*_vconjunct_ctx_ptr)->prepare(state, row_desc(), expr_mem_tracker())); @@ -268,10 +267,6 @@ Status ExecNode::close(RuntimeState* state) { if (_vconjunct_ctx_ptr) (*_vconjunct_ctx_ptr)->close(state); Expr::close(_conjunct_ctxs, state); - if (expr_mem_pool() != nullptr) { - _expr_mem_pool->free_all(); - } - if (_buffer_pool_client.is_registered()) { VLOG_FILE << _id << " returning reservation " << _resource_profile.min_reservation; state->initial_reservations()->Return(&_buffer_pool_client, diff --git a/be/src/exec/exec_node.h b/be/src/exec/exec_node.h index 7cad50018d848a..1644ba5165db3c 100644 --- a/be/src/exec/exec_node.h +++ b/be/src/exec/exec_node.h @@ -196,8 +196,6 @@ class ExecNode { std::shared_ptr 
expr_mem_tracker() const { return _expr_mem_tracker; } - MemPool* expr_mem_pool() const { return _expr_mem_pool.get(); } - // Extract node id from p->name(). static int get_node_id_from_profile(RuntimeProfile* p); @@ -306,14 +304,9 @@ class ExecNode { /// Account for peak memory used by this node std::shared_ptr _mem_tracker; - - /// MemTracker used by 'expr_mem_pool_'. + // MemTracker used by all Expr. std::shared_ptr _expr_mem_tracker; - /// MemPool for allocating data structures used by expression evaluators in this node. - /// Created in Prepare(). - std::unique_ptr _expr_mem_pool; - RuntimeProfile::Counter* _rows_returned_counter; RuntimeProfile::Counter* _rows_returned_rate; // Account for peak memory used by this node @@ -377,25 +370,6 @@ class ExecNode { bool _is_closed; }; -#define LIMIT_EXCEEDED(tracker, state, msg) \ - do { \ - stringstream str; \ - str << "Memory exceed limit. " << msg << " "; \ - str << "Backend: " << BackendOptions::get_localhost() << ", "; \ - str << "fragment: " << print_id(state->fragment_instance_id()) << " "; \ - str << "Used: " << tracker->consumption() << ", Limit: " << tracker->limit() << ". 
"; \ - str << "You can change the limit by session variable exec_mem_limit."; \ - return Status::MemoryLimitExceeded(str.str()); \ - } while (false) - -#define RETURN_IF_LIMIT_EXCEEDED(state, msg) \ - do { \ - /* if (UNLIKELY(MemTracker::limit_exceeded(*(state)->mem_trackers()))) { */ \ - MemTracker* tracker = state->instance_mem_tracker()->find_limit_exceeded_tracker(); \ - if (tracker != nullptr) { \ - LIMIT_EXCEEDED(tracker, state, msg); \ - } \ - } while (false) } // namespace doris #endif diff --git a/be/src/exec/hash_join_node.cpp b/be/src/exec/hash_join_node.cpp index 291edbc4c34928..491c719e533cc1 100644 --- a/be/src/exec/hash_join_node.cpp +++ b/be/src/exec/hash_join_node.cpp @@ -302,7 +302,7 @@ Status HashJoinNode::get_next(RuntimeState* state, RowBatch* out_batch, bool* eo // In most cases, no additional memory overhead will be applied for at this stage, // but if the expression calculation in this node needs to apply for additional memory, // it may cause the memory to exceed the limit. 
- RETURN_IF_LIMIT_EXCEEDED(state, "Hash join, while execute get_next."); + RETURN_IF_INSTANCE_LIMIT_EXCEEDED(state, "Hash join, while execute get_next."); SCOPED_TIMER(_runtime_profile->total_time_counter()); if (reached_limit()) { @@ -770,11 +770,11 @@ Status HashJoinNode::process_build_batch(RuntimeState* state, RowBatch* build_ba _build_pool.get(), false); } } - RETURN_IF_LIMIT_EXCEEDED(state, "Hash join, while constructing the hash table."); + RETURN_IF_INSTANCE_LIMIT_EXCEEDED(state, "Hash join, while constructing the hash table."); } else { // take ownership of tuple data of build_batch _build_pool->acquire_data(build_batch->tuple_data_pool(), false); - RETURN_IF_LIMIT_EXCEEDED(state, "Hash join, while constructing the hash table."); + RETURN_IF_INSTANCE_LIMIT_EXCEEDED(state, "Hash join, while constructing the hash table."); RETURN_IF_ERROR(_hash_tbl->resize_buckets_ahead(build_batch->num_rows())); for (int i = 0; i < build_batch->num_rows(); ++i) { _hash_tbl->insert_without_check(build_batch->get_row(i)); diff --git a/be/src/exec/hash_table.cpp b/be/src/exec/hash_table.cpp index 36c3d7b76eaa9c..50f9c8c87784db 100644 --- a/be/src/exec/hash_table.cpp +++ b/be/src/exec/hash_table.cpp @@ -53,7 +53,7 @@ HashTable::HashTable(const std::vector& build_expr_ctxs, _buckets.resize(num_buckets); _num_buckets = num_buckets; _num_buckets_till_resize = MAX_BUCKET_OCCUPANCY_FRACTION * _num_buckets; - _mem_tracker->Consume(_buckets.capacity() * sizeof(Bucket)); + _mem_tracker->consume(_buckets.capacity() * sizeof(Bucket)); // Compute the layout and buffer size to store the evaluated expr results _results_buffer_size = Expr::compute_results_layout( @@ -70,7 +70,7 @@ HashTable::HashTable(const std::vector& build_expr_ctxs, _alloc_list.push_back(_current_nodes); _end_list.push_back(_current_nodes + _current_capacity * _node_byte_size); - _mem_tracker->Consume(_current_capacity * _node_byte_size); + _mem_tracker->consume(_current_capacity * _node_byte_size); if 
(_mem_tracker->limit_exceeded()) { mem_limit_exceeded(_current_capacity * _node_byte_size); } @@ -85,8 +85,8 @@ void HashTable::close() { for (auto ptr : _alloc_list) { free(ptr); } - _mem_tracker->Release(_total_capacity * _node_byte_size); - _mem_tracker->Release(_buckets.size() * sizeof(Bucket)); + _mem_tracker->release(_total_capacity * _node_byte_size); + _mem_tracker->release(_buckets.size() * sizeof(Bucket)); } bool HashTable::eval_row(TupleRow* row, const std::vector& ctxs) { @@ -180,7 +180,7 @@ Status HashTable::resize_buckets(int64_t num_buckets) { int64_t old_num_buckets = _num_buckets; int64_t delta_bytes = (num_buckets - old_num_buckets) * sizeof(Bucket); - Status st = _mem_tracker->TryConsume(delta_bytes); + Status st = _mem_tracker->try_consume(delta_bytes); if (!st) { LOG_EVERY_N(WARNING, 100) << "resize bucket failed: " << st.to_string(); mem_limit_exceeded(delta_bytes); @@ -244,7 +244,7 @@ void HashTable::grow_node_array() { _alloc_list.push_back(_current_nodes); _end_list.push_back(_current_nodes + alloc_size); - _mem_tracker->Consume(alloc_size); + _mem_tracker->consume(alloc_size); if (_mem_tracker->limit_exceeded()) { mem_limit_exceeded(alloc_size); } diff --git a/be/src/exec/intersect_node.cpp b/be/src/exec/intersect_node.cpp index 2cbf4db1358ed5..8b327aee3ee1b0 100644 --- a/be/src/exec/intersect_node.cpp +++ b/be/src/exec/intersect_node.cpp @@ -66,7 +66,7 @@ Status IntersectNode::open(RuntimeState* state) { while (!eos) { RETURN_IF_CANCELLED(state); RETURN_IF_ERROR(child(i)->get_next(state, _probe_batch.get(), &eos)); - RETURN_IF_LIMIT_EXCEEDED(state, " Intersect , while probing the hash table."); + RETURN_IF_INSTANCE_LIMIT_EXCEEDED(state, " Intersect , while probing the hash table."); for (int j = 0; j < _probe_batch->num_rows(); ++j) { _hash_tbl_iterator = _hash_tbl->find(_probe_batch->get_row(j)); if (_hash_tbl_iterator != _hash_tbl->end()) { diff --git a/be/src/exec/json_scanner.cpp b/be/src/exec/json_scanner.cpp index 
67a58c42168dd1..1e08eec5992072 100644 --- a/be/src/exec/json_scanner.cpp +++ b/be/src/exec/json_scanner.cpp @@ -30,7 +30,6 @@ #include "exprs/json_functions.h" #include "gutil/strings/split.h" #include "runtime/exec_env.h" -#include "runtime/mem_tracker.h" #include "runtime/runtime_state.h" namespace doris { diff --git a/be/src/exec/json_scanner.h b/be/src/exec/json_scanner.h index 1a489a5ed890d3..91528c8351b927 100644 --- a/be/src/exec/json_scanner.h +++ b/be/src/exec/json_scanner.h @@ -47,7 +47,6 @@ class Tuple; class SlotDescriptor; class RuntimeState; class TupleDescriptor; -class MemTracker; class JsonReader; class LineReader; class FileReader; diff --git a/be/src/exec/olap_scan_node.cpp b/be/src/exec/olap_scan_node.cpp index 81d0bfbf25be41..8a6f3946e35289 100644 --- a/be/src/exec/olap_scan_node.cpp +++ b/be/src/exec/olap_scan_node.cpp @@ -178,6 +178,9 @@ Status OlapScanNode::prepare(RuntimeState* state) { _init_counter(state); _tuple_desc = state->desc_tbl().get_tuple_descriptor(_tuple_id); + _scanner_mem_tracker = MemTracker::create_tracker(state->instance_mem_tracker()->limit(), + "Scanners", mem_tracker()); + if (_tuple_desc == nullptr) { // TODO: make sure we print all available diagnostic output to our error log return Status::InternalError("Failed to get tuple descriptor."); @@ -345,8 +348,6 @@ Status OlapScanNode::get_next(RuntimeState* state, RowBatch* row_batch, bool* eo << Tuple::to_string(row->get_tuple(0), *_tuple_desc); } } - __sync_fetch_and_sub(&_buffered_bytes, - row_batch->tuple_data_pool()->total_reserved_bytes()); delete materialized_batch; return Status::OK(); @@ -805,8 +806,9 @@ Status OlapScanNode::start_scan_thread(RuntimeState* state) { ++j, ++i) { scanner_ranges.push_back((*ranges)[i].get()); } - OlapScanner* scanner = new OlapScanner(state, this, _olap_scan_node.is_preaggregation, - _need_agg_finalize, *scan_range); + OlapScanner* scanner = + new OlapScanner(state, this, _olap_scan_node.is_preaggregation, + _need_agg_finalize, 
*scan_range, _scanner_mem_tracker); // add scanner to pool before doing prepare. // so that scanner can be automatically deconstructed if prepare failed. _scanner_pool.add(scanner); @@ -1369,13 +1371,8 @@ void OlapScanNode::transfer_thread(RuntimeState* state) { _nice = 18 + std::max(0, 2 - (int)_olap_scanners.size() / 5); std::list olap_scanners; - int64_t mem_limit = 512 * 1024 * 1024; - // TODO(zc): use memory limit - int64_t mem_consume = __sync_fetch_and_add(&_buffered_bytes, 0); - if (state->fragment_mem_tracker() != nullptr) { - mem_limit = state->fragment_mem_tracker()->limit(); - mem_consume = state->fragment_mem_tracker()->consumption(); - } + int64_t mem_limit = _scanner_mem_tracker->limit(); + int64_t mem_consume = _scanner_mem_tracker->consumption(); int max_thread = _max_materialized_row_batches; if (config::doris_scanner_row_num > state->batch_size()) { max_thread /= config::doris_scanner_row_num / state->batch_size(); @@ -1394,13 +1391,9 @@ void OlapScanNode::transfer_thread(RuntimeState* state) { { std::unique_lock l(_scan_batches_lock); assigned_thread_num = _running_thread; - // int64_t buf_bytes = __sync_fetch_and_add(&_buffered_bytes, 0); // How many thread can apply to this query size_t thread_slot_num = 0; - mem_consume = __sync_fetch_and_add(&_buffered_bytes, 0); - if (state->fragment_mem_tracker() != nullptr) { - mem_consume = state->fragment_mem_tracker()->consumption(); - } + mem_consume = _scanner_mem_tracker->consumption(); if (mem_consume < (mem_limit * 6) / 10) { thread_slot_num = max_thread - assigned_thread_num; } else { @@ -1593,7 +1586,7 @@ void OlapScanNode::scanner_thread(OlapScanner* scanner) { break; } RowBatch* row_batch = new RowBatch(this->row_desc(), state->batch_size(), - _runtime_state->fragment_mem_tracker().get()); + _scanner_mem_tracker.get()); row_batch->set_scanner_id(scanner->id()); status = scanner->get_batch(_runtime_state, row_batch, &eos); if (!status.ok()) { @@ -1608,8 +1601,6 @@ void 
OlapScanNode::scanner_thread(OlapScanner* scanner) { row_batch = nullptr; } else { row_batchs.push_back(row_batch); - __sync_fetch_and_add(&_buffered_bytes, - row_batch->tuple_data_pool()->total_reserved_bytes()); } raw_rows_read = scanner->raw_rows_read(); } diff --git a/be/src/exec/olap_scan_node.h b/be/src/exec/olap_scan_node.h index 6d8d89b8f68a67..b4c3dde1f28f15 100644 --- a/be/src/exec/olap_scan_node.h +++ b/be/src/exec/olap_scan_node.h @@ -244,6 +244,8 @@ class OlapScanNode : public ScanNode { TResourceInfo* _resource_info; int64_t _buffered_bytes; + // Count the memory consumption of Rowset Reader and Tablet Reader in OlapScanner. + std::shared_ptr _scanner_mem_tracker; EvalConjunctsFn _eval_conjuncts_fn; bool _need_agg_finalize = true; diff --git a/be/src/exec/olap_scanner.cpp b/be/src/exec/olap_scanner.cpp index 7c39f25ee2435a..71cd4c3a445caf 100644 --- a/be/src/exec/olap_scanner.cpp +++ b/be/src/exec/olap_scanner.cpp @@ -39,7 +39,8 @@ namespace doris { OlapScanner::OlapScanner(RuntimeState* runtime_state, OlapScanNode* parent, bool aggregation, - bool need_agg_finalize, const TPaloScanRange& scan_range) + bool need_agg_finalize, const TPaloScanRange& scan_range, + const std::shared_ptr& tracker) : _runtime_state(runtime_state), _parent(parent), _tuple_desc(parent->_tuple_desc), @@ -48,10 +49,8 @@ OlapScanner::OlapScanner(RuntimeState* runtime_state, OlapScanNode* parent, bool _aggregation(aggregation), _need_agg_finalize(need_agg_finalize), _version(-1), - _mem_tracker(MemTracker::CreateTracker( - runtime_state->fragment_mem_tracker()->limit(), "OlapScanner", - runtime_state->fragment_mem_tracker(), true, true, MemTrackerLevel::VERBOSE)) { -} + _mem_tracker(MemTracker::create_tracker(tracker->limit(), + tracker->label() + ":OlapScanner", tracker)) {} Status OlapScanner::prepare( const TPaloScanRange& scan_range, const std::vector& key_ranges, diff --git a/be/src/exec/olap_scanner.h b/be/src/exec/olap_scanner.h index 0c684d9851b378..d8af820a75ba6e 100644 
--- a/be/src/exec/olap_scanner.h +++ b/be/src/exec/olap_scanner.h @@ -47,7 +47,8 @@ class OlapScanNode; class OlapScanner { public: OlapScanner(RuntimeState* runtime_state, OlapScanNode* parent, bool aggregation, - bool need_agg_finalize, const TPaloScanRange& scan_range); + bool need_agg_finalize, const TPaloScanRange& scan_range, + const std::shared_ptr& tracker); virtual ~OlapScanner() = default; diff --git a/be/src/exec/orc_scanner.cpp b/be/src/exec/orc_scanner.cpp index 25031c3016ba38..a20f77573badb5 100644 --- a/be/src/exec/orc_scanner.cpp +++ b/be/src/exec/orc_scanner.cpp @@ -24,7 +24,6 @@ #include "exprs/expr.h" #include "runtime/descriptors.h" #include "runtime/exec_env.h" -#include "runtime/mem_tracker.h" #include "runtime/raw_value.h" #include "runtime/runtime_state.h" #include "runtime/tuple.h" diff --git a/be/src/exec/partitioned_aggregation_node.cc b/be/src/exec/partitioned_aggregation_node.cc index a30c2f5a478312..d3b9a1ab289ad8 100644 --- a/be/src/exec/partitioned_aggregation_node.cc +++ b/be/src/exec/partitioned_aggregation_node.cc @@ -151,8 +151,7 @@ Status PartitionedAggregationNode::init(const TPlanNode& tnode, RuntimeState* st DCHECK_EQ(intermediate_tuple_desc_->slots().size(), output_tuple_desc_->slots().size()); const RowDescriptor& row_desc = child(0)->row_desc(); - RETURN_IF_ERROR(Expr::create(tnode.agg_node.grouping_exprs, row_desc, state, &grouping_exprs_, - mem_tracker())); + RETURN_IF_ERROR(Expr::create(tnode.agg_node.grouping_exprs, row_desc, state, &grouping_exprs_)); // Construct build exprs from intermediate_row_desc_ for (int i = 0; i < grouping_exprs_.size(); ++i) { SlotDescriptor* desc = intermediate_tuple_desc_->slots()[i]; @@ -236,8 +235,8 @@ Status PartitionedAggregationNode::prepare(RuntimeState* state) { RETURN_IF_ERROR(PartitionedHashTableCtx::Create( _pool, state, build_exprs_, grouping_exprs_, true, vector(build_exprs_.size(), true), state->fragment_hash_seed(), - MAX_PARTITION_DEPTH, 1, expr_mem_pool(), 
expr_results_pool_.get(), - expr_mem_tracker(), build_row_desc, row_desc, &ht_ctx_)); + MAX_PARTITION_DEPTH, 1, nullptr, expr_results_pool_.get(), expr_mem_tracker(), + build_row_desc, row_desc, &ht_ctx_)); } // AddCodegenDisabledMessage(state); return Status::OK(); @@ -402,13 +401,14 @@ Status PartitionedAggregationNode::CopyStringData(const SlotDescriptor& slot_des Tuple* tuple = batch_iter.get()->get_tuple(0); StringValue* sv = reinterpret_cast(tuple->get_slot(slot_desc.tuple_offset())); if (sv == nullptr || sv->len == 0) continue; - char* new_ptr = reinterpret_cast(pool->try_allocate(sv->len)); + Status rst; + char* new_ptr = reinterpret_cast(pool->try_allocate(sv->len, &rst)); if (UNLIKELY(new_ptr == nullptr)) { string details = Substitute( "Cannot perform aggregation at node with id $0." " Failed to allocate $1 output bytes.", _id, sv->len); - return pool->mem_tracker()->MemLimitExceeded(state_, details, sv->len); + RETURN_LIMIT_EXCEEDED(pool->mem_tracker(), state_, details, sv->len, rst); } memcpy(new_ptr, sv->ptr, sv->len); sv->ptr = new_ptr; @@ -847,8 +847,7 @@ Status PartitionedAggregationNode::Partition::Spill(bool more_aggregate_rows) { // TODO(ml): enable spill std::stringstream msg; msg << "New partitioned Aggregation in spill"; - LIMIT_EXCEEDED(parent->state_->query_mem_tracker(), parent->state_, msg.str()); - // RETURN_IF_ERROR(parent->state_->StartSpilling(parent->mem_tracker())); + RETURN_LIMIT_EXCEEDED(parent->state_->query_mem_tracker(), parent->state_, msg.str()); RETURN_IF_ERROR(SerializeStreamForSpilling()); @@ -919,7 +918,8 @@ Tuple* PartitionedAggregationNode::ConstructIntermediateTuple( const int fixed_size = intermediate_tuple_desc_->byte_size(); const int varlen_size = GroupingExprsVarlenSize(); const int tuple_data_size = fixed_size + varlen_size; - uint8_t* tuple_data = pool->try_allocate(tuple_data_size); + Status rst; + uint8_t* tuple_data = pool->try_allocate(tuple_data_size, &rst); if (UNLIKELY(tuple_data == nullptr)) { 
stringstream str; str << "Memory exceed limit. Cannot perform aggregation at node with id $0. Failed " @@ -930,7 +930,7 @@ Tuple* PartitionedAggregationNode::ConstructIntermediateTuple( << ", Limit: " << pool->mem_tracker()->limit() << ". " << "You can change the limit by session variable exec_mem_limit."; string details = Substitute(str.str(), _id, tuple_data_size); - *status = pool->mem_tracker()->MemLimitExceeded(state_, details, tuple_data_size); + *status = pool->mem_tracker()->mem_limit_exceeded(state_, details, tuple_data_size, rst); return nullptr; } memset(tuple_data, 0, fixed_size); diff --git a/be/src/exec/partitioned_hash_table.cc b/be/src/exec/partitioned_hash_table.cc index b8cbdaab631b3c..d197148b61ffde 100644 --- a/be/src/exec/partitioned_hash_table.cc +++ b/be/src/exec/partitioned_hash_table.cc @@ -310,13 +310,12 @@ Status PartitionedHashTableCtx::ExprValuesCache::Init(RuntimeState* state, MAX_EXPR_VALUES_ARRAY_SIZE / expr_values_bytes_per_row_)); int mem_usage = MemUsage(capacity_, expr_values_bytes_per_row_, num_exprs_); - Status st = tracker->TryConsume(mem_usage); - WARN_IF_ERROR(st, "PartitionedHashTableCtx::ExprValuesCache failed"); + Status st = tracker->check_limit(mem_usage); if (UNLIKELY(!st)) { capacity_ = 0; string details = Substitute( - "PartitionedHashTableCtx::ExprValuesCache failed to allocate $0 bytes.", mem_usage); - return tracker->MemLimitExceeded(state, details, mem_usage); + "PartitionedHashTableCtx::ExprValuesCache failed to allocate $0 bytes", mem_usage); + RETURN_LIMIT_EXCEEDED(tracker, state, details, mem_usage, st); } int expr_values_size = expr_values_bytes_per_row_ * capacity_; @@ -349,7 +348,7 @@ void PartitionedHashTableCtx::ExprValuesCache::Close(const std::shared_ptrRelease(mem_usage); + tracker->release(mem_usage); } int PartitionedHashTableCtx::ExprValuesCache::MemUsage(int capacity, int expr_values_bytes_per_row, diff --git a/be/src/exec/set_operation_node.cpp b/be/src/exec/set_operation_node.cpp index 
5958c2569d6dc2..827e30a03c92cf 100644 --- a/be/src/exec/set_operation_node.cpp +++ b/be/src/exec/set_operation_node.cpp @@ -156,7 +156,7 @@ Status SetOperationNode::open(RuntimeState* state) { RETURN_IF_ERROR(child(0)->get_next(state, &build_batch, &eos)); // take ownership of tuple data of build_batch _build_pool->acquire_data(build_batch.tuple_data_pool(), false); - RETURN_IF_LIMIT_EXCEEDED(state, " SetOperation, while constructing the hash table."); + RETURN_IF_INSTANCE_LIMIT_EXCEEDED(state, " SetOperation, while constructing the hash table."); // build hash table and remove duplicate items RETURN_IF_ERROR(_hash_tbl->resize_buckets_ahead(build_batch.num_rows())); for (int i = 0; i < build_batch.num_rows(); ++i) { diff --git a/be/src/exec/tablet_info.cpp b/be/src/exec/tablet_info.cpp index 2a4c43a4c7304d..483ecbdccb0829 100644 --- a/be/src/exec/tablet_info.cpp +++ b/be/src/exec/tablet_info.cpp @@ -18,7 +18,6 @@ #include "exec/tablet_info.h" #include "runtime/mem_pool.h" -#include "runtime/mem_tracker.h" #include "runtime/row_batch.h" #include "runtime/tuple_row.h" #include "util/random.h" @@ -164,8 +163,7 @@ OlapTablePartitionParam::OlapTablePartitionParam(std::shared_ptrtuple_desc()->slots()), - _mem_tracker(MemTracker::CreateTracker(-1, "OlapTablePartitionParam")) { + _mem_tracker(MemTracker::create_tracker(-1, "OlapTablePartitionParam")) { for (auto slot : _slots) { _partition_block.insert({slot->get_empty_mutable_column(), slot->get_data_type_ptr(), slot->col_name()}); } } VOlapTablePartitionParam::~VOlapTablePartitionParam() { - _mem_tracker->Release(_mem_usage); + _mem_tracker->release(_mem_usage); } Status VOlapTablePartitionParam::init() { @@ -539,7 +537,7 @@ Status VOlapTablePartitionParam::init() { } _mem_usage = _partition_block.allocated_bytes(); - _mem_tracker->Consume(_mem_usage); + _mem_tracker->consume(_mem_usage); return Status::OK(); } diff --git a/be/src/exec/tablet_info.h b/be/src/exec/tablet_info.h index 644c799dbea07f..bdd3cea5a0fc05 100644 
--- a/be/src/exec/tablet_info.h +++ b/be/src/exec/tablet_info.h @@ -36,7 +36,6 @@ namespace doris { class MemPool; -class MemTracker; class RowBatch; struct OlapTableIndexSchema { @@ -201,7 +200,6 @@ class OlapTablePartitionParam { std::vector _distributed_slot_descs; ObjectPool _obj_pool; - std::shared_ptr _mem_tracker; std::unique_ptr _mem_pool; std::vector _partitions; std::unique_ptr> diff --git a/be/src/exec/tablet_sink.cpp b/be/src/exec/tablet_sink.cpp index 0645da9380df73..757abe2071781e 100644 --- a/be/src/exec/tablet_sink.cpp +++ b/be/src/exec/tablet_sink.cpp @@ -246,8 +246,7 @@ Status NodeChannel::add_row(Tuple* input_tuple, int64_t tablet_id) { // But there is still some unfinished things, we do mem limit here temporarily. // _cancelled may be set by rpc callback, and it's possible that _cancelled might be set in any of the steps below. // It's fine to do a fake add_row() and return OK, because we will check _cancelled in next add_row() or mark_close(). - while (!_cancelled && _parent->_mem_tracker->AnyLimitExceeded(MemLimit::HARD) && - _pending_batches_num > 0) { + while (!_cancelled && _parent->_mem_tracker->any_limit_exceeded() && _pending_batches_num > 0) { SCOPED_ATOMIC_TIMER(&_mem_exceeded_block_ns); SleepFor(MonoDelta::FromMilliseconds(10)); } @@ -295,8 +294,7 @@ Status NodeChannel::add_row(BlockRow& block_row, int64_t tablet_id) { // But there is still some unfinished things, we do mem limit here temporarily. // _cancelled may be set by rpc callback, and it's possible that _cancelled might be set in any of the steps below. // It's fine to do a fake add_row() and return OK, because we will check _cancelled in next add_row() or mark_close(). 
- while (!_cancelled && _parent->_mem_tracker->AnyLimitExceeded(MemLimit::HARD) && - _pending_batches_num > 0) { + while (!_cancelled && _parent->_mem_tracker->any_limit_exceeded() && _pending_batches_num > 0) { SCOPED_ATOMIC_TIMER(&_mem_exceeded_block_ns); SleepFor(MonoDelta::FromMilliseconds(10)); } @@ -708,9 +706,8 @@ Status OlapTableSink::prepare(RuntimeState* state) { // profile must add to state's object pool _profile = state->obj_pool()->add(new RuntimeProfile("OlapTableSink")); _mem_tracker = - MemTracker::CreateTracker(-1, "OlapTableSink:" + std::to_string(state->load_job_id()), - state->instance_mem_tracker(), true, false); - + MemTracker::create_tracker(-1, "OlapTableSink:" + std::to_string(state->load_job_id()), + state->instance_mem_tracker()); SCOPED_TIMER(_profile->total_time_counter()); // Prepare the exprs to run. diff --git a/be/src/exec/union_node.cpp b/be/src/exec/union_node.cpp index cbb4bc9d5d27f0..e4d13e2005ad80 100644 --- a/be/src/exec/union_node.cpp +++ b/be/src/exec/union_node.cpp @@ -27,8 +27,6 @@ #include "gen_cpp/PlanNodes_types.h" #include "util/runtime_profile.h" -// - namespace doris { UnionNode::UnionNode(ObjectPool* pool, const TPlanNode& tnode, const DescriptorTbl& descs) diff --git a/be/src/exprs/agg_fn.h b/be/src/exprs/agg_fn.h index aa15a67c89848e..684c937cce8f91 100644 --- a/be/src/exprs/agg_fn.h +++ b/be/src/exprs/agg_fn.h @@ -27,7 +27,6 @@ namespace doris { using doris_udf::FunctionContext; class MemPool; -class MemTracker; class ObjectPool; class RuntimeState; class Tuple; diff --git a/be/src/exprs/agg_fn_evaluator.cpp b/be/src/exprs/agg_fn_evaluator.cpp index 2b0679cf4c3fe2..7d790d536184b3 100644 --- a/be/src/exprs/agg_fn_evaluator.cpp +++ b/be/src/exprs/agg_fn_evaluator.cpp @@ -269,7 +269,7 @@ Status AggFnEvaluator::open(RuntimeState* state, FunctionContext* agg_fn_ctx) { void AggFnEvaluator::close(RuntimeState* state) { Expr::close(_input_exprs_ctxs, state); if (UNLIKELY(_total_mem_consumption > 0)) { - 
_mem_tracker->Release(_total_mem_consumption); + _mem_tracker->release(_total_mem_consumption); } } @@ -440,7 +440,7 @@ void AggFnEvaluator::update_mem_limlits(int len) { _accumulated_mem_consumption += len; // per 16M , update mem_tracker one time if (UNLIKELY(_accumulated_mem_consumption > 16777216)) { - _mem_tracker->Consume(_accumulated_mem_consumption); + _mem_tracker->consume(_accumulated_mem_consumption); _total_mem_consumption += _accumulated_mem_consumption; _accumulated_mem_consumption = 0; } diff --git a/be/src/exprs/anyval_util.cpp b/be/src/exprs/anyval_util.cpp index fabdb505cccb36..7cf49edbf432e7 100644 --- a/be/src/exprs/anyval_util.cpp +++ b/be/src/exprs/anyval_util.cpp @@ -38,9 +38,10 @@ Status allocate_any_val(RuntimeState* state, MemPool* pool, const TypeDescriptor const std::string& mem_limit_exceeded_msg, AnyVal** result) { const int anyval_size = AnyValUtil::any_val_size(type); const int anyval_alignment = AnyValUtil::any_val_alignment(type); - *result = reinterpret_cast(pool->try_allocate_aligned(anyval_size, anyval_alignment)); + Status rst; + *result = reinterpret_cast(pool->try_allocate_aligned(anyval_size, anyval_alignment, &rst)); if (*result == nullptr) { - return pool->mem_tracker()->MemLimitExceeded(state, mem_limit_exceeded_msg, anyval_size); + RETURN_LIMIT_EXCEEDED(pool->mem_tracker(), state, mem_limit_exceeded_msg, anyval_size, rst); } memset(static_cast(*result), 0, anyval_size); return Status::OK(); diff --git a/be/src/exprs/bloomfilter_predicate.h b/be/src/exprs/bloomfilter_predicate.h index 2af9e32211e007..a55a1ce6ab95b0 100644 --- a/be/src/exprs/bloomfilter_predicate.h +++ b/be/src/exprs/bloomfilter_predicate.h @@ -97,11 +97,9 @@ class BloomFilterFuncBase : public IBloomFilterFuncBase { public: BloomFilterFuncBase(MemTracker* tracker) : _tracker(tracker), _inited(false) {} - virtual ~BloomFilterFuncBase() { - if (_tracker != nullptr) { - _tracker->Release(_bloom_filter_alloced); - } - } + // Do not release 
_bloom_filter_alloced, this does not affect the final statistic. + // RuntimeFilterMgr._tracker will be destructed first in ~RuntimeState. + virtual ~BloomFilterFuncBase() {} Status init(int64_t expect_num, double fpp) override { size_t filter_size = BloomFilterAdaptor::optimal_bit_num(expect_num, fpp); @@ -115,7 +113,7 @@ class BloomFilterFuncBase : public IBloomFilterFuncBase { _bloom_filter_alloced = bloom_filter_length; _bloom_filter.reset(BloomFilterAdaptor::create()); RETURN_IF_ERROR(_bloom_filter->init(bloom_filter_length)); - _tracker->Consume(_bloom_filter_alloced); + _tracker->consume(_bloom_filter_alloced); _inited = true; return Status::OK(); } @@ -138,7 +136,7 @@ class BloomFilterFuncBase : public IBloomFilterFuncBase { } _bloom_filter_alloced = len; - _tracker->Consume(_bloom_filter_alloced); + _tracker->consume(_bloom_filter_alloced); return _bloom_filter->init(data, len); } diff --git a/be/src/exprs/expr.cpp b/be/src/exprs/expr.cpp index 742906895946c0..d4c00f75215814 100644 --- a/be/src/exprs/expr.cpp +++ b/be/src/exprs/expr.cpp @@ -822,8 +822,7 @@ void Expr::assign_fn_ctx_idx(int* next_fn_ctx_idx) { } Status Expr::create(const TExpr& texpr, const RowDescriptor& row_desc, RuntimeState* state, - ObjectPool* pool, Expr** scalar_expr, - const std::shared_ptr& tracker) { + ObjectPool* pool, Expr** scalar_expr) { *scalar_expr = nullptr; Expr* root; RETURN_IF_ERROR(create_expr(pool, texpr.nodes[0], &root)); @@ -846,12 +845,11 @@ Status Expr::create(const TExpr& texpr, const RowDescriptor& row_desc, RuntimeSt } Status Expr::create(const std::vector& texprs, const RowDescriptor& row_desc, - RuntimeState* state, ObjectPool* pool, std::vector* exprs, - const std::shared_ptr& tracker) { + RuntimeState* state, ObjectPool* pool, std::vector* exprs) { exprs->clear(); for (const TExpr& texpr : texprs) { Expr* expr; - RETURN_IF_ERROR(create(texpr, row_desc, state, pool, &expr, tracker)); + RETURN_IF_ERROR(create(texpr, row_desc, state, pool, &expr)); DCHECK(expr 
!= nullptr); exprs->push_back(expr); } @@ -859,14 +857,13 @@ Status Expr::create(const std::vector& texprs, const RowDescriptor& row_d } Status Expr::create(const TExpr& texpr, const RowDescriptor& row_desc, RuntimeState* state, - Expr** scalar_expr, const std::shared_ptr& tracker) { - return Expr::create(texpr, row_desc, state, state->obj_pool(), scalar_expr, tracker); + Expr** scalar_expr) { + return Expr::create(texpr, row_desc, state, state->obj_pool(), scalar_expr); } Status Expr::create(const std::vector& texprs, const RowDescriptor& row_desc, - RuntimeState* state, std::vector* exprs, - const std::shared_ptr& tracker) { - return Expr::create(texprs, row_desc, state, state->obj_pool(), exprs, tracker); + RuntimeState* state, std::vector* exprs) { + return Expr::create(texprs, row_desc, state, state->obj_pool(), exprs); } Status Expr::create_tree(const TExpr& texpr, ObjectPool* pool, Expr* root) { diff --git a/be/src/exprs/expr.h b/be/src/exprs/expr.h index f8b4aa286fba4f..88619e0fa32f0e 100644 --- a/be/src/exprs/expr.h +++ b/be/src/exprs/expr.h @@ -178,23 +178,21 @@ class Expr { /// tuple row descriptor of the input tuple row. On failure, 'expr' is set to nullptr and /// the expr tree (if created) will be closed. Error status will be returned too. static Status create(const TExpr& texpr, const RowDescriptor& row_desc, RuntimeState* state, - ObjectPool* pool, Expr** expr, const std::shared_ptr& tracker); + ObjectPool* pool, Expr** expr); /// Create a new ScalarExpr based on thrift Expr 'texpr'. The newly created ScalarExpr /// is stored in ObjectPool 'state->obj_pool()' and returned in 'expr'. 'row_desc' is /// the tuple row descriptor of the input tuple row. Returns error status on failure. static Status create(const TExpr& texpr, const RowDescriptor& row_desc, RuntimeState* state, - Expr** expr, const std::shared_ptr& tracker); + Expr** expr); /// Convenience functions creating multiple ScalarExpr. 
static Status create(const std::vector& texprs, const RowDescriptor& row_desc, - RuntimeState* state, ObjectPool* pool, std::vector* exprs, - const std::shared_ptr& tracker); + RuntimeState* state, ObjectPool* pool, std::vector* exprs); /// Convenience functions creating multiple ScalarExpr. static Status create(const std::vector& texprs, const RowDescriptor& row_desc, - RuntimeState* state, std::vector* exprs, - const std::shared_ptr& tracker); + RuntimeState* state, std::vector* exprs); /// Convenience function for preparing multiple expr trees. /// Allocations from 'ctxs' will be counted against 'tracker'. diff --git a/be/src/exprs/expr_context.cpp b/be/src/exprs/expr_context.cpp index 40e93ee66a14fc..46aa9c158514b6 100644 --- a/be/src/exprs/expr_context.cpp +++ b/be/src/exprs/expr_context.cpp @@ -49,15 +49,13 @@ ExprContext::~ExprContext() { } } -// TODO(zc): memory tracker Status ExprContext::prepare(RuntimeState* state, const RowDescriptor& row_desc, const std::shared_ptr& tracker) { - DCHECK(tracker != nullptr) << std::endl << get_stack_trace(); + DCHECK(!_prepared); + _mem_tracker = tracker; DCHECK(_pool.get() == nullptr); _prepared = true; - // TODO: use param tracker to replace instance_mem_tracker, be careful about tracker's life cycle - // _pool.reset(new MemPool(new MemTracker(-1))); - _pool.reset(new MemPool(state->instance_mem_tracker().get())); + _pool.reset(new MemPool(_mem_tracker.get())); return _root->prepare(state, row_desc, this); } @@ -123,6 +121,7 @@ Status ExprContext::clone(RuntimeState* state, ExprContext** new_ctx) { (*new_ctx)->_is_clone = true; (*new_ctx)->_prepared = true; (*new_ctx)->_opened = true; + (*new_ctx)->_mem_tracker = _mem_tracker; return _root->open(state, *new_ctx, FunctionContext::THREAD_LOCAL); } @@ -142,6 +141,7 @@ Status ExprContext::clone(RuntimeState* state, ExprContext** new_ctx, Expr* root (*new_ctx)->_is_clone = true; (*new_ctx)->_prepared = true; (*new_ctx)->_opened = true; + (*new_ctx)->_mem_tracker = 
_mem_tracker; return root->open(state, *new_ctx, FunctionContext::THREAD_LOCAL); } @@ -371,10 +371,11 @@ Status ExprContext::get_const_value(RuntimeState* state, Expr& expr, AnyVal** co StringVal* sv = reinterpret_cast(*const_val); if (!sv->is_null && sv->len > 0) { // Make sure the memory is owned by this evaluator. - char* ptr_copy = reinterpret_cast(_pool->try_allocate(sv->len)); + Status rst; + char* ptr_copy = reinterpret_cast(_pool->try_allocate(sv->len, &rst)); if (ptr_copy == nullptr) { - return _pool->mem_tracker()->MemLimitExceeded( - state, "Could not allocate constant string value", sv->len); + RETURN_LIMIT_EXCEEDED(_pool->mem_tracker(), state, + "Could not allocate constant string value", sv->len, rst); } memcpy(ptr_copy, sv->ptr, sv->len); sv->ptr = reinterpret_cast(ptr_copy); diff --git a/be/src/exprs/expr_context.h b/be/src/exprs/expr_context.h index f176240f720f2b..a6593e37bdbeed 100644 --- a/be/src/exprs/expr_context.h +++ b/be/src/exprs/expr_context.h @@ -170,6 +170,9 @@ class ExprContext { /// TODO: revisit this FunctionContext** _fn_contexts_ptr; + // Used to create _pool, if change to raw pointer later, be careful about tracker's life cycle. + std::shared_ptr _mem_tracker; + /// Pool backing fn_contexts_. Counts against the runtime state's UDF mem tracker. 
std::unique_ptr _pool; diff --git a/be/src/exprs/new_agg_fn_evaluator.cc b/be/src/exprs/new_agg_fn_evaluator.cc index 851291708b665d..f5e0ec0e65a098 100644 --- a/be/src/exprs/new_agg_fn_evaluator.cc +++ b/be/src/exprs/new_agg_fn_evaluator.cc @@ -88,19 +88,13 @@ typedef StringVal (*SerializeFn)(FunctionContext*, const StringVal&); typedef AnyVal (*GetValueFn)(FunctionContext*, const AnyVal&); typedef AnyVal (*FinalizeFn)(FunctionContext*, const AnyVal&); -NewAggFnEvaluator::NewAggFnEvaluator(const AggFn& agg_fn, MemPool* mem_pool, - const std::shared_ptr& tracker, bool is_clone) - : _total_mem_consumption(0), - _accumulated_mem_consumption(0), +NewAggFnEvaluator::NewAggFnEvaluator(const AggFn& agg_fn, MemPool* mem_pool, bool is_clone) + : _accumulated_mem_consumption(0), is_clone_(is_clone), agg_fn_(agg_fn), - mem_pool_(mem_pool), - _mem_tracker(tracker) {} + mem_pool_(mem_pool) {} NewAggFnEvaluator::~NewAggFnEvaluator() { - if (UNLIKELY(_total_mem_consumption > 0)) { - _mem_tracker->Release(_total_mem_consumption); - } DCHECK(closed_); } @@ -120,7 +114,7 @@ Status NewAggFnEvaluator::Create(const AggFn& agg_fn, RuntimeState* state, Objec // Create a new AggFn evaluator. 
NewAggFnEvaluator* agg_fn_eval = - pool->add(new NewAggFnEvaluator(agg_fn, mem_pool, tracker, false)); + pool->add(new NewAggFnEvaluator(agg_fn, mem_pool, false)); agg_fn_eval->agg_fn_ctx_.reset(FunctionContextImpl::create_context( state, mem_pool, agg_fn.GetIntermediateTypeDesc(), agg_fn.GetOutputTypeDesc(), @@ -631,7 +625,7 @@ void NewAggFnEvaluator::SerializeOrFinalize(Tuple* src, const SlotDescriptor& ds void NewAggFnEvaluator::ShallowClone(ObjectPool* pool, MemPool* mem_pool, NewAggFnEvaluator** cloned_eval) const { DCHECK(opened_); - *cloned_eval = pool->add(new NewAggFnEvaluator(agg_fn_, mem_pool, _mem_tracker, true)); + *cloned_eval = pool->add(new NewAggFnEvaluator(agg_fn_, mem_pool, true)); (*cloned_eval)->agg_fn_ctx_.reset(agg_fn_ctx_->impl()->clone(mem_pool)); DCHECK_EQ((*cloned_eval)->input_evals_.size(), 0); (*cloned_eval)->input_evals_ = input_evals_; diff --git a/be/src/exprs/new_agg_fn_evaluator.h b/be/src/exprs/new_agg_fn_evaluator.h index 36bdc2f21c4dbc..462c4705a174c5 100644 --- a/be/src/exprs/new_agg_fn_evaluator.h +++ b/be/src/exprs/new_agg_fn_evaluator.h @@ -188,7 +188,6 @@ class NewAggFnEvaluator { static std::string DebugString(const std::vector& evals); private: - uint64_t _total_mem_consumption; uint64_t _accumulated_mem_consumption; // index if has multi count distinct @@ -209,8 +208,6 @@ class NewAggFnEvaluator { /// Owned by the exec node which owns this evaluator. MemPool* mem_pool_ = nullptr; - std::shared_ptr _mem_tracker; // saved c'tor param - /// This contains runtime state such as constant input arguments to the aggregate /// functions and a FreePool from which the intermediate values are allocated. /// Owned by this evaluator. @@ -231,8 +228,7 @@ class NewAggFnEvaluator { doris_udf::AnyVal* staging_merge_input_val_ = nullptr; /// Use Create() instead. 
// Formats a byte count as a space-separated sum of T/G/M/K components,
// preserving more precision than ItoaKMGT (e.g. 3G + 2M -> " 3G 2M 0K").
// Used by the mem-tracker web page when config::memory_leak_detection is on.
//
// NOTE(review): output quirks deliberately preserved from the first version:
//  - every component is preceded by a space, so the result carries a leading
//    space (and negative inputs render as e.g. "- 100K");
//  - a tier is only emitted when its quotient is >= 2 (`> 1`), so exactly 1T
//    falls through and is rendered as "1024G" by the next tier — confirm
//    whether this threshold is intentional before changing it;
//  - the trailing component is always labelled "K", even when it is really a
//    sub-kilobyte byte remainder (e.g. 100 -> " 100K").
string AccurateItoaKMGT(int64 i) {
    const char* sign = "";
    if (i < 0) {
        // We lose one unit of accuracy for the most negative value, but
        // that's OK as this function is only for human readability.
        if (i == std::numeric_limits<int64>::min()) i++;
        sign = "-";
        i = -i;
    }

    string ret = sign;

    // Emit each power-of-1024 tier whose quotient exceeds 1, consuming the
    // corresponding bytes from `i` so lower tiers only see the remainder.
    static const struct {
        int shift;
        const char* suffix;
    } kTiers[] = {{40, "T"}, {30, "G"}, {20, "M"}};
    for (const auto& tier : kTiers) {
        const int64 val = i >> tier.shift;
        if (val > 1) {
            ret += " " + std::to_string(val) + tier.suffix;
            i -= val << tier.shift;
        }
    }

    const int64 kb = i >> 10;
    if (kb > 1) {
        ret += " " + std::to_string(kb) + "K";
        i -= kb << 10; // remaining sub-KiB bytes are dropped
    } else {
        // Fallback keeps the original formatting contract: the leftover value
        // is printed with a "K" suffix even though it is a raw byte count.
        ret += " " + std::to_string(i) + "K";
    }

    return ret;
}
3000 -> 2K 57185920 -> 45M // // Return value: string +// +// AccurateItoaKMGT() +// Description: preserve accuracy // ---------------------------------------------------------------------- string ItoaKMGT(int64 i); +string AccurateItoaKMGT(int64 i); // ---------------------------------------------------------------------- // ParseDoubleRange() diff --git a/be/src/http/action/compaction_action.cpp b/be/src/http/action/compaction_action.cpp index 64f104210ca526..b9548a320d2d61 100644 --- a/be/src/http/action/compaction_action.cpp +++ b/be/src/http/action/compaction_action.cpp @@ -225,8 +225,7 @@ OLAPStatus CompactionAction::_execute_compaction_callback(TabletSharedPtr tablet OLAPStatus status = OLAP_SUCCESS; if (compaction_type == PARAM_COMPACTION_BASE) { - std::string tracker_label = "CompactionAction:BaseCompaction:" + std::to_string(syscall(__NR_gettid)); - BaseCompaction base_compaction(tablet, tracker_label, _compaction_mem_tracker); + BaseCompaction base_compaction(tablet, _compaction_mem_tracker); OLAPStatus res = base_compaction.compact(); if (res != OLAP_SUCCESS && res != OLAP_ERR_BE_NO_SUITABLE_VERSION) { DorisMetrics::instance()->base_compaction_request_failed->increment(1); @@ -235,8 +234,7 @@ OLAPStatus CompactionAction::_execute_compaction_callback(TabletSharedPtr tablet } status = res; } else if (compaction_type == PARAM_COMPACTION_CUMULATIVE) { - std::string tracker_label = "CompactionAction:CumulativeCompaction:" + std::to_string(syscall(__NR_gettid)); - CumulativeCompaction cumulative_compaction(tablet, tracker_label, _compaction_mem_tracker); + CumulativeCompaction cumulative_compaction(tablet, _compaction_mem_tracker); OLAPStatus res = cumulative_compaction.compact(); if (res != OLAP_SUCCESS && res != OLAP_ERR_CUMULATIVE_NO_SUITABLE_VERSION) { DorisMetrics::instance()->cumulative_compaction_request_failed->increment(1); diff --git a/be/src/http/action/compaction_action.h b/be/src/http/action/compaction_action.h index 
a989c9b8293f6b..8138279aeeb298 100644 --- a/be/src/http/action/compaction_action.h +++ b/be/src/http/action/compaction_action.h @@ -39,10 +39,11 @@ const std::string PARAM_COMPACTION_CUMULATIVE = "cumulative"; /// See compaction-action.md for details. class CompactionAction : public HttpHandler { public: - CompactionAction(CompactionActionType type) - : _type(type) { - _compaction_mem_tracker = type == RUN_COMPACTION ? - MemTracker::CreateTracker(-1, "ManualCompaction", nullptr, false, false, MemTrackerLevel::TASK) : nullptr; + CompactionAction(CompactionActionType type) : _type(type) { + _compaction_mem_tracker = + type == RUN_COMPACTION ? MemTracker::create_tracker(-1, "ManualCompaction", nullptr, + MemTrackerLevel::TASK) + : nullptr; } virtual ~CompactionAction() {} diff --git a/be/src/http/default_path_handlers.cpp b/be/src/http/default_path_handlers.cpp index d8416970dc00fe..2b7803344b6a9d 100644 --- a/be/src/http/default_path_handlers.cpp +++ b/be/src/http/default_path_handlers.cpp @@ -144,12 +144,22 @@ void mem_tracker_handler(const WebPageHandler::ArgumentMap& args, std::stringstr (*output) << "\n"; std::vector> trackers; - MemTracker::ListTrackers(&trackers); + MemTracker::list_process_trackers(&trackers); for (const shared_ptr& tracker : trackers) { string parent = tracker->parent() == nullptr ? "none" : tracker->parent()->label(); - string limit_str = tracker->limit() == -1 ? "none" : ItoaKMGT(tracker->limit()); - string current_consumption_str = ItoaKMGT(tracker->consumption()); - string peak_consumption_str = ItoaKMGT(tracker->peak_consumption()); + string limit_str; + string current_consumption_str; + string peak_consumption_str; + if (!config::memory_leak_detection) { + limit_str = tracker->limit() == -1 ? "none" : ItoaKMGT(tracker->limit()); + current_consumption_str = ItoaKMGT(tracker->consumption()); + peak_consumption_str = ItoaKMGT(tracker->peak_consumption()); + } else { + limit_str = tracker->limit() == -1 ? 
"none" : AccurateItoaKMGT(tracker->limit()); + current_consumption_str = AccurateItoaKMGT(tracker->consumption()); + peak_consumption_str = AccurateItoaKMGT(tracker->peak_consumption()); + } + int64_t use_count = tracker.use_count(); (*output) << strings::Substitute( "$0$1$2" // id, parent, limit diff --git a/be/src/olap/aggregate_func.h b/be/src/olap/aggregate_func.h index 5173efd7f15965..141beed99f9617 100644 --- a/be/src/olap/aggregate_func.h +++ b/be/src/olap/aggregate_func.h @@ -488,7 +488,7 @@ struct AggregateFuncTraitsdata = reinterpret_cast(hll); - mem_pool->mem_tracker()->Consume(hll->memory_consumed()); + mem_pool->mem_tracker()->consume(hll->memory_consumed()); agg_pool->add(hll); } @@ -534,7 +534,7 @@ struct AggregateFuncTraitssize = 0; auto bitmap = new BitmapValue(src_slice->data); - mem_pool->mem_tracker()->Consume(sizeof(BitmapValue)); + mem_pool->mem_tracker()->consume(sizeof(BitmapValue)); dst_slice->data = (char*)bitmap; agg_pool->add(bitmap); diff --git a/be/src/olap/base_compaction.cpp b/be/src/olap/base_compaction.cpp index f1722d328a4302..c34fb63d2da0f5 100644 --- a/be/src/olap/base_compaction.cpp +++ b/be/src/olap/base_compaction.cpp @@ -22,9 +22,8 @@ namespace doris { -BaseCompaction::BaseCompaction(TabletSharedPtr tablet, const std::string& label, - const std::shared_ptr& parent_tracker) - : Compaction(tablet, label, parent_tracker) {} +BaseCompaction::BaseCompaction(TabletSharedPtr tablet, const std::shared_ptr& parent_tracker) + : Compaction(tablet, "BaseCompaction:" + std::to_string(tablet->tablet_id()), parent_tracker) {} BaseCompaction::~BaseCompaction() {} diff --git a/be/src/olap/base_compaction.h b/be/src/olap/base_compaction.h index 54088ea48d7dfe..61a50e62641bb6 100644 --- a/be/src/olap/base_compaction.h +++ b/be/src/olap/base_compaction.h @@ -29,8 +29,7 @@ namespace doris { class BaseCompaction : public Compaction { public: - BaseCompaction(TabletSharedPtr tablet, const std::string& label, - const std::shared_ptr& 
parent_tracker); + BaseCompaction(TabletSharedPtr tablet, const std::shared_ptr& parent_tracker); ~BaseCompaction() override; OLAPStatus prepare_compact() override; diff --git a/be/src/olap/collect_iterator.h b/be/src/olap/collect_iterator.h index fb6747790e83b2..228d629fcbbe94 100644 --- a/be/src/olap/collect_iterator.h +++ b/be/src/olap/collect_iterator.h @@ -17,6 +17,8 @@ #pragma once +#include + #include "olap/olap_define.h" #include "olap/row_cursor.h" #include "olap/rowset/rowset_reader.h" diff --git a/be/src/olap/compaction.cpp b/be/src/olap/compaction.cpp index edb7559752eb55..f02b60813fe788 100644 --- a/be/src/olap/compaction.cpp +++ b/be/src/olap/compaction.cpp @@ -28,11 +28,14 @@ namespace doris { Compaction::Compaction(TabletSharedPtr tablet, const std::string& label, const std::shared_ptr& parent_tracker) - : _mem_tracker(MemTracker::CreateTracker(-1, label, parent_tracker, true, false, MemTrackerLevel::TASK)), - _readers_tracker(MemTracker::CreateTracker(-1, "CompactionReaderTracker:" + std::to_string(tablet->tablet_id()), _mem_tracker, - true, false)), - _writer_tracker(MemTracker::CreateTracker(-1, "CompationWriterTracker:" + std::to_string(tablet->tablet_id()), _mem_tracker, - true, false)), + : _mem_tracker( + MemTracker::create_tracker(-1, label, parent_tracker, MemTrackerLevel::INSTANCE)), + _readers_tracker(MemTracker::create_tracker( + -1, "CompactionReaderTracker:" + std::to_string(tablet->tablet_id()), + _mem_tracker)), + _writer_tracker(MemTracker::create_tracker( + -1, "CompationWriterTracker:" + std::to_string(tablet->tablet_id()), + _mem_tracker)), _tablet(tablet), _input_rowsets_size(0), _input_row_num(0), @@ -173,9 +176,9 @@ OLAPStatus Compaction::construct_input_rowset_readers() { for (auto& rowset : _input_rowsets) { RowsetReaderSharedPtr rs_reader; RETURN_NOT_OK(rowset->create_reader( - MemTracker::CreateTracker( + MemTracker::create_tracker( -1, "Compaction:RowsetReader:" + rowset->rowset_id().to_string(), - _readers_tracker, true, 
true), + _readers_tracker), &rs_reader)); _input_rs_readers.push_back(std::move(rs_reader)); } diff --git a/be/src/olap/cumulative_compaction.cpp b/be/src/olap/cumulative_compaction.cpp index 67efa073635b93..58ff0cb3bf068b 100644 --- a/be/src/olap/cumulative_compaction.cpp +++ b/be/src/olap/cumulative_compaction.cpp @@ -23,9 +23,8 @@ namespace doris { -CumulativeCompaction::CumulativeCompaction(TabletSharedPtr tablet, const std::string& label, - const std::shared_ptr& parent_tracker) - : Compaction(tablet, label, parent_tracker) {} +CumulativeCompaction::CumulativeCompaction(TabletSharedPtr tablet, const std::shared_ptr& parent_tracker) + : Compaction(tablet, "CumulativeCompaction:" + std::to_string(tablet->tablet_id()), parent_tracker) {} CumulativeCompaction::~CumulativeCompaction() {} diff --git a/be/src/olap/cumulative_compaction.h b/be/src/olap/cumulative_compaction.h index c1d742de9f03d7..c5c991a43ee137 100644 --- a/be/src/olap/cumulative_compaction.h +++ b/be/src/olap/cumulative_compaction.h @@ -27,8 +27,7 @@ namespace doris { class CumulativeCompaction : public Compaction { public: - CumulativeCompaction(TabletSharedPtr tablet, const std::string& label, - const std::shared_ptr& parent_tracker); + CumulativeCompaction(TabletSharedPtr tablet, const std::shared_ptr& parent_tracker); ~CumulativeCompaction() override; OLAPStatus prepare_compact() override; diff --git a/be/src/olap/delta_writer.cpp b/be/src/olap/delta_writer.cpp index c0b17b210522eb..10607108876f61 100644 --- a/be/src/olap/delta_writer.cpp +++ b/be/src/olap/delta_writer.cpp @@ -97,8 +97,8 @@ OLAPStatus DeltaWriter::init() { return OLAP_ERR_TABLE_NOT_FOUND; } - _mem_tracker = MemTracker::CreateTracker(-1, "DeltaWriter:" + std::to_string(_tablet->tablet_id()), - _parent_mem_tracker); + _mem_tracker = MemTracker::create_tracker( + -1, "DeltaWriter:" + std::to_string(_tablet->tablet_id()), _parent_mem_tracker); // check tablet version number if (_tablet->version_count() > 
config::max_tablet_version_num) { LOG(WARNING) << "failed to init delta writer. version count: " << _tablet->version_count() @@ -289,7 +289,10 @@ OLAPStatus DeltaWriter::close_wait(google::protobuf::RepeatedPtrFieldwait()); - DCHECK_EQ(_mem_tracker->consumption(), 0); + // Cannot directly DCHECK_EQ(_mem_tracker->consumption(), 0); + // In allocate/free of mem_pool, the consume_cache of _mem_tracker will be called, + // and _untracked_mem must be flushed first. + MemTracker::memory_leak_check(_mem_tracker.get()); // use rowset meta manager to save meta _cur_rowset = _rowset_writer->build(); @@ -332,7 +335,7 @@ OLAPStatus DeltaWriter::cancel() { // cancel and wait all memtables in flush queue to be finished _flush_token->cancel(); } - DCHECK_EQ(_mem_tracker->consumption(), 0); + MemTracker::memory_leak_check(_mem_tracker.get()); _is_cancelled = true; return OLAP_SUCCESS; } diff --git a/be/src/olap/fs/block_manager.h b/be/src/olap/fs/block_manager.h index 58b15e5b8e8bb0..9a9f9c32a91452 100644 --- a/be/src/olap/fs/block_manager.h +++ b/be/src/olap/fs/block_manager.h @@ -30,7 +30,6 @@ namespace doris { class BlockId; class Env; -class MemTracker; struct Slice; namespace fs { @@ -181,10 +180,6 @@ struct CreateBlockOptions { struct BlockManagerOptions { BlockManagerOptions() = default; - // The memory tracker under which all new memory trackers will be parented. - // If nullptr, new memory trackers will be parented to the root tracker. - std::shared_ptr parent_mem_tracker; - // If false, metrics will not be produced. 
bool enable_metric = false; diff --git a/be/src/olap/fs/file_block_manager.cpp b/be/src/olap/fs/file_block_manager.cpp index 8e54df99d6de07..72b0c43d374583 100644 --- a/be/src/olap/fs/file_block_manager.cpp +++ b/be/src/olap/fs/file_block_manager.cpp @@ -32,7 +32,6 @@ #include "olap/fs/block_id.h" #include "olap/fs/block_manager_metrics.h" #include "olap/storage_engine.h" -#include "runtime/mem_tracker.h" #include "util/doris_metrics.h" #include "util/file_cache.h" #include "util/metrics.h" @@ -367,9 +366,7 @@ Status FileReadableBlock::readv(uint64_t offset, const Slice* results, size_t re FileBlockManager::FileBlockManager(Env* env, BlockManagerOptions opts) : _env(DCHECK_NOTNULL(env)), - _opts(std::move(opts)), - _mem_tracker(MemTracker::CreateTracker(-1, "FileBlockManager", _opts.parent_mem_tracker, - false, false, MemTrackerLevel::OVERVIEW)) { + _opts(std::move(opts)) { if (_opts.enable_metric) { _metrics.reset(new internal::BlockManagerMetrics()); } diff --git a/be/src/olap/fs/file_block_manager.h b/be/src/olap/fs/file_block_manager.h index cf8e49d9a3f434..faf053e10069c3 100644 --- a/be/src/olap/fs/file_block_manager.h +++ b/be/src/olap/fs/file_block_manager.h @@ -31,7 +31,6 @@ namespace doris { class BlockId; class Env; -class MemTracker; class RandomAccessFile; namespace fs { @@ -113,10 +112,6 @@ class FileBlockManager : public BlockManager { // May be null if instantiated without metrics. std::unique_ptr _metrics; - // Tracks memory consumption of any allocations numerous enough to be - // interesting. - std::shared_ptr _mem_tracker; - // DISALLOW_COPY_AND_ASSIGN(FileBlockManager); // Underlying cache instance. Caches opened files. 
diff --git a/be/src/olap/generic_iterators.cpp b/be/src/olap/generic_iterators.cpp index 1b8f176637ac96..0d31955aad3844 100644 --- a/be/src/olap/generic_iterators.cpp +++ b/be/src/olap/generic_iterators.cpp @@ -210,7 +210,7 @@ class MergeIterator : public RowwiseIterator { MergeIterator(std::vector iters, std::shared_ptr parent, int sequence_id_idx) : _origin_iters(std::move(iters)), _sequence_id_idx(sequence_id_idx), _merge_heap(MergeContextComparator(_sequence_id_idx)) { // use for count the mem use of Block use in Merge - _mem_tracker = MemTracker::CreateTracker(-1, "MergeIterator", std::move(parent), false); + _mem_tracker = MemTracker::create_tracker(-1, "MergeIterator", std::move(parent)); } ~MergeIterator() override { @@ -325,7 +325,7 @@ class UnionIterator : public RowwiseIterator { // Client should not use iterators any more. UnionIterator(std::vector &v, std::shared_ptr parent) : _origin_iters(v.begin(), v.end()) { - _mem_tracker = MemTracker::CreateTracker(-1, "UnionIterator", parent, false); + _mem_tracker = MemTracker::create_tracker(-1, "UnionIterator", parent); } ~UnionIterator() override { diff --git a/be/src/olap/lru_cache.cpp b/be/src/olap/lru_cache.cpp index ca73ab1d8ba1fc..5d2151f97ecd9b 100644 --- a/be/src/olap/lru_cache.cpp +++ b/be/src/olap/lru_cache.cpp @@ -438,12 +438,10 @@ uint32_t ShardedLRUCache::_shard(uint32_t hash) { return hash >> (32 - kNumShardBits); } -ShardedLRUCache::ShardedLRUCache(const std::string& name, size_t total_capacity, LRUCacheType type, - std::shared_ptr parent) +ShardedLRUCache::ShardedLRUCache(const std::string& name, size_t total_capacity, LRUCacheType type) : _name(name), _last_id(1), - _mem_tracker(MemTracker::CreateTracker(-1, name, parent, true, false, - MemTrackerLevel::OVERVIEW)) { + _mem_tracker(MemTracker::create_tracker(-1, name, nullptr, MemTrackerLevel::OVERVIEW)) { const size_t per_shard = (total_capacity + (kNumShards - 1)) / kNumShards; for (int s = 0; s < kNumShards; s++) { _shards[s] = new 
LRUCache(type); @@ -467,7 +465,7 @@ ShardedLRUCache::~ShardedLRUCache() { } _entity->deregister_hook(_name); DorisMetrics::instance()->metric_registry()->deregister_entity(_entity); - _mem_tracker->Release(_mem_tracker->consumption()); + _mem_tracker->release(_mem_tracker->consumption()); } Cache::Handle* ShardedLRUCache::insert(const CacheKey& key, void* value, size_t charge, @@ -541,17 +539,15 @@ void ShardedLRUCache::update_cache_metrics() const { hit_ratio->set_value(total_lookup_count == 0 ? 0 : ((double)total_hit_count / total_lookup_count)); - _mem_tracker->Consume(total_usage - _mem_tracker->consumption()); + _mem_tracker->consume(total_usage - _mem_tracker->consumption()); } -Cache* new_lru_cache(const std::string& name, size_t capacity, - std::shared_ptr parent_tracker) { - return new ShardedLRUCache(name, capacity, LRUCacheType::SIZE, parent_tracker); +Cache* new_lru_cache(const std::string& name, size_t capacity) { + return new ShardedLRUCache(name, capacity, LRUCacheType::SIZE); } -Cache* new_typed_lru_cache(const std::string& name, size_t capacity, LRUCacheType type, - std::shared_ptr parent_tracker) { - return new ShardedLRUCache(name, capacity, type, parent_tracker); +Cache* new_typed_lru_cache(const std::string& name, size_t capacity, LRUCacheType type) { + return new ShardedLRUCache(name, capacity, type); } } // namespace doris diff --git a/be/src/olap/lru_cache.h b/be/src/olap/lru_cache.h index 0c4bf69795a5d9..e666a45e0b7d38 100644 --- a/be/src/olap/lru_cache.h +++ b/be/src/olap/lru_cache.h @@ -56,11 +56,9 @@ enum LRUCacheType { // Create a new cache with a specified name and a fixed SIZE capacity. // This implementation of Cache uses a least-recently-used eviction policy. 
-extern Cache* new_lru_cache(const std::string& name, size_t capacity, - std::shared_ptr parent_tracekr = nullptr); +extern Cache* new_lru_cache(const std::string& name, size_t capacity); -extern Cache* new_typed_lru_cache(const std::string& name, size_t capacity, LRUCacheType type, - std::shared_ptr parent_tracekr = nullptr); +extern Cache* new_typed_lru_cache(const std::string& name, size_t capacity, LRUCacheType type); class CacheKey { public: @@ -362,8 +360,7 @@ static const int kNumShards = 1 << kNumShardBits; class ShardedLRUCache : public Cache { public: - explicit ShardedLRUCache(const std::string& name, size_t total_capacity, LRUCacheType type, - std::shared_ptr parent); + explicit ShardedLRUCache(const std::string& name, size_t total_capacity, LRUCacheType type); // TODO(fdy): 析构时清除所有cache元素 virtual ~ShardedLRUCache(); virtual Handle* insert(const CacheKey& key, void* value, size_t charge, diff --git a/be/src/olap/memtable.cpp b/be/src/olap/memtable.cpp index 835842abb17dc8..69623575e23f96 100644 --- a/be/src/olap/memtable.cpp +++ b/be/src/olap/memtable.cpp @@ -38,7 +38,7 @@ MemTable::MemTable(int64_t tablet_id, Schema* schema, const TabletSchema* tablet _tablet_schema(tablet_schema), _slot_descs(slot_descs), _keys_type(keys_type), - _mem_tracker(MemTracker::CreateTracker(-1, "MemTable", parent_tracker)), + _mem_tracker(MemTracker::create_tracker(-1, "MemTable", parent_tracker)), _buffer_mem_pool(new MemPool(_mem_tracker.get())), _table_mem_pool(new MemPool(_mem_tracker.get())), _schema_size(_schema->schema_size()), diff --git a/be/src/olap/memtable.h b/be/src/olap/memtable.h index 4282ba5c06d81d..6849bf45f4e4bb 100644 --- a/be/src/olap/memtable.h +++ b/be/src/olap/memtable.h @@ -46,6 +46,7 @@ class MemTable { int64_t tablet_id() const { return _tablet_id; } size_t memory_usage() const { return _mem_tracker->consumption(); } + std::shared_ptr mem_tracker() { return _mem_tracker; } void insert(const Tuple* tuple); /// Flush OLAPStatus flush(); diff --git 
a/be/src/olap/merger.cpp b/be/src/olap/merger.cpp index ec332f0f6caf12..66dfaaf5974a79 100644 --- a/be/src/olap/merger.cpp +++ b/be/src/olap/merger.cpp @@ -48,9 +48,7 @@ OLAPStatus Merger::merge_rowsets(TabletSharedPtr tablet, ReaderType reader_type, "failed to init row cursor when merging rowsets of tablet " + tablet->full_name()); row_cursor.allocate_memory_for_string_type(tablet->tablet_schema()); - // TODO(yingchun): monitor - std::shared_ptr tracker(new MemTracker(-1)); - std::unique_ptr mem_pool(new MemPool(tracker.get())); + std::unique_ptr mem_pool(new MemPool("Merger:merge_rowsets")); // The following procedure would last for long time, half of one day, etc. int64_t output_rows = 0; diff --git a/be/src/olap/olap_index.cpp b/be/src/olap/olap_index.cpp index 14ae7e4434cb0d..85f0625889c74c 100644 --- a/be/src/olap/olap_index.cpp +++ b/be/src/olap/olap_index.cpp @@ -40,8 +40,7 @@ MemIndex::MemIndex() _index_size(0), _data_size(0), _num_rows(0), - _tracker(new MemTracker(-1)), - _mem_pool(new MemPool(_tracker.get())) {} + _mem_pool(new MemPool("MemIndex")) {} MemIndex::~MemIndex() { _num_entries = 0; diff --git a/be/src/olap/olap_index.h b/be/src/olap/olap_index.h index 1b9c704c41007f..11e22d2b67f89c 100644 --- a/be/src/olap/olap_index.h +++ b/be/src/olap/olap_index.h @@ -291,7 +291,6 @@ class MemIndex { size_t _num_rows; std::vector* _short_key_columns; - std::shared_ptr _tracker; std::unique_ptr _mem_pool; DISALLOW_COPY_AND_ASSIGN(MemIndex); }; diff --git a/be/src/olap/page_cache.cpp b/be/src/olap/page_cache.cpp index 76dd0542a85f4e..37ef78925be8b4 100644 --- a/be/src/olap/page_cache.cpp +++ b/be/src/olap/page_cache.cpp @@ -29,14 +29,19 @@ void StoragePageCache::create_global_cache(size_t capacity, int32_t index_cache_ StoragePageCache::StoragePageCache(size_t capacity, int32_t index_cache_percentage) : _index_cache_percentage(index_cache_percentage), - _mem_tracker(MemTracker::CreateTracker(capacity, "StoragePageCache", nullptr, true, true, 
MemTrackerLevel::OVERVIEW)) { + _mem_tracker(MemTracker::create_tracker(capacity, "StoragePageCache", nullptr, + MemTrackerLevel::OVERVIEW)) { if (index_cache_percentage == 0) { - _data_page_cache = std::unique_ptr(new_lru_cache("DataPageCache", capacity, _mem_tracker)); + _data_page_cache = + std::unique_ptr(new_lru_cache("DataPageCache", capacity)); } else if (index_cache_percentage == 100) { - _index_page_cache = std::unique_ptr(new_lru_cache("IndexPageCache", capacity, _mem_tracker)); + _index_page_cache = + std::unique_ptr(new_lru_cache("IndexPageCache", capacity)); } else if (index_cache_percentage > 0 && index_cache_percentage < 100) { - _data_page_cache = std::unique_ptr(new_lru_cache("DataPageCache", capacity * (100 - index_cache_percentage) / 100, _mem_tracker)); - _index_page_cache = std::unique_ptr(new_lru_cache("IndexPageCache", capacity * index_cache_percentage / 100, _mem_tracker)); + _data_page_cache = std::unique_ptr(new_lru_cache( + "DataPageCache", capacity * (100 - index_cache_percentage) / 100)); + _index_page_cache = std::unique_ptr(new_lru_cache( + "IndexPageCache", capacity * index_cache_percentage / 100)); } else { CHECK(false) << "invalid index page cache percentage"; } diff --git a/be/src/olap/push_handler.cpp b/be/src/olap/push_handler.cpp index 6ddaa37de03b92..9b85206b370d7f 100644 --- a/be/src/olap/push_handler.cpp +++ b/be/src/olap/push_handler.cpp @@ -905,7 +905,7 @@ OLAPStatus PushBrokerReader::init(const Schema* schema, const TBrokerScanRange& } _runtime_profile = _runtime_state->runtime_profile(); _runtime_profile->set_name("PushBrokerReader"); - _mem_tracker = MemTracker::CreateTracker(-1, "PushBrokerReader", + _mem_tracker = MemTracker::create_tracker(-1, "PushBrokerReader", _runtime_state->instance_mem_tracker()); _mem_pool.reset(new MemPool(_mem_tracker.get())); _counter.reset(new ScannerCounter()); diff --git a/be/src/olap/reader.cpp b/be/src/olap/reader.cpp index ec0f4fa9db6df5..363d330596d38a 100644 --- 
a/be/src/olap/reader.cpp +++ b/be/src/olap/reader.cpp @@ -107,9 +107,7 @@ TabletReader::~TabletReader() { } OLAPStatus TabletReader::init(const ReaderParams& read_params) { - // TODO(yingchun): monitor - _tracker.reset(new MemTracker(-1, read_params.tablet->full_name())); - _predicate_mem_pool.reset(new MemPool(_tracker.get())); + _predicate_mem_pool.reset(new MemPool(read_params.tablet->full_name())); OLAPStatus res = _init_params(read_params); if (res != OLAP_SUCCESS) { diff --git a/be/src/olap/reader.h b/be/src/olap/reader.h index 3137e0612dd39e..de45f749c4ff13 100644 --- a/be/src/olap/reader.h +++ b/be/src/olap/reader.h @@ -185,7 +185,6 @@ class TabletReader { TabletSharedPtr tablet() { return _tablet; } - std::shared_ptr _tracker; std::unique_ptr _predicate_mem_pool; std::set _load_bf_columns; std::set _load_bf_all_columns; diff --git a/be/src/olap/row_block.cpp b/be/src/olap/row_block.cpp index d6f522093a60cd..1b041c80d00c61 100644 --- a/be/src/olap/row_block.cpp +++ b/be/src/olap/row_block.cpp @@ -39,7 +39,7 @@ namespace doris { RowBlock::RowBlock(const TabletSchema* schema, const std::shared_ptr& parent_tracker) : _capacity(0), _schema(schema) { - _tracker = MemTracker::CreateTracker(-1, "RowBlock", parent_tracker, true, true, MemTrackerLevel::VERBOSE); + _tracker = MemTracker::create_tracker(-1, "RowBlock", parent_tracker, MemTrackerLevel::VERBOSE); _mem_pool.reset(new MemPool(_tracker.get())); } diff --git a/be/src/olap/row_block2.cpp b/be/src/olap/row_block2.cpp index 5d43c94ce3b213..4d0a3bba8d2e07 100644 --- a/be/src/olap/row_block2.cpp +++ b/be/src/olap/row_block2.cpp @@ -39,7 +39,7 @@ RowBlockV2::RowBlockV2(const Schema& schema, uint16_t capacity, std::shared_ptr< : _schema(schema), _capacity(capacity), _column_vector_batches(_schema.num_columns()), - _tracker(MemTracker::CreateTracker(-1, "RowBlockV2", std::move(parent))), + _tracker(MemTracker::create_tracker(-1, "RowBlockV2", std::move(parent))), _pool(new MemPool(_tracker.get())), 
_selection_vector(nullptr) { for (auto cid : _schema.column_ids()) { diff --git a/be/src/olap/rowset/alpha_rowset_reader.h b/be/src/olap/rowset/alpha_rowset_reader.h index e76bb9465d44f5..7c5b5933307f15 100644 --- a/be/src/olap/rowset/alpha_rowset_reader.h +++ b/be/src/olap/rowset/alpha_rowset_reader.h @@ -61,7 +61,6 @@ class AlphaRowsetReader : public RowsetReader { OLAPStatus init(RowsetReaderContext* read_context) override; // read next block data - // If parent_tracker is not null, the block we get from next_block() will have the parent_tracker. // It's ok, because we only get ref here, the block's owner is this reader. OLAPStatus next_block(RowBlock** block) override; diff --git a/be/src/olap/rowset/beta_rowset_reader.h b/be/src/olap/rowset/beta_rowset_reader.h index 55a8938dbfb7f5..ad349b795900c6 100644 --- a/be/src/olap/rowset/beta_rowset_reader.h +++ b/be/src/olap/rowset/beta_rowset_reader.h @@ -37,7 +37,6 @@ class BetaRowsetReader : public RowsetReader { OLAPStatus init(RowsetReaderContext* read_context) override; - // If parent_tracker is not null, the block we get from next_block() will have the parent_tracker. // It's ok, because we only get ref here, the block's owner is this reader. 
OLAPStatus next_block(RowBlock** block) override; OLAPStatus next_block(vectorized::Block* block) override; diff --git a/be/src/olap/rowset/rowset.h b/be/src/olap/rowset/rowset.h index 8e952d48a6ab63..463752f63c708b 100644 --- a/be/src/olap/rowset/rowset.h +++ b/be/src/olap/rowset/rowset.h @@ -32,7 +32,6 @@ namespace doris { class DataDir; -class MemTracker; class OlapTuple; class RowCursor; class Rowset; diff --git a/be/src/olap/rowset/segment_reader.cpp b/be/src/olap/rowset/segment_reader.cpp index 192caaa8789b44..06ec2521ceda4a 100644 --- a/be/src/olap/rowset/segment_reader.cpp +++ b/be/src/olap/rowset/segment_reader.cpp @@ -58,7 +58,7 @@ SegmentReader::SegmentReader(const std::string file, SegmentGroup* segment_group _is_using_mmap(false), _is_data_loaded(false), _buffer_size(0), - _tracker(MemTracker::CreateTracker(-1, "SegmentReader:" + file, parent_tracker, false)), + _tracker(MemTracker::create_tracker(-1, "SegmentReader:" + file, parent_tracker)), _mem_pool(new MemPool(_tracker.get())), _shared_buffer(nullptr), _lru_cache(lru_cache), @@ -86,10 +86,6 @@ SegmentReader::~SegmentReader() { _lru_cache = nullptr; _file_handler.close(); - if (_is_data_loaded && _runtime_state != nullptr) { - MemTracker::update_limits(_buffer_size * -1, _runtime_state->mem_trackers()); - } - for (auto& it : _streams) { delete it.second; } @@ -249,13 +245,6 @@ OLAPStatus SegmentReader::seek_to_block(uint32_t first_block, uint32_t last_bloc return res; } - if (_runtime_state != nullptr) { - MemTracker::update_limits(_buffer_size, _runtime_state->mem_trackers()); - if (MemTracker::limit_exceeded(_runtime_state->mem_trackers())) { - return OLAP_ERR_FETCH_MEMORY_EXCEEDED; - } - } - _is_data_loaded = true; } @@ -836,10 +825,6 @@ OLAPStatus SegmentReader::_reset_readers() { for (std::map::iterator it = _streams.begin(); it != _streams.end(); ++it) { - if (_runtime_state != nullptr) { - MemTracker::update_limits(-1 * it->second->get_buffer_size(), - _runtime_state->mem_trackers()); - } 
delete it->second; } @@ -850,10 +835,6 @@ OLAPStatus SegmentReader::_reset_readers() { if ((*it) == nullptr) { continue; } - if (_runtime_state != nullptr) { - MemTracker::update_limits(-1 * (*it)->get_buffer_size(), - _runtime_state->mem_trackers()); - } delete (*it); } diff --git a/be/src/olap/rowset/segment_v2/binary_dict_page.cpp b/be/src/olap/rowset/segment_v2/binary_dict_page.cpp index 7fc11da95915fb..a56ff9463efefb 100644 --- a/be/src/olap/rowset/segment_v2/binary_dict_page.cpp +++ b/be/src/olap/rowset/segment_v2/binary_dict_page.cpp @@ -37,8 +37,7 @@ BinaryDictPageBuilder::BinaryDictPageBuilder(const PageBuilderOptions& options) _data_page_builder(nullptr), _dict_builder(nullptr), _encoding_type(DICT_ENCODING), - _tracker(new MemTracker()), - _pool(_tracker.get()) { + _pool("BinaryDictPageBuilder") { // initially use DICT_ENCODING // TODO: the data page builder type can be created by Factory according to user config _data_page_builder.reset(new BitshufflePageBuilder(options)); diff --git a/be/src/olap/rowset/segment_v2/binary_dict_page.h b/be/src/olap/rowset/segment_v2/binary_dict_page.h index 54754beab1db9f..f5630ade0261cd 100644 --- a/be/src/olap/rowset/segment_v2/binary_dict_page.h +++ b/be/src/olap/rowset/segment_v2/binary_dict_page.h @@ -32,7 +32,6 @@ #include "olap/rowset/segment_v2/options.h" #include "olap/types.h" #include "runtime/mem_pool.h" -#include "runtime/mem_tracker.h" #include "olap/rowset/segment_v2/bitshuffle_page.h" namespace doris { @@ -91,7 +90,6 @@ class BinaryDictPageBuilder : public PageBuilder { // used to remember the insertion order of dict keys std::vector _dict_items; // TODO(zc): rethink about this mem pool - std::shared_ptr _tracker; MemPool _pool; faststring _buffer; faststring _first_value; diff --git a/be/src/olap/rowset/segment_v2/bitmap_index_reader.h b/be/src/olap/rowset/segment_v2/bitmap_index_reader.h index 91f7bdd379bb91..a40ba8e008008d 100644 --- a/be/src/olap/rowset/segment_v2/bitmap_index_reader.h +++ 
b/be/src/olap/rowset/segment_v2/bitmap_index_reader.h @@ -25,7 +25,6 @@ #include "olap/rowset/segment_v2/common.h" #include "olap/rowset/segment_v2/indexed_column_reader.h" #include "runtime/mem_pool.h" -#include "runtime/mem_tracker.h" namespace doris { @@ -71,8 +70,7 @@ class BitmapIndexIterator { _dict_column_iter(reader->_dict_column_reader.get()), _bitmap_column_iter(reader->_bitmap_column_reader.get()), _current_rowid(0), - _tracker(new MemTracker()), - _pool(new MemPool(_tracker.get())) {} + _pool(new MemPool("BitmapIndexIterator")) {} bool has_null_bitmap() const { return _reader->_has_null; } @@ -109,7 +107,6 @@ class BitmapIndexIterator { IndexedColumnIterator _dict_column_iter; IndexedColumnIterator _bitmap_column_iter; rowid_t _current_rowid; - std::shared_ptr _tracker; std::unique_ptr _pool; }; diff --git a/be/src/olap/rowset/segment_v2/bitmap_index_writer.cpp b/be/src/olap/rowset/segment_v2/bitmap_index_writer.cpp index 6e6d44b075c6a2..bab828b9a63d1d 100644 --- a/be/src/olap/rowset/segment_v2/bitmap_index_writer.cpp +++ b/be/src/olap/rowset/segment_v2/bitmap_index_writer.cpp @@ -26,7 +26,6 @@ #include "olap/rowset/segment_v2/indexed_column_writer.h" #include "olap/types.h" #include "runtime/mem_pool.h" -#include "runtime/mem_tracker.h" #include "util/faststring.h" #include "util/slice.h" @@ -67,8 +66,7 @@ class BitmapIndexWriterImpl : public BitmapIndexWriter { explicit BitmapIndexWriterImpl(std::shared_ptr typeinfo) : _typeinfo(typeinfo), _reverted_index_size(0), - _tracker(new MemTracker()), - _pool(_tracker.get()) {} + _pool("BitmapIndexWriterImpl") {} ~BitmapIndexWriterImpl() = default; @@ -186,7 +184,6 @@ class BitmapIndexWriterImpl : public BitmapIndexWriter { roaring::Roaring _null_bitmap; // unique value to its row id list MemoryIndexType _mem_index; - std::shared_ptr _tracker; MemPool _pool; }; diff --git a/be/src/olap/rowset/segment_v2/bloom_filter_index_reader.h b/be/src/olap/rowset/segment_v2/bloom_filter_index_reader.h index 
cdf6b9a2a39de0..aaef457cab2b03 100644 --- a/be/src/olap/rowset/segment_v2/bloom_filter_index_reader.h +++ b/be/src/olap/rowset/segment_v2/bloom_filter_index_reader.h @@ -27,7 +27,6 @@ #include "olap/rowset/segment_v2/indexed_column_reader.h" #include "olap/rowset/segment_v2/row_ranges.h" #include "runtime/mem_pool.h" -#include "runtime/mem_tracker.h" namespace doris { @@ -69,8 +68,7 @@ class BloomFilterIndexIterator { explicit BloomFilterIndexIterator(BloomFilterIndexReader* reader) : _reader(reader), _bloom_filter_iter(reader->_bloom_filter_reader.get()), - _tracker(new MemTracker()), - _pool(new MemPool(_tracker.get())) {} + _pool(new MemPool("BloomFilterIndexIterator")) {} // Read bloom filter at the given ordinal into `bf`. Status read_bloom_filter(rowid_t ordinal, std::unique_ptr* bf); @@ -80,7 +78,6 @@ class BloomFilterIndexIterator { private: BloomFilterIndexReader* _reader; IndexedColumnIterator _bloom_filter_iter; - std::shared_ptr _tracker; std::unique_ptr _pool; }; diff --git a/be/src/olap/rowset/segment_v2/bloom_filter_index_writer.cpp b/be/src/olap/rowset/segment_v2/bloom_filter_index_writer.cpp index 72485ece21b7fb..45071e7f37c4f8 100644 --- a/be/src/olap/rowset/segment_v2/bloom_filter_index_writer.cpp +++ b/be/src/olap/rowset/segment_v2/bloom_filter_index_writer.cpp @@ -28,7 +28,6 @@ #include "olap/rowset/segment_v2/indexed_column_writer.h" #include "olap/types.h" #include "runtime/mem_pool.h" -#include "runtime/mem_tracker.h" #include "util/faststring.h" #include "util/slice.h" @@ -72,8 +71,7 @@ class BloomFilterIndexWriterImpl : public BloomFilterIndexWriter { std::shared_ptr typeinfo) : _bf_options(bf_options), _typeinfo(typeinfo), - _tracker(new MemTracker(-1, "BloomFilterIndexWriterImpl")), - _pool(_tracker.get()), + _pool("BloomFilterIndexWriterImpl"), _has_null(false), _bf_buffer_size(0) {} @@ -163,7 +161,6 @@ class BloomFilterIndexWriterImpl : public BloomFilterIndexWriter { private: BloomFilterOptions _bf_options; std::shared_ptr _typeinfo; 
- std::shared_ptr _tracker; MemPool _pool; bool _has_null; uint64_t _bf_buffer_size; diff --git a/be/src/olap/rowset/segment_v2/column_reader.cpp b/be/src/olap/rowset/segment_v2/column_reader.cpp index 20d291859619dd..50716e640d0a65 100644 --- a/be/src/olap/rowset/segment_v2/column_reader.cpp +++ b/be/src/olap/rowset/segment_v2/column_reader.cpp @@ -458,7 +458,7 @@ Status ArrayFileColumnIterator::next_batch(size_t* n, ColumnBlockView* dst, bool FileColumnIterator::FileColumnIterator(ColumnReader* reader) : _reader(reader) {} FileColumnIterator::~FileColumnIterator() { - _opts.mem_tracker->Release(_opts.mem_tracker->consumption()); + _opts.mem_tracker->release(_opts.mem_tracker->consumption()); } Status FileColumnIterator::seek_to_first() { diff --git a/be/src/olap/rowset/segment_v2/column_reader.h b/be/src/olap/rowset/segment_v2/column_reader.h index 0c20b75fe93d48..dead2d9ee17df9 100644 --- a/be/src/olap/rowset/segment_v2/column_reader.h +++ b/be/src/olap/rowset/segment_v2/column_reader.h @@ -385,8 +385,7 @@ class DefaultValueColumnIterator : public ColumnIterator { _schema_length(schema_length), _is_default_value_null(false), _type_size(0), - _tracker(new MemTracker()), - _pool(new MemPool(_tracker.get())) {} + _pool(new MemPool("DefaultValueColumnIterator")) {} Status init(const ColumnIteratorOptions& opts) override; @@ -422,7 +421,6 @@ class DefaultValueColumnIterator : public ColumnIterator { bool _is_default_value_null; size_t _type_size; void* _mem_value = nullptr; - std::shared_ptr _tracker; std::unique_ptr _pool; // current rowid diff --git a/be/src/olap/rowset/segment_v2/column_writer.h b/be/src/olap/rowset/segment_v2/column_writer.h index b98f4883ca4e09..1f328f2b9a9144 100644 --- a/be/src/olap/rowset/segment_v2/column_writer.h +++ b/be/src/olap/rowset/segment_v2/column_writer.h @@ -142,9 +142,6 @@ class ColumnWriter { private: std::unique_ptr _field; bool _is_nullable; - -protected: - std::shared_ptr _mem_tracker; }; class FlushPageCallback { diff --git 
a/be/src/olap/rowset/segment_v2/indexed_column_writer.cpp b/be/src/olap/rowset/segment_v2/indexed_column_writer.cpp index 450d12ac6eaa95..e2e8cff4af6190 100644 --- a/be/src/olap/rowset/segment_v2/indexed_column_writer.cpp +++ b/be/src/olap/rowset/segment_v2/indexed_column_writer.cpp @@ -41,8 +41,7 @@ IndexedColumnWriter::IndexedColumnWriter(const IndexedColumnWriterOptions& optio : _options(options), _typeinfo(typeinfo), _wblock(wblock), - _mem_tracker(new MemTracker()), - _mem_pool(_mem_tracker.get()), + _mem_pool("IndexedColumnWriter"), _num_values(0), _num_data_pages(0), _value_key_coder(nullptr), diff --git a/be/src/olap/rowset/segment_v2/indexed_column_writer.h b/be/src/olap/rowset/segment_v2/indexed_column_writer.h index cb219a78e9cf3a..92f4ed91b4947e 100644 --- a/be/src/olap/rowset/segment_v2/indexed_column_writer.h +++ b/be/src/olap/rowset/segment_v2/indexed_column_writer.h @@ -27,7 +27,6 @@ #include "olap/rowset/segment_v2/common.h" #include "olap/rowset/segment_v2/page_pointer.h" #include "runtime/mem_pool.h" -#include "runtime/mem_tracker.h" #include "util/slice.h" namespace doris { @@ -91,7 +90,6 @@ class IndexedColumnWriter { std::shared_ptr _typeinfo; fs::WritableBlock* _wblock; // only used for `_first_value` - std::shared_ptr _mem_tracker; MemPool _mem_pool; ordinal_t _num_values; diff --git a/be/src/olap/rowset/segment_v2/segment.cpp b/be/src/olap/rowset/segment_v2/segment.cpp index 7c313ddc469ae3..2489770e6f4ffc 100644 --- a/be/src/olap/rowset/segment_v2/segment.cpp +++ b/be/src/olap/rowset/segment_v2/segment.cpp @@ -51,15 +51,15 @@ Segment::Segment(const FilePathDesc& path_desc, uint32_t segment_id, const TabletSchema* tablet_schema) : _path_desc(path_desc), _segment_id(segment_id), _tablet_schema(tablet_schema) { #ifndef BE_TEST - _mem_tracker = MemTracker::CreateTracker( - -1, "Segment", StorageEngine::instance()->tablet_mem_tracker(), false); + _mem_tracker = MemTracker::create_tracker( + -1, "Segment", 
StorageEngine::instance()->tablet_mem_tracker()); #else - _mem_tracker = MemTracker::CreateTracker(-1, "Segment", nullptr, false); + _mem_tracker = MemTracker::create_tracker(-1, "Segment"); #endif } Segment::~Segment() { - _mem_tracker->Release(_mem_tracker->consumption()); + _mem_tracker->release(_mem_tracker->consumption()); } Status Segment::_open() { @@ -129,7 +129,7 @@ Status Segment::_parse_footer() { _path_desc.filepath, file_size, 12 + footer_length)); } - _mem_tracker->Consume(footer_length); + _mem_tracker->consume(footer_length); std::string footer_buf; footer_buf.resize(footer_length); @@ -173,7 +173,7 @@ Status Segment::_load_index() { DCHECK_EQ(footer.type(), SHORT_KEY_PAGE); DCHECK(footer.has_short_key_page_footer()); - _mem_tracker->Consume(body.get_size()); + _mem_tracker->consume(body.get_size()); _sk_index_decoder.reset(new ShortKeyIndexDecoder); return _sk_index_decoder->parse(body, footer.short_key_page_footer()); }); @@ -216,8 +216,7 @@ Status Segment::new_column_iterator(uint32_t cid, std::shared_ptr pa tablet_column.has_default_value(), tablet_column.default_value(), tablet_column.is_nullable(), type_info, tablet_column.length())); ColumnIteratorOptions iter_opts; - iter_opts.mem_tracker = - MemTracker::CreateTracker(-1, "DefaultColumnIterator", parent, false); + iter_opts.mem_tracker = MemTracker::create_tracker(-1, "DefaultColumnIterator", parent); RETURN_IF_ERROR(default_value_iter->init(iter_opts)); *iter = default_value_iter.release(); diff --git a/be/src/olap/rowset/segment_v2/segment_iterator.cpp b/be/src/olap/rowset/segment_v2/segment_iterator.cpp index 6a8febb36b11d6..d428dfd8fda5c2 100644 --- a/be/src/olap/rowset/segment_v2/segment_iterator.cpp +++ b/be/src/olap/rowset/segment_v2/segment_iterator.cpp @@ -101,7 +101,7 @@ SegmentIterator::SegmentIterator(std::shared_ptr segment, const Schema& _lazy_materialization_read(false), _inited(false) { // use for count the mem use of ColumnIterator - _mem_tracker = 
MemTracker::CreateTracker(-1, "SegmentIterator", std::move(parent), false); + _mem_tracker = MemTracker::create_tracker(-1, "SegmentIterator", std::move(parent)); } SegmentIterator::~SegmentIterator() { @@ -209,7 +209,7 @@ Status SegmentIterator::_prepare_seek(const StorageReadOptions::KeyRange& key_ra iter_opts.stats = _opts.stats; iter_opts.rblock = _rblock.get(); iter_opts.mem_tracker = - MemTracker::CreateTracker(-1, "ColumnIterator", _mem_tracker, false); + MemTracker::create_tracker(-1, "ColumnIterator", _mem_tracker); RETURN_IF_ERROR(_column_iterators[cid]->init(iter_opts)); } } @@ -341,7 +341,7 @@ Status SegmentIterator::_init_return_column_iterators() { iter_opts.use_page_cache = _opts.use_page_cache; iter_opts.rblock = _rblock.get(); iter_opts.mem_tracker = - MemTracker::CreateTracker(-1, "ColumnIterator", _mem_tracker, false); + MemTracker::create_tracker(-1, "ColumnIterator", _mem_tracker); RETURN_IF_ERROR(_column_iterators[cid]->init(iter_opts)); } } diff --git a/be/src/olap/rowset/segment_v2/segment_writer.cpp b/be/src/olap/rowset/segment_v2/segment_writer.cpp index adbfef96940a14..93c5e885b03aab 100644 --- a/be/src/olap/rowset/segment_v2/segment_writer.cpp +++ b/be/src/olap/rowset/segment_v2/segment_writer.cpp @@ -38,13 +38,13 @@ const uint32_t k_segment_magic_length = 4; SegmentWriter::SegmentWriter(fs::WritableBlock* wblock, uint32_t segment_id, const TabletSchema* tablet_schema, const SegmentWriterOptions& opts, std::shared_ptr parent) - : _segment_id(segment_id), _tablet_schema(tablet_schema), _opts(opts), _wblock(wblock), _mem_tracker(MemTracker::CreateTracker( - -1, "Segment-" + std::to_string(segment_id), parent, false)) { + : _segment_id(segment_id), _tablet_schema(tablet_schema), _opts(opts), _wblock(wblock), _mem_tracker(MemTracker::create_tracker( + -1, "Segment-" + std::to_string(segment_id), parent)) { CHECK_NOTNULL(_wblock); } SegmentWriter::~SegmentWriter() { - _mem_tracker->Release(_mem_tracker->consumption()); + 
_mem_tracker->release(_mem_tracker->consumption()); }; void SegmentWriter::init_column_meta(ColumnMetaPB* meta, uint32_t* column_id, @@ -129,7 +129,7 @@ uint64_t SegmentWriter::estimate_segment_size() { size += _index_builder->size(); // update the mem_tracker of segment size - _mem_tracker->Consume(size - _mem_tracker->consumption()); + _mem_tracker->consume(size - _mem_tracker->consumption()); return size; } diff --git a/be/src/olap/rowset/segment_v2/zone_map_index.cpp b/be/src/olap/rowset/segment_v2/zone_map_index.cpp index b237887e5af259..439f9030ceb60d 100644 --- a/be/src/olap/rowset/segment_v2/zone_map_index.cpp +++ b/be/src/olap/rowset/segment_v2/zone_map_index.cpp @@ -25,14 +25,13 @@ #include "olap/rowset/segment_v2/indexed_column_writer.h" #include "olap/types.h" #include "runtime/mem_pool.h" -#include "runtime/mem_tracker.h" namespace doris { namespace segment_v2 { ZoneMapIndexWriter::ZoneMapIndexWriter(Field* field) - : _field(field), _tracker(new MemTracker(-1, "ZoneMapIndexWriter")), _pool(_tracker.get()) { + : _field(field), _pool("ZoneMapIndexWriter") { _page_zone_map.min_value = _field->allocate_zone_map_value(&_pool); _page_zone_map.max_value = _field->allocate_zone_map_value(&_pool); _reset_zone_map(&_page_zone_map); @@ -129,8 +128,7 @@ Status ZoneMapIndexReader::load(bool use_page_cache, bool kept_in_memory) { RETURN_IF_ERROR(reader.load(use_page_cache, kept_in_memory)); IndexedColumnIterator iter(&reader); - auto tracker = std::make_shared(-1, "temp in ZoneMapIndexReader"); - MemPool pool(tracker.get()); + MemPool pool("ZoneMapIndexReader ColumnBlock"); _page_zone_maps.resize(reader.num_values()); // read and cache all page zone maps diff --git a/be/src/olap/rowset/segment_v2/zone_map_index.h b/be/src/olap/rowset/segment_v2/zone_map_index.h index 0c129c5bd94ecd..f8ddfbb3525b03 100644 --- a/be/src/olap/rowset/segment_v2/zone_map_index.h +++ b/be/src/olap/rowset/segment_v2/zone_map_index.h @@ -27,7 +27,6 @@ #include "olap/field.h" #include 
"olap/rowset/segment_v2/binary_plain_page.h" #include "runtime/mem_pool.h" -#include "runtime/mem_tracker.h" #include "util/slice.h" namespace doris { @@ -109,7 +108,6 @@ class ZoneMapIndexWriter { ZoneMap _segment_zone_map; // TODO(zc): we should replace this memory pool later, we only allocate min/max // for field. But MemPool allocate 4KB least, it will a waste for most cases. - std::shared_ptr _tracker; MemPool _pool; // serialized ZoneMapPB for each data page diff --git a/be/src/olap/schema_change.cpp b/be/src/olap/schema_change.cpp index 0227a6cc608c6a..0bea1167ab86a3 100644 --- a/be/src/olap/schema_change.cpp +++ b/be/src/olap/schema_change.cpp @@ -745,7 +745,7 @@ bool RowBlockSorter::sort(RowBlock** row_block) { RowBlockAllocator::RowBlockAllocator(const TabletSchema& tablet_schema, std::shared_ptr parent, size_t memory_limitation) : _tablet_schema(tablet_schema), - _mem_tracker(MemTracker::CreateTracker(-1, "RowBlockAllocator", parent, false)), + _mem_tracker(MemTracker::create_tracker(-1, "RowBlockAllocator", parent)), _row_len(tablet_schema.row_size()), _memory_limitation(memory_limitation) { VLOG_NOTICE << "RowBlockAllocator(). 
row_len=" << _row_len; @@ -784,7 +784,7 @@ OLAPStatus RowBlockAllocator::allocate(RowBlock** row_block, size_t num_rows, bo row_block_info.null_supported = null_supported; (*row_block)->init(row_block_info); - _mem_tracker->Consume(row_block_size); + _mem_tracker->consume(row_block_size); VLOG_NOTICE << "RowBlockAllocator::allocate() this=" << this << ", num_rows=" << num_rows << ", m_memory_allocated=" << _mem_tracker->consumption() << ", row_block_addr=" << *row_block; @@ -797,7 +797,7 @@ void RowBlockAllocator::release(RowBlock* row_block) { return; } - _mem_tracker->Release(row_block->capacity() * _row_len); + _mem_tracker->release(row_block->capacity() * _row_len); VLOG_NOTICE << "RowBlockAllocator::release() this=" << this << ", num_rows=" << row_block->capacity() @@ -824,7 +824,7 @@ bool RowBlockMerger::merge(const std::vector& row_block_arr, RowsetWr uint64_t tmp_merged_rows = 0; RowCursor row_cursor; std::shared_ptr tracker( - MemTracker::CreateTracker(-1, "RowBlockMerger", parent, false)); + MemTracker::create_tracker(-1, "RowBlockMerger", parent)); std::unique_ptr mem_pool(new MemPool(tracker.get())); std::unique_ptr agg_object_pool(new ObjectPool()); if (row_cursor.init(_tablet->tablet_schema()) != OLAP_SUCCESS) { @@ -1420,7 +1420,7 @@ bool SchemaChangeWithSorting::_external_sorting(vector& src_row } SchemaChangeHandler::SchemaChangeHandler() - : _mem_tracker(MemTracker::CreateTracker(-1, "SchemaChange", StorageEngine::instance()->schema_change_mem_tracker())) { + : _mem_tracker(MemTracker::create_tracker(-1, "SchemaChangeHandler", StorageEngine::instance()->schema_change_mem_tracker())) { REGISTER_HOOK_METRIC(schema_change_mem_consumption, [this]() { return _mem_tracker->consumption(); }); } @@ -1532,8 +1532,11 @@ OLAPStatus SchemaChangeHandler::_do_process_alter_tablet_v2(const TAlterTabletRe reader_context.seek_columns = &return_columns; reader_context.sequence_id_idx = reader_context.tablet_schema->sequence_col_idx(); - auto mem_tracker = 
MemTracker::CreateTracker(-1, "AlterTablet:" + std::to_string(base_tablet->tablet_id()) + "-" - + std::to_string(new_tablet->tablet_id()), _mem_tracker, true, false, MemTrackerLevel::TASK); + auto mem_tracker = MemTracker::create_tracker( + -1, + "AlterTablet:" + std::to_string(base_tablet->tablet_id()) + "-" + + std::to_string(new_tablet->tablet_id()), + _mem_tracker, MemTrackerLevel::TASK); do { // get history data to be converted and it will check if there is hold in base tablet diff --git a/be/src/olap/segment_loader.cpp b/be/src/olap/segment_loader.cpp index 198b4b41543c34..2c3a95689b69bb 100644 --- a/be/src/olap/segment_loader.cpp +++ b/be/src/olap/segment_loader.cpp @@ -30,11 +30,9 @@ void SegmentLoader::create_global_instance(size_t capacity) { _s_instance = &instance; } -SegmentLoader::SegmentLoader(size_t capacity) - : _mem_tracker(MemTracker::CreateTracker(capacity, "SegmentLoader", nullptr, true, true, - MemTrackerLevel::OVERVIEW)) { +SegmentLoader::SegmentLoader(size_t capacity) { _cache = std::unique_ptr( - new_typed_lru_cache("SegmentCache", capacity, LRUCacheType::NUMBER, _mem_tracker)); + new_typed_lru_cache("SegmentLoader:SegmentCache", capacity, LRUCacheType::NUMBER)); } bool SegmentLoader::_lookup(const SegmentLoader::CacheKey& key, SegmentCacheHandle* handle) { diff --git a/be/src/olap/segment_loader.h b/be/src/olap/segment_loader.h index 2a75efa544c2e2..30cfce304d5d28 100644 --- a/be/src/olap/segment_loader.h +++ b/be/src/olap/segment_loader.h @@ -25,7 +25,6 @@ #include "olap/lru_cache.h" #include "olap/olap_common.h" // for rowset id #include "olap/rowset/beta_rowset.h" -#include "runtime/mem_tracker.h" #include "util/time.h" namespace doris { @@ -107,7 +106,6 @@ class SegmentLoader { static SegmentLoader* _s_instance; // A LRU cache to cache all opened segments std::unique_ptr _cache = nullptr; - std::shared_ptr _mem_tracker = nullptr; }; // A handle for a single rowset from segment lru cache. 
diff --git a/be/src/olap/storage_engine.cpp b/be/src/olap/storage_engine.cpp index 209ef86fb44b6c..7db2d24d46c56b 100644 --- a/be/src/olap/storage_engine.cpp +++ b/be/src/olap/storage_engine.cpp @@ -112,10 +112,18 @@ StorageEngine::StorageEngine(const EngineOptions& options) _is_all_cluster_id_exist(true), _index_stream_lru_cache(nullptr), _file_cache(nullptr), - _compaction_mem_tracker(MemTracker::CreateTracker(-1, "AutoCompaction", nullptr, false, - false, MemTrackerLevel::OVERVIEW)), - _tablet_mem_tracker(MemTracker::CreateTracker(-1, "TabletHeader", nullptr, false, false, + _compaction_mem_tracker(MemTracker::create_tracker(-1, "StorageEngine::AutoCompaction", + nullptr, MemTrackerLevel::OVERVIEW)), + _tablet_mem_tracker(MemTracker::create_tracker(-1, "StorageEngine::TabletHeader", nullptr, + MemTrackerLevel::OVERVIEW)), + _schema_change_mem_tracker(MemTracker::create_tracker( + -1, "StorageEngine::SchemaChange", nullptr, MemTrackerLevel::OVERVIEW)), + _clone_mem_tracker(MemTracker::create_tracker(-1, "StorageEngine::Clone", nullptr, MemTrackerLevel::OVERVIEW)), + _batch_load_mem_tracker(MemTracker::create_tracker(-1, "StorageEngine::BatchLoad", + nullptr, MemTrackerLevel::OVERVIEW)), + _consistency_mem_tracker(MemTracker::create_tracker(-1, "StorageEngine::Consistency", + nullptr, MemTrackerLevel::OVERVIEW)), _stop_background_threads_latch(1), _tablet_manager(new TabletManager(config::tablet_map_shard_size)), _txn_manager(new TxnManager(config::txn_map_shard_size, config::txn_shard_size)), @@ -1075,17 +1083,12 @@ bool StorageEngine::check_rowset_id_in_unused_rowsets(const RowsetId& rowset_id) void StorageEngine::create_cumulative_compaction( TabletSharedPtr best_tablet, std::shared_ptr& cumulative_compaction) { - std::string tracker_label = - "StorageEngine:CumulativeCompaction:" + std::to_string(best_tablet->tablet_id()); - cumulative_compaction.reset( - new CumulativeCompaction(best_tablet, tracker_label, _compaction_mem_tracker)); + 
cumulative_compaction.reset(new CumulativeCompaction(best_tablet, _compaction_mem_tracker)); } void StorageEngine::create_base_compaction(TabletSharedPtr best_tablet, std::shared_ptr& base_compaction) { - std::string tracker_label = - "StorageEngine:BaseCompaction:" + std::to_string(best_tablet->tablet_id()); - base_compaction.reset(new BaseCompaction(best_tablet, tracker_label, _compaction_mem_tracker)); + base_compaction.reset(new BaseCompaction(best_tablet, _compaction_mem_tracker)); } // Return json: diff --git a/be/src/olap/storage_engine.h b/be/src/olap/storage_engine.h index 6102242bf2bd0e..0cd1675c867363 100644 --- a/be/src/olap/storage_engine.h +++ b/be/src/olap/storage_engine.h @@ -184,8 +184,12 @@ class StorageEngine { Status get_compaction_status_json(std::string* result); + std::shared_ptr compaction_mem_tracker() { return _compaction_mem_tracker; } std::shared_ptr tablet_mem_tracker() { return _tablet_mem_tracker; } std::shared_ptr schema_change_mem_tracker() { return _schema_change_mem_tracker; } + std::shared_ptr clone_mem_tracker() { return _clone_mem_tracker; } + std::shared_ptr batch_load_mem_tracker() { return _batch_load_mem_tracker; } + std::shared_ptr consistency_mem_tracker() { return _consistency_mem_tracker; } // check cumulative compaction config void check_cumulative_compaction_config(); @@ -322,9 +326,18 @@ class StorageEngine { // map, if we use RowsetId as the key, we need custom hash func std::unordered_map _unused_rowsets; + // Count the memory consumption of all Base and Cumulative tasks. std::shared_ptr _compaction_mem_tracker; + // Count the memory consumption of all Segment read. std::shared_ptr _tablet_mem_tracker; + // Count the memory consumption of all SchemaChange tasks. std::shared_ptr _schema_change_mem_tracker; + // Count the memory consumption of all EngineCloneTask. + std::shared_ptr _clone_mem_tracker; + // Count the memory consumption of all EngineBatchLoadTask. 
+ std::shared_ptr _batch_load_mem_tracker; + // Count the memory consumption of all EngineChecksumTask. + std::shared_ptr _consistency_mem_tracker; CountDownLatch _stop_background_threads_latch; scoped_refptr _unused_rowset_monitor_thread; diff --git a/be/src/olap/tablet_manager.cpp b/be/src/olap/tablet_manager.cpp index ccf88c942c3b77..9e495055f22af1 100644 --- a/be/src/olap/tablet_manager.cpp +++ b/be/src/olap/tablet_manager.cpp @@ -73,8 +73,8 @@ static bool _cmp_tablet_by_create_time(const TabletSharedPtr& a, const TabletSha } TabletManager::TabletManager(int32_t tablet_map_lock_shard_size) - : _mem_tracker(MemTracker::CreateTracker(-1, "TabletMeta", nullptr, false, false, - MemTrackerLevel::OVERVIEW)), + : _mem_tracker(MemTracker::create_tracker(-1, "TabletManager", nullptr, + MemTrackerLevel::OVERVIEW)), _tablets_shards_size(tablet_map_lock_shard_size), _tablets_shards_mask(tablet_map_lock_shard_size - 1) { CHECK_GT(_tablets_shards_size, 0); @@ -85,7 +85,7 @@ TabletManager::TabletManager(int32_t tablet_map_lock_shard_size) } TabletManager::~TabletManager() { - _mem_tracker->Release(_mem_tracker->consumption()); + _mem_tracker->release(_mem_tracker->consumption()); DEREGISTER_HOOK_METRIC(tablet_meta_mem_consumption); } @@ -201,7 +201,7 @@ OLAPStatus TabletManager::_add_tablet_to_map_unlocked(TTabletId tablet_id, Schem // TODO: remove multiply 2 of tablet meta mem size // Because table schema will copy in tablet, there will be double mem cost // so here multiply 2 - _mem_tracker->Consume(tablet->tablet_meta()->mem_size() * 2); + _mem_tracker->consume(tablet->tablet_meta()->mem_size() * 2); VLOG_NOTICE << "add tablet to map successfully." 
<< " tablet_id=" << tablet_id << ", schema_hash=" << schema_hash; @@ -1336,7 +1336,7 @@ OLAPStatus TabletManager::_drop_tablet_directly_unlocked(TTabletId tablet_id, } dropped_tablet->deregister_tablet_from_dir(); - _mem_tracker->Release(dropped_tablet->tablet_meta()->mem_size() * 2); + _mem_tracker->release(dropped_tablet->tablet_meta()->mem_size() * 2); return OLAP_SUCCESS; } diff --git a/be/src/olap/task/engine_alter_tablet_task.cpp b/be/src/olap/task/engine_alter_tablet_task.cpp index 5c26e935896b2b..f04f9a0fe4e2d1 100644 --- a/be/src/olap/task/engine_alter_tablet_task.cpp +++ b/be/src/olap/task/engine_alter_tablet_task.cpp @@ -18,6 +18,7 @@ #include "olap/task/engine_alter_tablet_task.h" #include "olap/schema_change.h" +#include "runtime/mem_tracker.h" namespace doris { diff --git a/be/src/olap/task/engine_checksum_task.cpp b/be/src/olap/task/engine_checksum_task.cpp index c55e55538181b5..931f30e2c30d41 100644 --- a/be/src/olap/task/engine_checksum_task.cpp +++ b/be/src/olap/task/engine_checksum_task.cpp @@ -87,8 +87,7 @@ OLAPStatus EngineChecksumTask::_compute_checksum() { } RowCursor row; - std::shared_ptr tracker(new MemTracker(-1)); - std::unique_ptr mem_pool(new MemPool(tracker.get())); + std::unique_ptr mem_pool(new MemPool("EngineChecksumTask:_compute_checksum")); std::unique_ptr agg_object_pool(new ObjectPool()); res = row.init(tablet->tablet_schema(), reader_params.return_columns); if (res != OLAP_SUCCESS) { diff --git a/be/src/olap/tuple_reader.cpp b/be/src/olap/tuple_reader.cpp index 5c15c2b42f9741..93ba2513d9866a 100644 --- a/be/src/olap/tuple_reader.cpp +++ b/be/src/olap/tuple_reader.cpp @@ -30,7 +30,6 @@ #include "olap/schema.h" #include "olap/storage_engine.h" #include "runtime/mem_pool.h" -#include "runtime/mem_tracker.h" #include "util/date_func.h" using std::nothrow; diff --git a/be/src/runtime/CMakeLists.txt b/be/src/runtime/CMakeLists.txt index c35d2e10167f23..c78111923efa65 100644 --- a/be/src/runtime/CMakeLists.txt +++ 
b/be/src/runtime/CMakeLists.txt @@ -68,6 +68,7 @@ set(RUNTIME_FILES disk_io_mgr_scan_range.cc buffered_block_mgr2.cc mem_tracker.cpp + mem_tracker_task_pool.cpp spill_sorter.cc sorted_run_merger.cc data_stream_recvr.cc diff --git a/be/src/runtime/buffered_block_mgr2.cc b/be/src/runtime/buffered_block_mgr2.cc index 92edcdcabe5c3e..f6edfc6830fc25 100644 --- a/be/src/runtime/buffered_block_mgr2.cc +++ b/be/src/runtime/buffered_block_mgr2.cc @@ -100,8 +100,7 @@ class BufferedBlockMgr2::Client { DCHECK(buffer != nullptr); if (buffer->len == _mgr->max_block_size()) { ++_num_pinned_buffers; - _tracker->ConsumeLocal(buffer->len, _query_tracker.get()); - // _tracker->Consume(buffer->len); + _tracker->consume(buffer->len, _query_tracker.get()); } } @@ -110,8 +109,7 @@ class BufferedBlockMgr2::Client { if (buffer->len == _mgr->max_block_size()) { DCHECK_GT(_num_pinned_buffers, 0); --_num_pinned_buffers; - _tracker->ReleaseLocal(buffer->len, _query_tracker.get()); - // _tracker->Release(buffer->len); + _tracker->release(buffer->len, _query_tracker.get()); } } @@ -261,7 +259,7 @@ int64_t BufferedBlockMgr2::available_buffers(Client* client) const { int64_t BufferedBlockMgr2::remaining_unreserved_buffers() const { int64_t num_buffers = _free_io_buffers.size() + _unpinned_blocks.size() + _non_local_outstanding_writes; - num_buffers += _mem_tracker->SpareCapacity(MemLimit::HARD) / max_block_size(); + num_buffers += _mem_tracker->spare_capacity() / max_block_size(); num_buffers -= _unfullfilled_reserved_buffers; return num_buffers; } @@ -324,24 +322,22 @@ bool BufferedBlockMgr2::consume_memory(Client* client, int64_t size) { } int buffers_needed = BitUtil::ceil(size, max_block_size()); unique_lock lock(_lock); - Status st = _mem_tracker->TryConsume(size); + Status st = _mem_tracker->try_consume(size); WARN_IF_ERROR(st, "consume failed"); if (size < max_block_size() && st) { // For small allocations (less than a block size), just let the allocation through. 
- client->_tracker->ConsumeLocal(size, client->_query_tracker.get()); - // client->_tracker->Consume(size); + client->_tracker->consume(size, client->_query_tracker.get()); return true; } if (available_buffers(client) + client->_num_tmp_reserved_buffers < buffers_needed) { return false; } - st = _mem_tracker->TryConsume(size); + st = _mem_tracker->try_consume(size); WARN_IF_ERROR(st, "consume failed"); if (st) { // There was still unallocated memory, don't need to recycle allocated blocks. - client->_tracker->ConsumeLocal(size, client->_query_tracker.get()); - // client->_tracker->Consume(size); + client->_tracker->consume(size, client->_query_tracker.get()); return true; } @@ -386,7 +382,7 @@ bool BufferedBlockMgr2::consume_memory(Client* client, int64_t size) { } client->_num_tmp_reserved_buffers -= additional_tmp_reservations; _unfullfilled_reserved_buffers -= additional_tmp_reservations; - _mem_tracker->Release(buffers_acquired * max_block_size()); + _mem_tracker->release(buffers_acquired * max_block_size()); return false; } @@ -394,21 +390,20 @@ bool BufferedBlockMgr2::consume_memory(Client* client, int64_t size) { _unfullfilled_reserved_buffers -= buffers_acquired; DCHECK_GE(buffers_acquired * max_block_size(), size); - _mem_tracker->Release(buffers_acquired * max_block_size()); - st = _mem_tracker->TryConsume(size); + _mem_tracker->release(buffers_acquired * max_block_size()); + st = _mem_tracker->try_consume(size); WARN_IF_ERROR(st, "consume failed"); if (!st) { return false; } - client->_tracker->ConsumeLocal(size, client->_query_tracker.get()); - // client->_tracker->Consume(size); + client->_tracker->consume(size, client->_query_tracker.get()); DCHECK(validate()) << endl << debug_internal(); return true; } void BufferedBlockMgr2::release_memory(Client* client, int64_t size) { - _mem_tracker->Release(size); - client->_tracker->ReleaseLocal(size, client->_query_tracker.get()); + _mem_tracker->release(size); + client->_tracker->release(size, 
client->_query_tracker.get()); } void BufferedBlockMgr2::cancel() { @@ -469,7 +464,7 @@ Status BufferedBlockMgr2::get_new_block(Client* client, Block* unpin_block, Bloc if (len > 0 && len < _max_block_size) { DCHECK(unpin_block == nullptr); - Status st = client->_tracker->TryConsume(len); + Status st = client->_tracker->try_consume(len); WARN_IF_ERROR(st, "get_new_block failed"); if (st) { // TODO: Have a cache of unused blocks of size 'len' (0, _max_block_size) @@ -600,10 +595,9 @@ BufferedBlockMgr2::~BufferedBlockMgr2() { // Free memory resources. for (BufferDescriptor* buffer : _all_io_buffers) { - _mem_tracker->Release(buffer->len); + _mem_tracker->release(buffer->len); delete[] buffer->buffer; } - DCHECK_EQ(_mem_tracker->consumption(), 0); _mem_tracker.reset(); } @@ -954,7 +948,7 @@ void BufferedBlockMgr2::delete_block(Block* block) { if (block->_buffer_desc->len != _max_block_size) { // Just delete the block for now. delete[] block->_buffer_desc->buffer; - block->_client->_tracker->Release(block->_buffer_desc->len); + block->_client->_tracker->release(block->_buffer_desc->len); delete block->_buffer_desc; block->_buffer_desc = nullptr; } else { @@ -1094,7 +1088,7 @@ Status BufferedBlockMgr2::find_buffer_for_block(Block* block, bool* in_mem) { Status BufferedBlockMgr2::find_buffer(unique_lock& lock, BufferDescriptor** buffer_desc) { *buffer_desc = nullptr; - Status st = _mem_tracker->TryConsume(_max_block_size); + Status st = _mem_tracker->try_consume(_max_block_size); WARN_IF_ERROR(st, "try to allocate a new buffer failed"); // First, try to allocate a new buffer. 
if (_free_io_buffers.size() < _block_write_threshold && st) { @@ -1262,9 +1256,8 @@ string BufferedBlockMgr2::debug_internal() const { << " Num available buffers: " << remaining_unreserved_buffers() << endl << " Total pinned buffers: " << _total_pinned_buffers << endl << " Unfullfilled reserved buffers: " << _unfullfilled_reserved_buffers << endl - << " Remaining memory: " << _mem_tracker->SpareCapacity(MemLimit::HARD) - << " (#blocks=" << (_mem_tracker->SpareCapacity(MemLimit::HARD) / _max_block_size) << ")" - << endl + << " Remaining memory: " << _mem_tracker->spare_capacity() + << " (#blocks=" << (_mem_tracker->spare_capacity() / _max_block_size) << ")" << endl << " Block write threshold: " << _block_write_threshold; return ss.str(); } @@ -1295,7 +1288,7 @@ void BufferedBlockMgr2::init(DiskIoMgr* io_mgr, RuntimeProfile* parent_profile, _integrity_check_timer = ADD_TIMER(_profile.get(), "TotalIntegrityCheckTime"); // Create a new mem_tracker and allocate buffers. - _mem_tracker = MemTracker::CreateTracker(mem_limit, "BufferedBlockMgr2", parent_tracker); + _mem_tracker = MemTracker::create_tracker(mem_limit, "BufferedBlockMgr2", parent_tracker); _initialized = true; } diff --git a/be/src/runtime/buffered_tuple_stream3.cc b/be/src/runtime/buffered_tuple_stream3.cc index 9a7d8d65844b13..b30e0f66148931 100644 --- a/be/src/runtime/buffered_tuple_stream3.cc +++ b/be/src/runtime/buffered_tuple_stream3.cc @@ -18,7 +18,6 @@ #include #include "runtime/buffered_tuple_stream3.inline.h" -#include "runtime/bufferpool/reservation_tracker.h" #include "runtime/descriptors.h" #include "runtime/exec_env.h" #include "runtime/mem_tracker.h" diff --git a/be/src/runtime/bufferpool/buffer_pool.cc b/be/src/runtime/bufferpool/buffer_pool.cc index c0660bd77b135e..3ff0a2e10ef90a 100644 --- a/be/src/runtime/bufferpool/buffer_pool.cc +++ b/be/src/runtime/bufferpool/buffer_pool.cc @@ -378,8 +378,7 @@ BufferPool::Client::Client(BufferPool* pool, //TmpFileMgr::FileGroup* file_group 
buffers_allocated_bytes_(0) { // Set up a child profile with buffer pool info. RuntimeProfile* child_profile = profile->create_child("Buffer pool", true, true); - reservation_.InitChildTracker(child_profile, parent_reservation, mem_tracker.get(), - reservation_limit); + reservation_.InitChildTracker(child_profile, parent_reservation, nullptr, reservation_limit); counters_.alloc_time = ADD_TIMER(child_profile, "AllocTime"); counters_.cumulative_allocations = ADD_COUNTER(child_profile, "CumulativeAllocations", TUnit::UNIT); diff --git a/be/src/runtime/bufferpool/reservation_tracker.cc b/be/src/runtime/bufferpool/reservation_tracker.cc index 4fa41d85e1b751..1e56441a0d8fd4 100644 --- a/be/src/runtime/bufferpool/reservation_tracker.cc +++ b/be/src/runtime/bufferpool/reservation_tracker.cc @@ -60,7 +60,7 @@ void ReservationTracker::InitChildTracker(RuntimeProfile* profile, ReservationTr std::lock_guard l(lock_); DCHECK(!initialized_); parent_ = parent; - mem_tracker_ = mem_tracker; + mem_tracker_ = nullptr; // TODO(zxy) remove ReservationTracker later reservation_limit_ = reservation_limit; reservation_ = 0; @@ -75,8 +75,8 @@ void ReservationTracker::InitChildTracker(RuntimeProfile* profile, ReservationTr DCHECK_EQ(parent_mem_tracker, mem_tracker_->parent().get()); // Make sure we don't have a lower limit than the ancestor, since we don't enforce // limits at lower links. - DCHECK_EQ(mem_tracker_->GetLowestLimit(MemLimit::HARD), - parent_mem_tracker->GetLowestLimit(MemLimit::HARD)); + DCHECK_EQ(mem_tracker_->get_lowest_limit(), + parent_mem_tracker->get_lowest_limit()); } else { // Make sure we didn't leave a gap in the links. E.g. this tracker's grandparent // shouldn't have a MemTracker. 
@@ -110,7 +110,6 @@ void ReservationTracker::InitCounters(RuntimeProfile* profile, int64_t reservati counters_.reservation_limit = ADD_COUNTER(profile, "ReservationLimit", TUnit::BYTES); COUNTER_SET(counters_.reservation_limit, reservation_limit); } - if (mem_tracker_ != nullptr) mem_tracker_->EnableReservationReporting(counters_); } void ReservationTracker::Close() { @@ -187,14 +186,14 @@ bool ReservationTracker::TryConsumeFromMemTracker(int64_t reservation_increase) if (GetParentMemTracker() == nullptr) { // At the topmost link, which may be a MemTracker with a limit, we need to use // TryConsume() to check the limit. - Status st = mem_tracker_->TryConsume(reservation_increase); + Status st = mem_tracker_->try_consume(reservation_increase); WARN_IF_ERROR(st, "TryConsumeFromMemTracker failed"); return st.ok(); } else { // For lower links, there shouldn't be a limit to enforce, so we just need to // update the consumption of the linked MemTracker since the reservation is // already reflected in its parent. 
- mem_tracker_->ConsumeLocal(reservation_increase, GetParentMemTracker()); + mem_tracker_->consume(reservation_increase, GetParentMemTracker()); return true; } } @@ -203,9 +202,9 @@ void ReservationTracker::ReleaseToMemTracker(int64_t reservation_decrease) { DCHECK_GE(reservation_decrease, 0); if (mem_tracker_ == nullptr) return; if (GetParentMemTracker() == nullptr) { - mem_tracker_->Release(reservation_decrease); + mem_tracker_->release(reservation_decrease); } else { - mem_tracker_->ReleaseLocal(reservation_decrease, GetParentMemTracker()); + mem_tracker_->release(reservation_decrease, GetParentMemTracker()); } } diff --git a/be/src/runtime/cache/result_cache.h b/be/src/runtime/cache/result_cache.h index 910cc191ee3948..7e4352ac7946d1 100644 --- a/be/src/runtime/cache/result_cache.h +++ b/be/src/runtime/cache/result_cache.h @@ -33,7 +33,6 @@ #include "runtime/cache/cache_utils.h" #include "runtime/cache/result_node.h" #include "runtime/mem_pool.h" -#include "runtime/mem_tracker.h" #include "runtime/row_batch.h" #include "runtime/tuple_row.h" diff --git a/be/src/runtime/data_stream_mgr.h b/be/src/runtime/data_stream_mgr.h index be370603c0e4dc..e627de17276d0f 100644 --- a/be/src/runtime/data_stream_mgr.h +++ b/be/src/runtime/data_stream_mgr.h @@ -30,7 +30,6 @@ #include "gen_cpp/Types_types.h" // for TUniqueId #include "gen_cpp/internal_service.pb.h" #include "runtime/descriptors.h" // for PlanNodeId -#include "runtime/mem_tracker.h" #include "runtime/query_statistics.h" #include "util/runtime_profile.h" diff --git a/be/src/runtime/data_stream_recvr.cc b/be/src/runtime/data_stream_recvr.cc index 962395ad2616ed..25366642714fcb 100644 --- a/be/src/runtime/data_stream_recvr.cc +++ b/be/src/runtime/data_stream_recvr.cc @@ -184,6 +184,9 @@ Status DataStreamRecvr::SenderQueue::get_batch(RowBatch** next_batch) { if (!_pending_closures.empty()) { auto closure_pair = _pending_closures.front(); + // TODO(zxy) There may be a problem here, pay attention later + // When the 
batch queue reaches the upper limit of memory, calling run to let + // brpc send data packets may cause additional memory to be released closure_pair.first->Run(); _pending_closures.pop_front(); @@ -446,7 +449,8 @@ DataStreamRecvr::DataStreamRecvr( _num_buffered_bytes(0), _profile(profile), _sub_plan_query_statistics_recvr(sub_plan_query_statistics_recvr) { - _mem_tracker = MemTracker::CreateTracker(_profile, -1, "DataStreamRecvr", parent_tracker); + _mem_tracker = MemTracker::create_tracker(-1, "DataStreamRecvr", parent_tracker, + MemTrackerLevel::VERBOSE, _profile); // Create one queue per sender if is_merging is true. int num_queues = is_merging ? num_senders : 1; @@ -503,8 +507,6 @@ void DataStreamRecvr::close() { _mgr->deregister_recvr(fragment_instance_id(), dest_node_id()); _mgr = nullptr; _merger.reset(); - // TODO: Maybe shared tracker doesn't need to be reset manually - _mem_tracker.reset(); } DataStreamRecvr::~DataStreamRecvr() { diff --git a/be/src/runtime/data_stream_sender.cpp b/be/src/runtime/data_stream_sender.cpp index 681f5fc20b0db1..0fdb68f6d55f3b 100644 --- a/be/src/runtime/data_stream_sender.cpp +++ b/be/src/runtime/data_stream_sender.cpp @@ -388,9 +388,9 @@ Status DataStreamSender::prepare(RuntimeState* state) { << "])"; _profile = _pool->add(new RuntimeProfile(title.str())); SCOPED_TIMER(_profile->total_time_counter()); - _mem_tracker = MemTracker::CreateTracker( - _profile, -1, "DataStreamSender:" + print_id(state->fragment_instance_id()), - state->instance_mem_tracker()); + _mem_tracker = MemTracker::create_tracker( + -1, "DataStreamSender:" + print_id(state->fragment_instance_id()), + state->instance_mem_tracker(), MemTrackerLevel::VERBOSE, _profile); if (_part_type == TPartitionType::UNPARTITIONED || _part_type == TPartitionType::RANDOM) { std::random_device rd; diff --git a/be/src/runtime/disk_io_mgr.cc b/be/src/runtime/disk_io_mgr.cc index 2060320ad26225..032296f73a6750 100644 --- a/be/src/runtime/disk_io_mgr.cc +++ 
b/be/src/runtime/disk_io_mgr.cc @@ -20,6 +20,7 @@ #include #include "runtime/disk_io_mgr_internal.h" +#include "runtime/exec_env.h" using std::string; using std::stringstream; @@ -229,13 +230,12 @@ void DiskIoMgr::BufferDescriptor::set_mem_tracker(std::shared_ptr tr if (_mem_tracker.get() == tracker.get()) { return; } - // TODO(yingchun): use TransferTo? if (_mem_tracker != nullptr) { - _mem_tracker->Release(_buffer_len); + _mem_tracker->release(_buffer_len); } _mem_tracker = std::move(tracker); - if (_mem_tracker != nullptr) { - _mem_tracker->Consume(_buffer_len); + if (_mem_tracker != nullptr) { + _mem_tracker->consume(_buffer_len); } } @@ -275,6 +275,8 @@ DiskIoMgr::DiskIoMgr() // std::min((uint64_t)config::max_cached_file_handles, FileSystemUtil::max_num_file_handles()), // &HdfsCachedFileHandle::release) { { + _mem_tracker = + MemTracker::create_tracker(-1, "DiskIO", nullptr, MemTrackerLevel::OVERVIEW); int64_t max_buffer_size_scaled = bit_ceil(_max_buffer_size, _min_buffer_size); _free_buffers.resize(bit_log2(max_buffer_size_scaled) + 1); int num_local_disks = (config::num_disks == 0 ?
DiskInfo::num_disks() : config::num_disks); @@ -295,6 +297,8 @@ DiskIoMgr::DiskIoMgr(int num_local_disks, int threads_per_disk, int min_buffer_s // _file_handle_cache(::min(config::max_cached_file_handles, // FileSystemUtil::max_num_file_handles()), &HdfsCachedFileHandle::release) { { + _mem_tracker = + MemTracker::create_tracker(-1, "DiskIO", nullptr, MemTrackerLevel::OVERVIEW); int64_t max_buffer_size_scaled = bit_ceil(_max_buffer_size, _min_buffer_size); _free_buffers.resize(bit_log2(max_buffer_size_scaled) + 1); if (num_local_disks == 0) { @@ -359,14 +363,14 @@ DiskIoMgr::~DiskIoMgr() { */ } -Status DiskIoMgr::init(const std::shared_ptr& process_mem_tracker) { - DCHECK(process_mem_tracker != nullptr); - _process_mem_tracker = process_mem_tracker; +Status DiskIoMgr::init(const int64_t mem_limit) { + _mem_tracker->set_limit(mem_limit); // If we hit the process limit, see if we can reclaim some memory by removing // previously allocated (but unused) io buffers. - /* - * process_mem_tracker->AddGcFunction(bind(&DiskIoMgr::gc_io_buffers, this)); - */ + // TODO(zxy) The impact of clearing the free buffers on subsequent + // queries still needs to be verified. + MemTracker::get_process_tracker()->add_gc_function( std::bind(&DiskIoMgr::gc_io_buffers, this, std::placeholders::_1)); for (int i = 0; i < _disk_queues.size(); ++i) { _disk_queues[i] = new DiskQueue(i); @@ -713,9 +717,9 @@ char* DiskIoMgr::get_free_buffer(int64_t* buffer_size) { char* buffer = nullptr; if (_free_buffers[idx].empty()) { ++_num_allocated_buffers; - // Update the process mem usage. This is checked the next time we start + // Update the disk io mem usage.
This is checked the next time we start // a read for the next reader (DiskIoMgr::GetNextScanRange) - _process_mem_tracker->Consume(*buffer_size); + _mem_tracker->consume(*buffer_size); buffer = new char[*buffer_size]; } else { buffer = _free_buffers[idx].front(); @@ -725,20 +729,23 @@ char* DiskIoMgr::get_free_buffer(int64_t* buffer_size) { return buffer; } -void DiskIoMgr::gc_io_buffers() { +void DiskIoMgr::gc_io_buffers(int64_t bytes_to_free) { unique_lock lock(_free_buffers_lock); - int buffers_freed = 0; + int64_t bytes_freed = 0; for (int idx = 0; idx < _free_buffers.size(); ++idx) { for (list::iterator iter = _free_buffers[idx].begin(); iter != _free_buffers[idx].end(); ++iter) { int64_t buffer_size = (1 << idx) * _min_buffer_size; - _process_mem_tracker->Release(buffer_size); + _mem_tracker->release(buffer_size); --_num_allocated_buffers; delete[] * iter; - ++buffers_freed; + bytes_freed += buffer_size; } _free_buffers[idx].clear(); + if (bytes_freed >= bytes_to_free) { + break; + } } } @@ -758,7 +765,7 @@ void DiskIoMgr::return_free_buffer(char* buffer, int64_t buffer_size) { if (!config::disable_mem_pools && _free_buffers[idx].size() < config::max_free_io_buffers) { _free_buffers[idx].push_back(buffer); } else { - _process_mem_tracker->Release(buffer_size); + _mem_tracker->release(buffer_size); --_num_allocated_buffers; delete[] buffer; } @@ -815,15 +822,9 @@ bool DiskIoMgr::get_next_request_range(DiskQueue* disk_queue, RequestRange** ran // We just picked a reader, check the mem limits. // TODO: we can do a lot better here. The reader can likely make progress // with fewer io buffers. - bool process_limit_exceeded = _process_mem_tracker->limit_exceeded(); - bool reader_limit_exceeded = - (*request_context)->_mem_tracker != nullptr - ? (*request_context)->_mem_tracker->AnyLimitExceeded(MemLimit::HARD) - : false; - // bool reader_limit_exceeded = (*request_context)->_mem_tracker != nullptr - // ?
(*request_context)->_mem_tracker->limit_exceeded() : false; - - if (process_limit_exceeded || reader_limit_exceeded) { + if ((*request_context)->_mem_tracker != nullptr + ? (*request_context)->_mem_tracker->any_limit_exceeded() + : false) { (*request_context)->cancel(Status::MemoryLimitExceeded("Memory limit exceeded")); } @@ -1017,11 +1018,11 @@ void DiskIoMgr::read_range(DiskQueue* disk_queue, RequestContext* reader, ScanRa int64_t buffer_size = std::min(bytes_remaining, static_cast(_max_buffer_size)); bool enough_memory = true; if (reader->_mem_tracker != nullptr) { - enough_memory = reader->_mem_tracker->SpareCapacity(MemLimit::HARD) > LOW_MEMORY; + enough_memory = reader->_mem_tracker->spare_capacity() > LOW_MEMORY; if (!enough_memory) { // Low memory, GC and try again. gc_io_buffers(); - enough_memory = reader->_mem_tracker->SpareCapacity(MemLimit::HARD) > LOW_MEMORY; + enough_memory = reader->_mem_tracker->spare_capacity() > LOW_MEMORY; } } diff --git a/be/src/runtime/disk_io_mgr.h b/be/src/runtime/disk_io_mgr.h index af988fb73ee067..0f7346b8a4c532 100644 --- a/be/src/runtime/disk_io_mgr.h +++ b/be/src/runtime/disk_io_mgr.h @@ -542,7 +542,7 @@ class DiskIoMgr { ~DiskIoMgr(); // Initialize the IoMgr. Must be called once before any of the other APIs. - Status init(const std::shared_ptr& process_mem_tracker); + Status init(const int64_t mem_limit); // Allocates tracking structure for a request context. // Register a new request context which is returned in *request_context. @@ -691,8 +691,7 @@ class DiskIoMgr { // Pool to allocate BufferDescriptors. ObjectPool _pool; - // Process memory tracker; needed to account for io buffers. - std::shared_ptr _process_mem_tracker; + std::shared_ptr _mem_tracker; // Number of worker(read) threads per disk. Also the max depth of queued // work to the disk. @@ -787,10 +786,9 @@ class DiskIoMgr { char* get_free_buffer(int64_t* buffer_size); // Garbage collect all unused io buffers. 
This is currently only triggered when the - // process wide limit is hit. This is not good enough. While it is sufficient for - // the IoMgr, other components do not trigger this GC. + // process wide limit is hit. // TODO: make this run periodically? - void gc_io_buffers(); + void gc_io_buffers(int64_t bytes_to_free = INT_MAX); // Returns a buffer to the free list. buffer_size / _min_buffer_size should be a power // of 2, and buffer_size should be <= _max_buffer_size. These constraints will be met diff --git a/be/src/runtime/exec_env.h b/be/src/runtime/exec_env.h index ca04bf32e2e525..36c0528ca11a49 100644 --- a/be/src/runtime/exec_env.h +++ b/be/src/runtime/exec_env.h @@ -20,6 +20,8 @@ #include "common/status.h" #include "olap/options.h" +#include "runtime/mem_tracker.h" +#include "runtime/mem_tracker_task_pool.h" #include "util/threadpool.h" namespace doris { @@ -45,7 +47,7 @@ class LoadPathMgr; class LoadStreamMgr; class MemTracker; class StorageEngine; -class PoolMemTrackerRegistry; +class MemTrackerTaskPool; class PriorityThreadPool; class PriorityWorkStealingThreadPool; class ReservationTracker; @@ -97,6 +99,7 @@ class ExecEnv { // declarations for classes in scoped_ptrs. 
~ExecEnv(); + const bool initialized() { return _is_init; } const std::string& token() const; ExternalScanContextMgr* external_scan_context_mgr() { return _external_scan_context_mgr; } DataStreamMgr* stream_mgr() { return _stream_mgr; } @@ -116,8 +119,11 @@ class ExecEnv { return nullptr; } - std::shared_ptr process_mem_tracker() { return _mem_tracker; } - PoolMemTrackerRegistry* pool_mem_trackers() { return _pool_mem_trackers; } + std::shared_ptr query_pool_mem_tracker() { return _query_pool_mem_tracker; } + std::shared_ptr load_pool_mem_tracker() { return _load_pool_mem_tracker; } + MemTrackerTaskPool* task_pool_mem_tracker_registry() { + return _task_pool_mem_tracker_registry.get(); + } ThreadResourceMgr* thread_mgr() { return _thread_mgr; } PriorityThreadPool* scan_thread_pool() { return _scan_thread_pool; } ThreadPool* limited_scan_thread_pool() { return _limited_scan_thread_pool.get(); } @@ -155,9 +161,6 @@ class ExecEnv { RoutineLoadTaskExecutor* routine_load_task_executor() { return _routine_load_task_executor; } HeartbeatFlags* heartbeat_flags() { return _heartbeat_flags; } - // The root tracker should be set before calling ExecEnv::init(); - void set_root_mem_tracker(std::shared_ptr root_tracker); - private: Status _init(const std::vector& store_paths); void _destroy(); @@ -184,10 +187,14 @@ class ExecEnv { ClientCache* _frontend_client_cache = nullptr; ClientCache* _broker_client_cache = nullptr; ClientCache* _extdatasource_client_cache = nullptr; - std::shared_ptr _mem_tracker; - PoolMemTrackerRegistry* _pool_mem_trackers = nullptr; ThreadResourceMgr* _thread_mgr = nullptr; + // The ancestor for all querys tracker. + std::shared_ptr _query_pool_mem_tracker = nullptr; + // The ancestor for all load tracker. + std::shared_ptr _load_pool_mem_tracker = nullptr; + std::unique_ptr _task_pool_mem_tracker_registry; + // The following two thread pools are used in different scenarios. // _scan_thread_pool is a priority thread pool. 
// Scanner threads for common queries will use this thread pool, diff --git a/be/src/runtime/exec_env_init.cpp b/be/src/runtime/exec_env_init.cpp index c2e500b1931d56..0d483eaa4cb31e 100644 --- a/be/src/runtime/exec_env_init.cpp +++ b/be/src/runtime/exec_env_init.cpp @@ -44,6 +44,7 @@ #include "runtime/load_channel_mgr.h" #include "runtime/load_path_mgr.h" #include "runtime/mem_tracker.h" +#include "runtime/mem_tracker_task_pool.h" #include "runtime/result_buffer_mgr.h" #include "runtime/result_queue_mgr.h" #include "runtime/routine_load/routine_load_task_executor.h" @@ -73,6 +74,8 @@ DEFINE_GAUGE_METRIC_PROTOTYPE_2ARG(send_batch_thread_pool_thread_num, MetricUnit DEFINE_GAUGE_METRIC_PROTOTYPE_2ARG(send_batch_thread_pool_queue_size, MetricUnit::NOUNIT); DEFINE_GAUGE_METRIC_PROTOTYPE_5ARG(query_mem_consumption, MetricUnit::BYTES, "", mem_consumption, Labels({{"type", "query"}})); +DEFINE_GAUGE_METRIC_PROTOTYPE_5ARG(load_mem_consumption, MetricUnit::BYTES, "", mem_consumption, + Labels({{"type", "load"}})); Status ExecEnv::init(ExecEnv* env, const std::vector& store_paths) { return env->_init(store_paths); @@ -99,7 +102,7 @@ Status ExecEnv::_init(const std::vector& store_paths) { _broker_client_cache = new BrokerServiceClientCache(config::max_client_cache_size_per_host); _extdatasource_client_cache = new ExtDataSourceServiceClientCache(config::max_client_cache_size_per_host); - _pool_mem_trackers = new PoolMemTrackerRegistry(); + _task_pool_mem_tracker_registry.reset(new MemTrackerTaskPool()); _thread_mgr = new ThreadResourceMgr(); if (config::doris_enable_scanner_thread_pool_per_disk && config::doris_scanner_thread_pool_thread_num >= store_paths.size() && @@ -163,7 +166,7 @@ Status ExecEnv::_init(const std::vector& store_paths) { _small_file_mgr->init(); _init_mem_tracker(); - RETURN_IF_ERROR(_load_channel_mgr->init(_mem_tracker->limit())); + RETURN_IF_ERROR(_load_channel_mgr->init(MemTracker::get_process_tracker()->limit())); _heartbeat_flags = new 
HeartbeatFlags(); _register_metrics(); _is_init = true; @@ -190,10 +193,16 @@ Status ExecEnv::_init_mem_tracker() { << ". Using physical memory instead"; global_memory_limit_bytes = MemInfo::physical_mem(); } - _mem_tracker = MemTracker::CreateTracker(global_memory_limit_bytes, "Process", - MemTracker::GetRootTracker(), false, false, - MemTrackerLevel::OVERVIEW); - REGISTER_HOOK_METRIC(query_mem_consumption, [this]() { return _mem_tracker->consumption(); }); + MemTracker::get_process_tracker()->set_limit(global_memory_limit_bytes); + _query_pool_mem_tracker = + MemTracker::create_tracker(global_memory_limit_bytes, "QueryPool", MemTracker::get_process_tracker(), + MemTrackerLevel::OVERVIEW); + REGISTER_HOOK_METRIC(query_mem_consumption, + [this]() { return _query_pool_mem_tracker->consumption(); }); + _load_pool_mem_tracker = MemTracker::create_tracker( + global_memory_limit_bytes, "LoadPool", MemTracker::get_process_tracker(), MemTrackerLevel::OVERVIEW); + REGISTER_HOOK_METRIC(load_mem_consumption, + [this]() { return _load_pool_mem_tracker->consumption(); }); LOG(INFO) << "Using global memory limit: " << PrettyPrinter::print(global_memory_limit_bytes, TUnit::BYTES) << ", origin config value: " << config::mem_limit; @@ -258,7 +267,7 @@ Status ExecEnv::_init_mem_tracker() { SegmentLoader::create_global_instance(config::segment_cache_capacity); // 4. 
init other managers - RETURN_IF_ERROR(_disk_io_mgr->init(_mem_tracker)); + RETURN_IF_ERROR(_disk_io_mgr->init(global_memory_limit_bytes)); RETURN_IF_ERROR(_tmp_file_mgr->init()); // TODO(zc): The current memory usage configuration is a bit confusing, @@ -317,7 +326,6 @@ void ExecEnv::_destroy() { SAFE_DELETE(_etl_thread_pool); SAFE_DELETE(_scan_thread_pool); SAFE_DELETE(_thread_mgr); - SAFE_DELETE(_pool_mem_trackers); SAFE_DELETE(_broker_client_cache); SAFE_DELETE(_extdatasource_client_cache); SAFE_DELETE(_frontend_client_cache); @@ -331,6 +339,7 @@ void ExecEnv::_destroy() { SAFE_DELETE(_heartbeat_flags); DEREGISTER_HOOK_METRIC(query_mem_consumption); + DEREGISTER_HOOK_METRIC(load_mem_consumption); _is_init = false; } diff --git a/be/src/runtime/export_sink.cpp b/be/src/runtime/export_sink.cpp index 9cc9f4c293792f..37cb719117e60c 100644 --- a/be/src/runtime/export_sink.cpp +++ b/be/src/runtime/export_sink.cpp @@ -28,7 +28,6 @@ #include "exprs/expr.h" #include "exprs/expr_context.h" #include "gutil/strings/numbers.h" -#include "runtime/mem_tracker.h" #include "runtime/mysql_table_sink.h" #include "runtime/row_batch.h" #include "runtime/runtime_state.h" @@ -72,10 +71,8 @@ Status ExportSink::prepare(RuntimeState* state) { _profile = state->obj_pool()->add(new RuntimeProfile(title.str())); SCOPED_TIMER(_profile->total_time_counter()); - _mem_tracker = MemTracker::CreateTracker(-1, "ExportSink", state->instance_mem_tracker()); - // Prepare the exprs to run. 
- RETURN_IF_ERROR(Expr::prepare(_output_expr_ctxs, state, _row_desc, _mem_tracker)); + RETURN_IF_ERROR(Expr::prepare(_output_expr_ctxs, state, _row_desc, _expr_mem_tracker)); // TODO(lingbin): add some Counter _bytes_written_counter = ADD_COUNTER(profile(), "BytesExported", TUnit::BYTES); diff --git a/be/src/runtime/export_sink.h b/be/src/runtime/export_sink.h index 8dda722bc2175d..4c2d933ed10eab 100644 --- a/be/src/runtime/export_sink.h +++ b/be/src/runtime/export_sink.h @@ -31,7 +31,6 @@ class TExpr; class RuntimeState; class RuntimeProfile; class ExprContext; -class MemTracker; class FileWriter; class TupleRow; @@ -75,8 +74,6 @@ class ExportSink : public DataSink { RuntimeProfile* _profile; - std::shared_ptr _mem_tracker; - RuntimeProfile::Counter* _bytes_written_counter; RuntimeProfile::Counter* _rows_written_counter; RuntimeProfile::Counter* _write_timer; diff --git a/be/src/runtime/fold_constant_executor.cpp b/be/src/runtime/fold_constant_executor.cpp index f093c04235ee93..6a3e69a02c7e0c 100644 --- a/be/src/runtime/fold_constant_executor.cpp +++ b/be/src/runtime/fold_constant_executor.cpp @@ -50,7 +50,6 @@ Status FoldConstantExecutor::fold_constant_expr( // init Status status = _init(query_globals); if (UNLIKELY(!status.ok())) { - LOG(WARNING) << "Failed to init mem trackers, msg: " << status.get_error_msg(); return status; } @@ -64,7 +63,6 @@ Status FoldConstantExecutor::fold_constant_expr( // prepare and open context status = _prepare_and_open(ctx); if (UNLIKELY(!status.ok())) { - LOG(WARNING) << "Failed to init mem trackers, msg: " << status.get_error_msg(); return status; } @@ -188,7 +186,7 @@ Status FoldConstantExecutor::_init(const TQueryGlobals& query_globals) { _runtime_profile = _runtime_state->runtime_profile(); _runtime_profile->set_name("FoldConstantExpr"); - _mem_tracker = MemTracker::CreateTracker(-1, "FoldConstantExpr", _runtime_state->instance_mem_tracker()); + _mem_tracker = MemTracker::create_tracker(-1, "FoldConstantExpr", 
_runtime_state->instance_mem_tracker()); _mem_pool.reset(new MemPool(_mem_tracker.get())); return Status::OK(); diff --git a/be/src/runtime/fragment_mgr.cpp b/be/src/runtime/fragment_mgr.cpp index 88fe4c7963eb80..cacd38be17c3a5 100644 --- a/be/src/runtime/fragment_mgr.cpp +++ b/be/src/runtime/fragment_mgr.cpp @@ -301,12 +301,12 @@ void FragmentExecState::coordinator_callback(const Status& status, RuntimeProfil RuntimeState* runtime_state = _executor.runtime_state(); DCHECK(runtime_state != nullptr); - if (runtime_state->query_options().query_type == TQueryType::LOAD && !done && status.ok()) { + if (runtime_state->query_type() == TQueryType::LOAD && !done && status.ok()) { // this is a load plan, and load is not finished, just make a brief report params.__set_loaded_rows(runtime_state->num_rows_load_total()); params.__set_loaded_bytes(runtime_state->num_bytes_load_total()); } else { - if (runtime_state->query_options().query_type == TQueryType::LOAD) { + if (runtime_state->query_type() == TQueryType::LOAD) { params.__set_loaded_rows(runtime_state->num_rows_load_total()); params.__set_loaded_bytes(runtime_state->num_bytes_load_total()); } diff --git a/be/src/runtime/free_pool.hpp b/be/src/runtime/free_pool.hpp index 379d2549aef696..6dde1648edf857 100644 --- a/be/src/runtime/free_pool.hpp +++ b/be/src/runtime/free_pool.hpp @@ -40,7 +40,7 @@ namespace doris { // contains the link to the next allocation. // This has O(1) Allocate() and Free(). // This is not thread safe. -// TODO: consider integrating this with MemPool. +// TODO(zxy): consider integrating this with MemPool. // TODO: consider changing to something more granular than doubling. 
class FreePool { public: diff --git a/be/src/runtime/initial_reservations.cc b/be/src/runtime/initial_reservations.cc index adbc2be09d7883..86b1f2f1b65e6c 100644 --- a/be/src/runtime/initial_reservations.cc +++ b/be/src/runtime/initial_reservations.cc @@ -38,7 +38,7 @@ InitialReservations::InitialReservations(ObjectPool* obj_pool, std::shared_ptr query_mem_tracker, int64_t initial_reservation_total_claims) : initial_reservation_mem_tracker_( - MemTracker::CreateTracker(-1, "InitialReservations", query_mem_tracker, false)), + MemTracker::create_tracker(-1, "InitialReservations", query_mem_tracker)), remaining_initial_reservation_claims_(initial_reservation_total_claims) { initial_reservations_.InitChildTracker(nullptr, query_reservation, initial_reservation_mem_tracker_.get(), @@ -83,7 +83,5 @@ void InitialReservations::Return(BufferPool::ClientHandle* src, int64_t bytes) { void InitialReservations::ReleaseResources() { initial_reservations_.Close(); - // TODO(HW): Close() is private. make this tracker shared later - // initial_reservation_mem_tracker_->Close(); } } // namespace doris diff --git a/be/src/runtime/load_channel.cpp b/be/src/runtime/load_channel.cpp index db523f2aa70da8..a8ba886fa70b59 100644 --- a/be/src/runtime/load_channel.cpp +++ b/be/src/runtime/load_channel.cpp @@ -28,8 +28,8 @@ LoadChannel::LoadChannel(const UniqueId& load_id, int64_t mem_limit, int64_t tim const std::string& sender_ip) : _load_id(load_id), _timeout_s(timeout_s), _is_high_priority(is_high_priority), _sender_ip(sender_ip) { - _mem_tracker = MemTracker::CreateTracker( - mem_limit, "LoadChannel:" + _load_id.to_string(), mem_tracker, true, false, MemTrackerLevel::TASK); + _mem_tracker = MemTracker::create_tracker( + mem_limit, "LoadChannel:" + _load_id.to_string(), mem_tracker, MemTrackerLevel::TASK); // _last_updated_time should be set before being inserted to // _load_channels in load_channel_mgr, or it may be erased // immediately by gc thread. 
diff --git a/be/src/runtime/load_channel_mgr.cpp b/be/src/runtime/load_channel_mgr.cpp index 5f0f2bbdb9986c..008dfeb40f4306 100644 --- a/be/src/runtime/load_channel_mgr.cpp +++ b/be/src/runtime/load_channel_mgr.cpp @@ -28,8 +28,8 @@ namespace doris { DEFINE_GAUGE_METRIC_PROTOTYPE_2ARG(load_channel_count, MetricUnit::NOUNIT); -DEFINE_GAUGE_METRIC_PROTOTYPE_5ARG(load_mem_consumption, MetricUnit::BYTES, "", - mem_consumption, Labels({{"type", "load"}})); +DEFINE_GAUGE_METRIC_PROTOTYPE_5ARG(load_channel_mem_consumption, MetricUnit::BYTES, "", mem_consumption, + Labels({{"type", "load"}})); // Calculate the total memory limit of all load tasks on this BE static int64_t calc_process_max_load_memory(int64_t process_mem_limit) { @@ -70,12 +70,11 @@ LoadChannelMgr::LoadChannelMgr() : _stop_background_threads_latch(1) { std::lock_guard l(_lock); return _load_channels.size(); }); - _last_success_channel = new_lru_cache("LastestSuccessChannelCache", 1024, _mem_tracker); } LoadChannelMgr::~LoadChannelMgr() { DEREGISTER_HOOK_METRIC(load_channel_count); - DEREGISTER_HOOK_METRIC(load_mem_consumption); + DEREGISTER_HOOK_METRIC(load_channel_mem_consumption); _stop_background_threads_latch.count_down(); if (_load_channels_clean_thread) { _load_channels_clean_thread->join(); @@ -85,10 +84,11 @@ LoadChannelMgr::~LoadChannelMgr() { Status LoadChannelMgr::init(int64_t process_mem_limit) { int64_t load_mem_limit = calc_process_max_load_memory(process_mem_limit); - _mem_tracker = MemTracker::CreateTracker(load_mem_limit, "LoadChannelMgr", nullptr, true, false, MemTrackerLevel::OVERVIEW); - REGISTER_HOOK_METRIC(load_mem_consumption, [this]() { - return _mem_tracker->consumption(); - }); + _mem_tracker = MemTracker::create_tracker(load_mem_limit, "LoadChannelMgr", + MemTracker::get_process_tracker(), + MemTrackerLevel::OVERVIEW); + REGISTER_HOOK_METRIC(load_channel_mem_consumption, [this]() { return _mem_tracker->consumption(); }); + _last_success_channel = 
new_lru_cache("LastestSuccessChannelCache", 1024); RETURN_IF_ERROR(_start_bg_worker()); return Status::OK(); } diff --git a/be/src/runtime/mem_pool.cpp b/be/src/runtime/mem_pool.cpp index bcaaa27c448b25..469a641e64847d 100644 --- a/be/src/runtime/mem_pool.cpp +++ b/be/src/runtime/mem_pool.cpp @@ -37,6 +37,24 @@ const int MemPool::MAX_CHUNK_SIZE; const int MemPool::DEFAULT_ALIGNMENT; uint32_t MemPool::k_zero_length_region_ alignas(std::max_align_t) = MEM_POOL_POISON; +MemPool::MemPool(MemTracker* mem_tracker) + : current_chunk_idx_(-1), + next_chunk_size_(INITIAL_CHUNK_SIZE), + total_allocated_bytes_(0), + total_reserved_bytes_(0), + peak_allocated_bytes_(0), + _mem_tracker(mem_tracker) {} + +MemPool::MemPool(const std::string& label) + : current_chunk_idx_(-1), + next_chunk_size_(INITIAL_CHUNK_SIZE), + total_allocated_bytes_(0), + total_reserved_bytes_(0), + peak_allocated_bytes_(0) { + _mem_tracker_own = MemTracker::create_tracker(-1, label + ":MemPool"); + _mem_tracker = _mem_tracker_own.get(); +} + MemPool::ChunkInfo::ChunkInfo(const Chunk& chunk_) : chunk(chunk_), allocated_bytes(0) { DorisMetrics::instance()->memory_pool_bytes_total->increment(chunk.size); } @@ -45,9 +63,8 @@ MemPool::~MemPool() { int64_t total_bytes_released = 0; for (auto& chunk : chunks_) { total_bytes_released += chunk.chunk.size; - ChunkAllocator::instance()->free(chunk.chunk); + ChunkAllocator::instance()->free(chunk.chunk, _mem_tracker); } - mem_tracker_->Release(total_bytes_released); DorisMetrics::instance()->memory_pool_bytes_total->increment(-total_bytes_released); } @@ -65,7 +82,7 @@ void MemPool::free_all() { int64_t total_bytes_released = 0; for (auto& chunk : chunks_) { total_bytes_released += chunk.chunk.size; - ChunkAllocator::instance()->free(chunk.chunk); + ChunkAllocator::instance()->free(chunk.chunk, _mem_tracker); } chunks_.clear(); next_chunk_size_ = INITIAL_CHUNK_SIZE; @@ -73,16 +90,15 @@ void MemPool::free_all() { total_allocated_bytes_ = 0; total_reserved_bytes_ = 0; 
- mem_tracker_->Release(total_bytes_released); DorisMetrics::instance()->memory_pool_bytes_total->increment(-total_bytes_released); } -bool MemPool::find_chunk(size_t min_size, bool check_limits) { +Status MemPool::find_chunk(size_t min_size, bool check_limits) { // Try to allocate from a free chunk. We may have free chunks after the current chunk // if Clear() was called. The current chunk may be free if ReturnPartialAllocation() // was called. The first free chunk (if there is one) can therefore be either the // current chunk or the chunk immediately after the current chunk. - int first_free_idx; + int first_free_idx = 0; if (current_chunk_idx_ == -1) { first_free_idx = 0; } else { @@ -97,7 +113,7 @@ bool MemPool::find_chunk(size_t min_size, bool check_limits) { if (idx != first_free_idx) std::swap(chunks_[idx], chunks_[first_free_idx]); current_chunk_idx_ = first_free_idx; DCHECK(check_integrity(true)); - return true; + return Status::OK(); } } @@ -115,20 +131,11 @@ bool MemPool::find_chunk(size_t min_size, bool check_limits) { } chunk_size = BitUtil::RoundUpToPowerOfTwo(chunk_size); - if (check_limits) { - Status st = mem_tracker_->TryConsume(chunk_size); - WARN_IF_ERROR(st, "try to allocate a new buffer failed"); - if (!st) return false; - } else { - mem_tracker_->Consume(chunk_size); - } // Allocate a new chunk. Return early if allocate fails. Chunk chunk; - if (!ChunkAllocator::instance()->allocate(chunk_size, &chunk)) { - mem_tracker_->Release(chunk_size); - return false; - } + RETURN_IF_ERROR( + ChunkAllocator::instance()->allocate(chunk_size, &chunk, _mem_tracker, check_limits)); ASAN_POISON_MEMORY_REGION(chunk.data, chunk_size); // Put it before the first free chunk. If no free chunks, it goes at the end. 
if (first_free_idx == static_cast(chunks_.size())) { @@ -143,12 +150,12 @@ bool MemPool::find_chunk(size_t min_size, bool check_limits) { next_chunk_size_ = static_cast(std::min(chunk_size * 2, MAX_CHUNK_SIZE)); DCHECK(check_integrity(true)); - return true; + return Status::OK(); } void MemPool::acquire_data(MemPool* src, bool keep_current) { DCHECK(src->check_integrity(false)); - int num_acquired_chunks; + int num_acquired_chunks = 0; if (keep_current) { num_acquired_chunks = src->current_chunk_idx_; } else if (src->get_free_offset() == 0) { @@ -172,9 +179,8 @@ void MemPool::acquire_data(MemPool* src, bool keep_current) { total_reserved_bytes_ += total_transferred_bytes; // Skip unnecessary atomic ops if the mem_trackers are the same. - if (src->mem_tracker_ != mem_tracker_) { - src->mem_tracker_->Release(total_transferred_bytes); - mem_tracker_->Consume(total_transferred_bytes); + if (src->_mem_tracker != _mem_tracker) { + src->_mem_tracker->transfer_to(_mem_tracker, total_transferred_bytes); } // insert new chunks after current_chunk_idx_ @@ -203,6 +209,7 @@ void MemPool::acquire_data(MemPool* src, bool keep_current) { void MemPool::exchange_data(MemPool* other) { int64_t delta_size = other->total_reserved_bytes_ - total_reserved_bytes_; + other->_mem_tracker->transfer_to(_mem_tracker, delta_size); std::swap(current_chunk_idx_, other->current_chunk_idx_); std::swap(next_chunk_size_, other->next_chunk_size_); @@ -210,10 +217,6 @@ void MemPool::exchange_data(MemPool* other) { std::swap(total_reserved_bytes_, other->total_reserved_bytes_); std::swap(peak_allocated_bytes_, other->peak_allocated_bytes_); std::swap(chunks_, other->chunks_); - - // update MemTracker - mem_tracker_->Consume(delta_size); - other->mem_tracker_->Release(delta_size); } std::string MemPool::debug_string() { diff --git a/be/src/runtime/mem_pool.h b/be/src/runtime/mem_pool.h index 397d2cde416f02..da3fe5c17e184e 100644 --- a/be/src/runtime/mem_pool.h +++ b/be/src/runtime/mem_pool.h @@ -27,6 
+27,7 @@ #include "common/config.h" #include "common/logging.h" +#include "common/status.h" #include "gutil/dynamic_annotations.h" #include "olap/olap_define.h" #include "runtime/memory/chunk.h" @@ -88,16 +89,10 @@ class MemTracker; /// delete p; class MemPool { public: - /// 'tracker' tracks the amount of memory allocated by this pool. Must not be nullptr. - MemPool(MemTracker* mem_tracker) - : current_chunk_idx_(-1), - next_chunk_size_(INITIAL_CHUNK_SIZE), - total_allocated_bytes_(0), - total_reserved_bytes_(0), - peak_allocated_bytes_(0), - mem_tracker_(mem_tracker) { - DCHECK(mem_tracker != nullptr); - } + // 'tracker' tracks the amount of memory allocated by this pool. Must not be nullptr. + MemPool(MemTracker* mem_tracker); + MemPool(const std::string& label); + MemPool(); /// Frees all chunks of memory and subtracts the total allocated bytes /// from the registered limits. @@ -106,33 +101,37 @@ class MemPool { /// Allocates a section of memory of 'size' bytes with DEFAULT_ALIGNMENT at the end /// of the the current chunk. Creates a new chunk if there aren't any chunks /// with enough capacity. - uint8_t* allocate(int64_t size) { return allocate(size, DEFAULT_ALIGNMENT); } + uint8_t* allocate(int64_t size, Status* rst = nullptr) { + return allocate(size, DEFAULT_ALIGNMENT, rst); + } /// Same as Allocate() expect add a check when return a nullptr - OLAPStatus allocate_safely(int64_t size, uint8_t*& ret) { - return allocate_safely(size, DEFAULT_ALIGNMENT, ret); + OLAPStatus allocate_safely(int64_t size, uint8_t*& ret, Status* rst = nullptr) { + return allocate_safely(size, DEFAULT_ALIGNMENT, ret, rst); } /// Same as Allocate() except the mem limit is checked before the allocation and /// this call will fail (returns nullptr) if it does. /// The caller must handle the nullptr case. This should be used for allocations /// where the size can be very big to bound the amount by which we exceed mem limits. 
- uint8_t* try_allocate(int64_t size) { return allocate(size, DEFAULT_ALIGNMENT); } + uint8_t* try_allocate(int64_t size, Status* rst = nullptr) { + return allocate(size, DEFAULT_ALIGNMENT, rst); + } /// Same as TryAllocate() except a non-default alignment can be specified. It /// should be a power-of-two in [1, alignof(std::max_align_t)]. - uint8_t* try_allocate_aligned(int64_t size, int alignment) { + uint8_t* try_allocate_aligned(int64_t size, int alignment, Status* rst = nullptr) { DCHECK_GE(alignment, 1); DCHECK_LE(alignment, config::memory_max_alignment); DCHECK_EQ(BitUtil::RoundUpToPowerOfTwo(alignment), alignment); - return allocate(size, alignment); + return allocate(size, alignment, rst); } /// Same as TryAllocate() except returned memory is not aligned at all. - uint8_t* try_allocate_unaligned(int64_t size) { + uint8_t* try_allocate_unaligned(int64_t size, Status* rst = nullptr) { // Call templated implementation directly so that it is inlined here and the // alignment logic can be optimised out. - return allocate(size, 1); + return allocate(size, 1, rst); } /// Makes all allocated chunks available for re-use, but doesn't delete any chunks. @@ -159,7 +158,7 @@ class MemPool { int64_t total_reserved_bytes() const { return total_reserved_bytes_; } int64_t peak_allocated_bytes() const { return peak_allocated_bytes_; } - MemTracker* mem_tracker() { return mem_tracker_; } + MemTracker* mem_tracker() { return _mem_tracker; } static constexpr int DEFAULT_ALIGNMENT = 8; @@ -195,7 +194,7 @@ class MemPool { /// if a new chunk needs to be created. /// If check_limits is true, this call can fail (returns false) if adding a /// new chunk exceeds the mem limits. - bool find_chunk(size_t min_size, bool check_limits); + Status find_chunk(size_t min_size, bool check_limits); /// Check integrity of the supporting data structures; always returns true but DCHECKs /// all invariants. 
@@ -209,7 +208,7 @@ class MemPool { return chunks_[current_chunk_idx_].allocated_bytes; } - uint8_t * allocate_from_current_chunk(int64_t size, int alignment) { + uint8_t* allocate_from_current_chunk(int64_t size, int alignment) { // Manually ASAN poisoning is complicated and it is hard to make // it work right. There are illustrated examples in // http://blog.hostilefork.com/poison-memory-without-asan/. @@ -242,7 +241,7 @@ class MemPool { } template - uint8_t* ALWAYS_INLINE allocate(int64_t size, int alignment) { + uint8_t* ALWAYS_INLINE allocate(int64_t size, int alignment, Status* rst) { DCHECK_GE(size, 0); if (UNLIKELY(size == 0)) return reinterpret_cast(&k_zero_length_region_); @@ -250,7 +249,7 @@ class MemPool { uint8_t* result = allocate_from_current_chunk(size, alignment); if (result != nullptr) { return result; - } + } } // If we couldn't allocate a new chunk, return nullptr. malloc() guarantees alignment @@ -258,8 +257,12 @@ class MemPool { // guarantee alignment. //static_assert( //INITIAL_CHUNK_SIZE >= config::FLAGS_MEMORY_MAX_ALIGNMENT, "Min chunk size too low"); - if (UNLIKELY(!find_chunk(size + DEFAULT_PADDING_SIZE, CHECK_LIMIT_FIRST))) { - return nullptr; + if (rst == nullptr) { + if (UNLIKELY(!find_chunk(size + DEFAULT_PADDING_SIZE, CHECK_LIMIT_FIRST))) + return nullptr; + } else { + *rst = find_chunk(size + DEFAULT_PADDING_SIZE, CHECK_LIMIT_FIRST); + if (UNLIKELY(!*rst)) return nullptr; } uint8_t* result = allocate_from_current_chunk(size, alignment); @@ -267,8 +270,9 @@ class MemPool { } template - OLAPStatus ALWAYS_INLINE allocate_safely(int64_t size, int alignment, uint8_t*& ret) { - uint8_t* result = allocate(size, alignment); + OLAPStatus ALWAYS_INLINE allocate_safely(int64_t size, int alignment, uint8_t*& ret, + Status* rst = nullptr) { + uint8_t* result = allocate(size, alignment, rst); if (result == nullptr) { return OLAP_ERR_MALLOC_ERROR; } @@ -301,12 +305,14 @@ class MemPool { /// The current and peak memory footprint of this pool. 
This is different from /// total allocated_bytes_ since it includes bytes in chunks that are not used. - MemTracker* mem_tracker_; + MemTracker* _mem_tracker; + // TODO(zxy) temp variable, In the future, mem trackers should all use raw pointers. + std::shared_ptr _mem_tracker_own; }; // Stamp out templated implementations here so they're included in IR module -template uint8_t* MemPool::allocate(int64_t size, int alignment); -template uint8_t* MemPool::allocate(int64_t size, int alignment); +template uint8_t* MemPool::allocate(int64_t size, int alignment, Status* rst); +template uint8_t* MemPool::allocate(int64_t size, int alignment, Status* rst); } // namespace doris #endif diff --git a/be/src/runtime/mem_tracker.cpp b/be/src/runtime/mem_tracker.cpp index 350f7bc3119668..f10f3a7f5c4846 100644 --- a/be/src/runtime/mem_tracker.cpp +++ b/be/src/runtime/mem_tracker.cpp @@ -17,319 +17,174 @@ #include "runtime/mem_tracker.h" -#include +#include -#include -#include - -#include #include #include "exec/exec_node.h" #include "gutil/once.h" -#include "gutil/strings/substitute.h" -#include "runtime/bufferpool/reservation_tracker_counters.h" #include "runtime/exec_env.h" #include "runtime/runtime_state.h" #include "service/backend_options.h" -#include "util/debug_util.h" -#include "util/doris_metrics.h" -#include "util/mem_info.h" #include "util/pretty_printer.h" -#include "util/stack_util.h" +#include "util/string_util.h" #include "util/uid_util.h" -using boost::join; -using std::deque; -using std::endl; -using std::greater; -using std::list; -using std::pair; -using std::priority_queue; -using std::shared_ptr; -using std::string; - -using std::vector; -using std::weak_ptr; -using strings::Substitute; - namespace doris { const std::string MemTracker::COUNTER_NAME = "PeakMemoryUsage"; -// Name for request pool MemTrackers. '$0' is replaced with the pool name. 
-const std::string REQUEST_POOL_MEM_TRACKER_LABEL_FORMAT = "RequestPool=$0"; - -/// Calculate the soft limit for a MemTracker based on the hard limit 'limit'. -static int64_t CalcSoftLimit(int64_t limit) { - if (limit < 0) return -1; - double frac = std::max(0.0, std::min(1.0, config::soft_mem_limit_frac)); - return static_cast(limit * frac); +// The ancestor for all trackers. Every tracker is visible from the process down. +// All manually created trackers should specify the process tracker as the parent. +static std::shared_ptr process_tracker; +static MemTracker* raw_process_tracker; +static GoogleOnceType process_tracker_once = GOOGLE_ONCE_INIT; + +void MemTracker::create_process_tracker() { + process_tracker.reset( + new MemTracker(-1, "Process", nullptr, MemTrackerLevel::OVERVIEW, nullptr)); + process_tracker->init(); + raw_process_tracker = process_tracker.get(); } -// The ancestor for all trackers. Every tracker is visible from the root down. -static std::shared_ptr root_tracker; -static GoogleOnceType root_tracker_once = GOOGLE_ONCE_INIT; +std::shared_ptr MemTracker::get_process_tracker() { + GoogleOnceInit(&process_tracker_once, &MemTracker::create_process_tracker); + return process_tracker; +} -void MemTracker::CreateRootTracker() { - root_tracker.reset(new MemTracker(nullptr, -1, "Root", nullptr, true, MemTrackerLevel::OVERVIEW)); - root_tracker->Init(); +MemTracker* MemTracker::get_raw_process_tracker() { + GoogleOnceInit(&process_tracker_once, &MemTracker::create_process_tracker); + return raw_process_tracker; } -std::shared_ptr MemTracker::CreateTracker(RuntimeProfile* profile, int64_t byte_limit, - const std::string& label, const std::shared_ptr& parent, - bool reset_label_name, MemTrackerLevel level) { - std::shared_ptr real_parent; - std::string label_name; - // if parent is not null, reset label name to query id. 
- // The parent label always: RuntimeState:instance:8ca5a59e3aa84f74-84bb0d0466193736 - // we just need the last id of it: 8ca5a59e3aa84f74-84bb0d0466193736 - // to build the new label name of tracker: `label`: 8ca5a59e3aa84f74-84bb0d0466193736 - // else if parent is null - // just use the root is parent and keep the label_name as label - if (parent) { - real_parent = parent; - if (reset_label_name) { - std::vector tmp_result; - boost::split(tmp_result, parent->label(), boost::is_any_of(":")); - label_name = label + ":" + tmp_result[tmp_result.size() - 1]; - } else { - label_name = label; +void MemTracker::list_process_trackers(std::vector>* trackers) { + trackers->clear(); + std::deque> to_process; + to_process.push_front(get_process_tracker()); + while (!to_process.empty()) { + std::shared_ptr t = to_process.back(); + to_process.pop_back(); + + trackers->push_back(t); + std::list> children; + { + lock_guard l(t->_child_trackers_lock); + children = t->_child_trackers; + } + for (const auto& child_weak : children) { + std::shared_ptr child = child_weak.lock(); + if (child && static_cast(child->_level) <= + config::mem_tracker_level) { + to_process.emplace_back(std::move(child)); + } } - } else { - real_parent = GetRootTracker(); - label_name = label; } +} - shared_ptr tracker(new MemTracker(profile, byte_limit, label_name, real_parent, true, - level > real_parent->_level ? level : real_parent->_level)); - real_parent->AddChildTracker(tracker); - tracker->Init(); - +std::shared_ptr MemTracker::create_tracker(int64_t byte_limit, const std::string& label, + const std::shared_ptr& parent, + MemTrackerLevel level, + RuntimeProfile* profile) { + std::shared_ptr reset_parent = parent ? parent : MemTracker::get_process_tracker(); + DCHECK(reset_parent); + + std::shared_ptr tracker( + new MemTracker(byte_limit, label, reset_parent, + level > reset_parent->_level ? 
level : reset_parent->_level, profile)); + reset_parent->add_child_tracker(tracker); + tracker->init(); return tracker; } -std::shared_ptr MemTracker::CreateTracker(int64_t byte_limit, const std::string& label, - std::shared_ptr parent, bool log_usage_if_zero, bool reset_label_name, MemTrackerLevel level) { - std::shared_ptr real_parent; - std::string label_name; - // if parent is not null, reset label name to query id. - // The parent label always: RuntimeState:instance:8ca5a59e3aa84f74-84bb0d0466193736 - // we just need the last id of it: 8ca5a59e3aa84f74-84bb0d0466193736 - // to build the new label name of tracker: `label`: 8ca5a59e3aa84f74-84bb0d0466193736 - // else if parent is null - // just use the root is parent and keep the label_name as label - if (parent) { - real_parent = parent; - if (reset_label_name) { - std::vector tmp_result; - boost::split(tmp_result, parent->label(), boost::is_any_of(":")); - label_name = label + ":" + tmp_result[tmp_result.size() - 1]; - } else { - label_name = label; - } - } else { - real_parent = GetRootTracker(); - label_name = label; - } - - shared_ptr tracker( - new MemTracker(nullptr, byte_limit, label_name, real_parent, log_usage_if_zero, - level > real_parent->_level ? level : real_parent->_level)); - real_parent->AddChildTracker(tracker); - tracker->Init(); +std::shared_ptr MemTracker::create_virtual_tracker( + int64_t byte_limit, const std::string& label, const std::shared_ptr& parent, + MemTrackerLevel level) { + std::shared_ptr reset_parent = parent ? 
parent : MemTracker::get_process_tracker(); + DCHECK(reset_parent); + std::shared_ptr tracker( + new MemTracker(byte_limit, "[Virtual]-" + label, reset_parent, level, nullptr)); + reset_parent->add_child_tracker(tracker); + tracker->init_virtual(); return tracker; } MemTracker::MemTracker(int64_t byte_limit, const std::string& label) - : MemTracker(nullptr, byte_limit, label, std::shared_ptr(), true, MemTrackerLevel::VERBOSE) {} - -MemTracker::MemTracker(RuntimeProfile* profile, int64_t byte_limit, const string& label, - const std::shared_ptr& parent, bool log_usage_if_zero, MemTrackerLevel level) - : limit_(byte_limit), - soft_limit_(CalcSoftLimit(byte_limit)), - label_(label), - parent_(parent), - consumption_metric_(nullptr), - log_usage_if_zero_(log_usage_if_zero), - _level(level), - num_gcs_metric_(nullptr), - bytes_freed_by_last_gc_metric_(nullptr), - bytes_over_limit_metric_(nullptr), - limit_metric_(nullptr) { + : MemTracker(byte_limit, label, std::shared_ptr(), MemTrackerLevel::VERBOSE, + nullptr) {} + +MemTracker::MemTracker(int64_t byte_limit, const std::string& label, + const std::shared_ptr& parent, MemTrackerLevel level, + RuntimeProfile* profile) + : _limit(byte_limit), + _label(label), + _id(_label + std::to_string(GetCurrentTimeMicros()) + std::to_string(rand())), + _parent(parent), + _level(level) { if (profile == nullptr) { - consumption_ = std::make_shared(TUnit::BYTES); + _consumption = std::make_shared(TUnit::BYTES); } else { - consumption_ = profile->AddSharedHighWaterMarkCounter(COUNTER_NAME, TUnit::BYTES); + _consumption = profile->AddSharedHighWaterMarkCounter(COUNTER_NAME, TUnit::BYTES); } } -void MemTracker::Init() { - DCHECK_GE(limit_, -1); - DCHECK_LE(soft_limit_, limit_); - // populate all_trackers_ and limit_trackers_ +void MemTracker::init() { + DCHECK_GE(_limit, -1); MemTracker* tracker = this; - while (tracker != nullptr) { - all_trackers_.push_back(tracker); - if (tracker->has_limit()) limit_trackers_.push_back(tracker); - 
tracker = tracker->parent_.get(); - } - DCHECK_GT(all_trackers_.size(), 0); - DCHECK_EQ(all_trackers_[0], this); -} - -void MemTracker::AddChildTracker(const std::shared_ptr& tracker) { - lock_guard l(child_trackers_lock_); - tracker->child_tracker_it_ = child_trackers_.insert(child_trackers_.end(), tracker); -} - -void MemTracker::EnableReservationReporting(const ReservationTrackerCounters& counters) { - delete reservation_counters_.swap(new ReservationTrackerCounters(counters)); -} - -int64_t MemTracker::GetLowestLimit(MemLimit mode) const { - if (limit_trackers_.empty()) return -1; - int64_t min_limit = numeric_limits::max(); - for (MemTracker* limit_tracker : limit_trackers_) { - DCHECK(limit_tracker->has_limit()); - min_limit = std::min(min_limit, limit_tracker->GetLimit(mode)); + while (tracker != nullptr && tracker->_virtual == false) { + _all_trackers.push_back(tracker); + if (tracker->has_limit()) _limit_trackers.push_back(tracker); + tracker = tracker->_parent.get(); } - return min_limit; + DCHECK_GT(_all_trackers.size(), 0); + DCHECK_EQ(_all_trackers[0], this); } -int64_t MemTracker::SpareCapacity(MemLimit mode) const { - int64_t result = std::numeric_limits::max(); - for (const auto& tracker : limit_trackers_) { - int64_t mem_left = tracker->GetLimit(mode) - tracker->consumption(); - result = std::min(result, mem_left); - } - return result; -} - -void MemTracker::RefreshConsumptionFromMetric() { - DCHECK(consumption_metric_ != nullptr); - consumption_->set(consumption_metric_->value()); -} - -int64_t MemTracker::GetPoolMemReserved() { - // Pool trackers should have a pool_name_ and no limit. 
- DCHECK(!pool_name_.empty()); - DCHECK_EQ(limit_, -1) << LogUsage(UNLIMITED_DEPTH); - - // Use cache to avoid holding child_trackers_lock_ - list> children; - { - lock_guard l(child_trackers_lock_); - children = child_trackers_; - } - - int64_t mem_reserved = 0L; - for (const auto& child_weak : children) { - std::shared_ptr child = child_weak.lock(); - if (child) { - int64_t child_limit = child->limit(); - if (child_limit > 0) { - // Make sure we don't overflow if the query limits are set to ridiculous values. - mem_reserved += std::min(child_limit, MemInfo::physical_mem()); - } else { - DCHECK(child_limit == -1) - << child->LogUsage(UNLIMITED_DEPTH); - mem_reserved += child->consumption(); - } - } - } - return mem_reserved; -} - -std::shared_ptr PoolMemTrackerRegistry::GetRequestPoolMemTracker( - const string& pool_name, bool create_if_not_present) { - DCHECK(!pool_name.empty()); - lock_guard l(pool_to_mem_trackers_lock_); - PoolTrackersMap::iterator it = pool_to_mem_trackers_.find(pool_name); - if (it != pool_to_mem_trackers_.end()) { - MemTracker* tracker = it->second.get(); - DCHECK(pool_name == tracker->pool_name_); - return it->second; - } - if (!create_if_not_present) return nullptr; - // First time this pool_name registered, make a new object. 
- std::shared_ptr tracker = MemTracker::CreateTracker( - -1, strings::Substitute(REQUEST_POOL_MEM_TRACKER_LABEL_FORMAT, pool_name), - ExecEnv::GetInstance()->process_mem_tracker()); - tracker->pool_name_ = pool_name; - pool_to_mem_trackers_.emplace(pool_name, std::shared_ptr(tracker)); - return tracker; +void MemTracker::init_virtual() { + DCHECK_GE(_limit, -1); + _all_trackers.push_back(this); + if (this->has_limit()) _limit_trackers.push_back(this); + _virtual = true; } MemTracker::~MemTracker() { - delete reservation_counters_.load(); - - if (parent()) { - DCHECK(consumption() == 0) << "Memory tracker " << debug_string() - << " has unreleased consumption " << consumption(); - parent_->Release(consumption()); - - lock_guard l(parent_->child_trackers_lock_); - if (child_tracker_it_ != parent_->child_trackers_.end()) { - parent_->child_trackers_.erase(child_tracker_it_); - child_tracker_it_ = parent_->child_trackers_.end(); + consume(_untracked_mem.exchange(0)); + if (!_virtual && config::memory_leak_detection) MemTracker::memory_leak_check(this); + if (!_virtual && parent()) { + if (consumption() != 0) { + // TODO(zxy) delete after. Because some trackers do not manually release completely before destructing + _parent->release(consumption()); } - } -} - -void MemTracker::ListTrackers(vector>* trackers) { - trackers->clear(); - deque> to_process; - to_process.push_front(GetRootTracker()); - while (!to_process.empty()) { - shared_ptr t = to_process.back(); - to_process.pop_back(); - trackers->push_back(t); - list> children; - { - lock_guard l(t->child_trackers_lock_); - children = t->child_trackers_; - } - for (const auto& child_weak : children) { - shared_ptr child = child_weak.lock(); - if (child && static_cast(child->_level) <= config::mem_tracker_level) { - to_process.emplace_back(std::move(child)); - } + // Do not call release on the parent tracker to avoid repeated releases. + // Ensure that all consume/release are triggered by TCMalloc new/delete hook. 
+ lock_guard l(_parent->_child_trackers_lock); + if (_child_tracker_it != _parent->_child_trackers.end()) { + _parent->_child_trackers.erase(_child_tracker_it); + _child_tracker_it = _parent->_child_trackers.end(); } } + DCHECK_EQ(_untracked_mem, 0); } -//void MemTracker::RegisterMetrics(MetricGroup* metrics, const string& prefix) { -// num_gcs_metric_ = metrics->AddCounter(strings::Substitute("$0.num-gcs", prefix), 0); -// -// // TODO: Consider a total amount of bytes freed counter -// bytes_freed_by_last_gc_metric_ = metrics->AddGauge( -// strings::Substitute("$0.bytes-freed-by-last-gc", prefix), -1); -// -// bytes_over_limit_metric_ = metrics->AddGauge( -// strings::Substitute("$0.bytes-over-limit", prefix), -1); -// -// limit_metric_ = metrics->AddGauge(strings::Substitute("$0.limit", prefix), limit_); -//} - -void MemTracker::TransferTo(MemTracker* dst, int64_t bytes) { - DCHECK_EQ(all_trackers_.back(), dst->all_trackers_.back()) << "Must have same root"; +void MemTracker::transfer_to_relative(MemTracker* dst, int64_t bytes) { + DCHECK_EQ(_all_trackers.back(), dst->_all_trackers.back()) << "Must have same ancestor"; + DCHECK(!dst->has_limit()); // Find the common ancestor and update trackers between 'this'/'dst' and // the common ancestor. This logic handles all cases, including the // two trackers being the same or being ancestors of each other because // 'all_trackers_' includes the current tracker. 
- int ancestor_idx = all_trackers_.size() - 1; - int dst_ancestor_idx = dst->all_trackers_.size() - 1; + int ancestor_idx = _all_trackers.size() - 1; + int dst_ancestor_idx = dst->_all_trackers.size() - 1; while (ancestor_idx > 0 && dst_ancestor_idx > 0 && - all_trackers_[ancestor_idx - 1] == dst->all_trackers_[dst_ancestor_idx - 1]) { + _all_trackers[ancestor_idx - 1] == dst->_all_trackers[dst_ancestor_idx - 1]) { + DCHECK(!dst->_all_trackers[dst_ancestor_idx - 1]->has_limit()); --ancestor_idx; --dst_ancestor_idx; } - MemTracker* common_ancestor = all_trackers_[ancestor_idx]; - ReleaseLocal(bytes, common_ancestor); - dst->ConsumeLocal(bytes, common_ancestor); + MemTracker* common_ancestor = _all_trackers[ancestor_idx]; + release(bytes, common_ancestor); + dst->consume(bytes, common_ancestor); } // Calling this on the query tracker results in output like: @@ -353,83 +208,48 @@ void MemTracker::TransferTo(MemTracker* dst, int64_t bytes) { // TrackerName: Limit=5.00 MB Reservation=5.00 MB OtherMemory=1.04 MB // Total=6.04 MB Peak=6.45 MB // -std::string MemTracker::LogUsage(int max_recursive_depth, const string& prefix, - int64_t* logged_consumption) { +std::string MemTracker::log_usage(int max_recursive_depth, int64_t* logged_consumption) { // Make sure the consumption is up to date. - if (consumption_metric_ != nullptr) RefreshConsumptionFromMetric(); int64_t curr_consumption = consumption(); - int64_t peak_consumption = consumption_->value(); + int64_t peak_consumption = _consumption->value(); if (logged_consumption != nullptr) *logged_consumption = curr_consumption; - if (!log_usage_if_zero_ && curr_consumption == 0) return ""; - - std::stringstream ss; - ss << prefix << label_ << ":"; - if (CheckLimitExceeded(MemLimit::HARD)) ss << " memory limit exceeded."; - if (limit_ > 0) ss << " Limit=" << PrettyPrinter::print(limit_, TUnit::BYTES); - - // TODO(zxy): ReservationTrackerCounters is not actually used in the current Doris. 
- // Printing here ReservationTrackerCounters may cause BE crash when high concurrency. - // The memory tracker in Doris will be redesigned in the future. - // ReservationTrackerCounters* reservation_counters = reservation_counters_.load(); - // if (reservation_counters != nullptr) { - // int64_t reservation = reservation_counters->peak_reservation->current_value(); - // ss << " Reservation=" << PrettyPrinter::print(reservation, TUnit::BYTES); - // if (reservation_counters->reservation_limit != nullptr) { - // int64_t limit = reservation_counters->reservation_limit->value(); - // ss << " ReservationLimit=" << PrettyPrinter::print(limit, TUnit::BYTES); - // } - // ss << " OtherMemory=" << PrettyPrinter::print(curr_consumption - reservation, TUnit::BYTES); - // } - ss << " Total=" << PrettyPrinter::print(curr_consumption, TUnit::BYTES); - // Peak consumption is not accurate if the metric is lazily updated (i.e. - // this is a non-root tracker that exists only for reporting purposes). - // Only report peak consumption if we actually call Consume()/Release() on - // this tracker or an descendent. - if (consumption_metric_ == nullptr || parent_ == nullptr) { - ss << " Peak=" << PrettyPrinter::print(peak_consumption, TUnit::BYTES); - } + if (_level > MemTrackerLevel::INSTANCE && curr_consumption == 0) return ""; + + std::string detail = + "MemTracker log_usage Label: {}, Limit: {}, Total: {}, Peak: {}, Exceeded: {}"; + detail = fmt::format(detail, _label, PrettyPrinter::print(_limit, TUnit::BYTES), + PrettyPrinter::print(curr_consumption, TUnit::BYTES), + PrettyPrinter::print(peak_consumption, TUnit::BYTES), + limit_exceeded() ? "true" : "false"); // This call does not need the children, so return early. 
- if (max_recursive_depth == 0) return ss.str(); + if (max_recursive_depth == 0) return detail; // Recurse and get information about the children - std::string new_prefix = strings::Substitute(" $0", prefix); int64_t child_consumption; std::string child_trackers_usage; - list> children; + std::list> children; { - lock_guard l(child_trackers_lock_); - children = child_trackers_; - } - child_trackers_usage = - LogUsage(max_recursive_depth - 1, new_prefix, children, &child_consumption); - if (!child_trackers_usage.empty()) ss << "\n" << child_trackers_usage; - - if (parent_ == nullptr) { - // Log the difference between the metric value and children as "untracked" memory so - // that the values always add up. This value is not always completely accurate because - // we did not necessarily get a consistent snapshot of the consumption values for all - // children at a single moment in time, but is good enough for our purposes. - int64_t untracked_bytes = curr_consumption - child_consumption; - ss << "\n" - << new_prefix - << "Untracked Memory: Total=" << PrettyPrinter::print(untracked_bytes, TUnit::BYTES); + lock_guard l(_child_trackers_lock); + children = _child_trackers; } - return ss.str(); + child_trackers_usage = log_usage(max_recursive_depth - 1, children, &child_consumption); + if (!child_trackers_usage.empty()) detail += "\n" + child_trackers_usage; + return detail; } -std::string MemTracker::LogUsage(int max_recursive_depth, const string& prefix, - const list>& trackers, - int64_t* logged_consumption) { +std::string MemTracker::log_usage(int max_recursive_depth, + const std::list>& trackers, + int64_t* logged_consumption) { *logged_consumption = 0; - std::vector usage_strings; + std::vector usage_strings; for (const auto& tracker_weak : trackers) { - shared_ptr tracker = tracker_weak.lock(); + std::shared_ptr tracker = tracker_weak.lock(); if (tracker) { int64_t tracker_consumption; std::string usage_string = - tracker->LogUsage(max_recursive_depth, prefix, 
&tracker_consumption); + tracker->log_usage(max_recursive_depth, &tracker_consumption); if (!usage_string.empty()) usage_strings.push_back(usage_string); *logged_consumption += tracker_consumption; } @@ -437,136 +257,62 @@ std::string MemTracker::LogUsage(int max_recursive_depth, const string& prefix, return join(usage_strings, "\n"); } -std::string MemTracker::LogTopNQueries(int limit) { - if (limit == 0) return ""; - priority_queue, std::vector>, - std::greater>> - min_pq; - GetTopNQueries(min_pq, limit); - std::vector usage_strings(min_pq.size()); - while (!min_pq.empty()) { - usage_strings.push_back(min_pq.top().second); - min_pq.pop(); - } - std::reverse(usage_strings.begin(), usage_strings.end()); - return join(usage_strings, "\n"); -} - -void MemTracker::GetTopNQueries( - priority_queue, std::vector>, - greater>>& min_pq, - int limit) { - list> children; - { - lock_guard l(child_trackers_lock_); - children = child_trackers_; +Status MemTracker::mem_limit_exceeded(RuntimeState* state, const std::string& details, + int64_t failed_allocation_size, Status failed_alloc) { + MemTracker* process_tracker = MemTracker::get_raw_process_tracker(); + std::string detail = + "Memory exceed limit. fragment={}, details={}, on backend={}. Memory left in process " + "limit={}."; + detail = fmt::format(detail, state != nullptr ? print_id(state->fragment_instance_id()) : "", + details, BackendOptions::get_localhost(), + PrettyPrinter::print(process_tracker->spare_capacity(), TUnit::BYTES)); + if (!failed_alloc) { + detail += " failed alloc=<{}>. 
current tracker={}."; + detail = fmt::format(detail, failed_alloc.to_string(), _label); + } else { + detail += " current tracker <label={}, used={}, limit={}, failed alloc={}>."; + detail = fmt::format(detail, _label, _consumption->current_value(), _limit, + PrettyPrinter::print(failed_allocation_size, TUnit::BYTES)); } - for (const auto& child_weak : children) { - shared_ptr<MemTracker> child = child_weak.lock(); - if (child) { - child->GetTopNQueries(min_pq, limit); - } + detail += " If query, can change the limit by session variable exec_mem_limit."; + Status status = Status::MemoryLimitExceeded(detail); + if (state != nullptr) state->log_error(detail); + + // only print the tracker log_usage in be log. + if (process_tracker->spare_capacity() < failed_allocation_size) { + // Dumping the process MemTracker is expensive. Limiting the recursive depth to two + // levels limits the level of detail to a one-line summary for each query MemTracker. + detail += "\n" + process_tracker->log_usage(2); } -} - -MemTracker* MemTracker::GetQueryMemTracker() { - MemTracker* tracker = this; - while (tracker != nullptr) { - tracker = tracker->parent_.get(); + if (parent_task_mem_tracker() != nullptr) { + detail += "\n" + parent_task_mem_tracker()->log_usage(); } - return tracker; -} + LOG(WARNING) << detail; -Status MemTracker::MemLimitExceeded(MemTracker* mtracker, RuntimeState* state, - const std::string& details, int64_t failed_allocation_size) { - DCHECK_GE(failed_allocation_size, 0); - std::stringstream ss; - if (!details.empty()) ss << details << std::endl; - if (failed_allocation_size != 0) { - if (mtracker != nullptr) ss << mtracker->label(); - ss << " could not allocate " << PrettyPrinter::print(failed_allocation_size, TUnit::BYTES) - << " without exceeding limit."
<< std::endl; - } - ss << "Error occurred on backend " << BackendOptions::get_localhost(); - if (state != nullptr) ss << " by fragment " << print_id(state->fragment_instance_id()); - ss << std::endl; - ExecEnv* exec_env = ExecEnv::GetInstance(); - MemTracker* process_tracker = exec_env->process_mem_tracker().get(); - const int64_t process_capacity = process_tracker->SpareCapacity(MemLimit::HARD); - ss << "Memory left in process limit: " << PrettyPrinter::print(process_capacity, TUnit::BYTES) - << std::endl; - Status status = Status::MemoryLimitExceeded(ss.str()); - - // only print the query tracker in be log(if available). - MemTracker* query_tracker = nullptr; - if (mtracker != nullptr) { - query_tracker = mtracker->GetQueryMemTracker(); - if (query_tracker != nullptr) { - if (query_tracker->has_limit()) { - const int64_t query_capacity = - query_tracker->limit() - query_tracker->consumption(); - ss << "Memory left in query limit: " - << PrettyPrinter::print(query_capacity, TUnit::BYTES) << std::endl; - } - ss << query_tracker->LogUsage(UNLIMITED_DEPTH); - } - } - - // Log the process level if the process tracker is close to the limit or - // if this tracker is not within a query's MemTracker hierarchy. - if (process_capacity < failed_allocation_size || query_tracker == nullptr) { - // IMPALA-5598: For performance reasons, limit the levels of recursion when - // dumping the process tracker to only two layers. 
- ss << process_tracker->LogUsage(PROCESS_MEMTRACKER_LIMITED_DEPTH); - } - if (state != nullptr) state->log_error(ss.str()); - LOG(WARNING) << ss.str(); return status; } -void MemTracker::AddGcFunction(GcFunction f) { - gc_functions_.push_back(f); -} - -bool MemTracker::LimitExceededSlow(MemLimit mode) { - if (mode == MemLimit::HARD && bytes_over_limit_metric_ != nullptr) { - bytes_over_limit_metric_->set_value(consumption() - limit_); - } - return GcMemory(GetLimit(mode)); -} - -bool MemTracker::GcMemory(int64_t max_consumption) { +bool MemTracker::gc_memory(int64_t max_consumption) { if (max_consumption < 0) return true; - lock_guard l(gc_lock_); - if (consumption_metric_ != nullptr) RefreshConsumptionFromMetric(); + lock_guard l(_gc_lock); int64_t pre_gc_consumption = consumption(); // Check if someone gc'd before us if (pre_gc_consumption < max_consumption) return false; - if (num_gcs_metric_ != nullptr) num_gcs_metric_->increment(1); int64_t curr_consumption = pre_gc_consumption; + const int64_t EXTRA_BYTES_TO_FREE = 4L * 1024L * 1024L * 1024L; // TODO(zxy) Consider as config // Try to free up some memory - for (int i = 0; i < gc_functions_.size(); ++i) { + for (int i = 0; i < _gc_functions.size(); ++i) { // Try to free up the amount we are over plus some extra so that we don't have to // immediately GC again. Don't free all the memory since that can be unnecessarily // expensive. 
- const int64_t EXTRA_BYTES_TO_FREE = 512L * 1024L * 1024L; int64_t bytes_to_free = curr_consumption - max_consumption + EXTRA_BYTES_TO_FREE; - gc_functions_[i](bytes_to_free); - if (consumption_metric_ != nullptr) RefreshConsumptionFromMetric(); + _gc_functions[i](bytes_to_free); curr_consumption = consumption(); if (max_consumption - curr_consumption <= EXTRA_BYTES_TO_FREE) break; } - if (bytes_freed_by_last_gc_metric_ != nullptr) { - bytes_freed_by_last_gc_metric_->set_value(pre_gc_consumption - curr_consumption); - } return curr_consumption > max_consumption; } -std::shared_ptr MemTracker::GetRootTracker() { - GoogleOnceInit(&root_tracker_once, &MemTracker::CreateRootTracker); - return root_tracker; -} - } // namespace doris diff --git a/be/src/runtime/mem_tracker.h b/be/src/runtime/mem_tracker.h index 1622a70e71adba..ffd2756c9c6a04 100644 --- a/be/src/runtime/mem_tracker.h +++ b/be/src/runtime/mem_tracker.h @@ -18,49 +18,29 @@ #pragma once #include -#include #include #include -#include -#include -#include -#include -#include -#include +#include "common/config.h" #include "common/status.h" -#include "gen_cpp/Types_types.h" // for TUniqueId #include "util/mem_info.h" -#include "util/metrics.h" #include "util/runtime_profile.h" #include "util/spinlock.h" namespace doris { -/// Mode argument passed to various MemTracker methods to indicate whether a soft or hard -/// limit should be used. -enum class MemLimit { HARD, SOFT }; +// The Level use to decide whether to show it in web page, +// each MemTracker have a Level less than or equal to parent, only be set explicit, +// TASK contains query, import, compaction, etc. 
+enum class MemTrackerLevel { OVERVIEW = 0, TASK, INSTANCE, VERBOSE }; -/// The Level use to decide whether to show it in web page -/// each MemTracker have a Level equals to parent, only be set explicit -enum class MemTrackerLevel { OVERVIEW = 0, TASK, VERBOSE }; - -class ObjectPool; class MemTracker; -struct ReservationTrackerCounters; class RuntimeState; -class TQueryOptions; /// A MemTracker tracks memory consumption; it contains an optional limit /// and can be arranged into a tree structure such that the consumption tracked /// by a MemTracker is also tracked by its ancestors. /// -/// A MemTracker has a hard and a soft limit derived from the limit. If the hard limit -/// is exceeded, all memory allocations and queries should fail until we are under the -/// limit again. The soft limit can be exceeded without causing query failures, but -/// consumers of memory that can tolerate running without more memory should not allocate -/// memory in excess of the soft limit. -/// /// We use a five-level hierarchy of mem trackers: process, pool, query, fragment /// instance. Specific parts of the fragment (exec nodes, sinks, etc) will add a /// fifth level when they are initialized. This function also initializes a user @@ -77,7 +57,7 @@ class TQueryOptions; /// Release(). /// /// GcFunctions can be attached to a MemTracker in order to free up memory if the limit is -/// reached. If LimitExceeded() is called and the limit is exceeded, it will first call +/// reached. If limit_exceeded() is called and the limit is exceeded, it will first call /// the GcFunctions to try to free memory and recheck the limit. For example, the process /// tracker has a GcFunction that releases any unused memory still held by tcmalloc, so /// this will be called before the process limit is reported as exceeded. GcFunctions are @@ -86,130 +66,90 @@ class TQueryOptions; /// call back into MemTrackers, except to release memory. // /// This class is thread-safe. 
-class MemTracker : public std::enable_shared_from_this<MemTracker> { +class MemTracker { public: - // Creates and adds the tracker to the tree so that it can be retrieved with - // FindTracker/FindOrCreateTracker. - static std::shared_ptr<MemTracker> CreateTracker( + // Creates and adds the tracker to the tree + static std::shared_ptr<MemTracker> create_tracker( int64_t byte_limit = -1, const std::string& label = std::string(), - std::shared_ptr<MemTracker> parent = std::shared_ptr<MemTracker>(), - bool log_usage_if_zero = true, bool reset_label_name = true, - MemTrackerLevel level = MemTrackerLevel::VERBOSE); + const std::shared_ptr<MemTracker>& parent = std::shared_ptr<MemTracker>(), + MemTrackerLevel level = MemTrackerLevel::VERBOSE, RuntimeProfile* profile = nullptr); - static std::shared_ptr<MemTracker> CreateTracker( - RuntimeProfile* profile, int64_t byte_limit, const std::string& label = std::string(), + // Consume/release will not sync to parent. Usually used to manually record the specified memory, + // It is independent of the recording of TCMalloc Hook in the thread local tracker, so the same + // block of memory is recorded independently in these two trackers. + static std::shared_ptr<MemTracker> create_virtual_tracker( + int64_t byte_limit = -1, const std::string& label = std::string(), const std::shared_ptr<MemTracker>& parent = std::shared_ptr<MemTracker>(), - bool reset_label_name = true, MemTrackerLevel level = MemTrackerLevel::VERBOSE); + MemTrackerLevel level = MemTrackerLevel::VERBOSE); // this is used for creating an orphan mem tracker, or for unit test. - // If a mem tracker has parent, it should be created by `CreateTracker()` + // If a mem tracker has parent, it should be created by `create_tracker()` MemTracker(int64_t byte_limit = -1, const std::string& label = std::string()); ~MemTracker(); // Returns a list of all the valid trackers. - static void ListTrackers(std::vector<std::shared_ptr<MemTracker>>* trackers); - - /// Include counters from a ReservationTracker in logs and other diagnostics. - /// The counters should be owned by the fragment's RuntimeProfile.
- void EnableReservationReporting(const ReservationTrackerCounters& counters); + static void list_process_trackers(std::vector>* trackers); - // Gets a shared_ptr to the "root" tracker, creating it if necessary. - static std::shared_ptr GetRootTracker(); + // Gets a shared_ptr to the "process" tracker, creating it if necessary. + static std::shared_ptr get_process_tracker(); + static MemTracker* get_raw_process_tracker(); - // delete static CreateQueryMemTracker(), cuz it cannot use shared tracker - - /// Increases consumption of this tracker and its ancestors by 'bytes'. - void Consume(int64_t bytes) { - // DCHECK_GE(bytes, 0); - if (bytes < 0) { - Release(-bytes); - return; - } - if (bytes == 0) { - return; + inline Status check_sys_mem_info(int64_t bytes) { + if (MemInfo::initialized() && MemInfo::current_mem() + bytes >= MemInfo::mem_limit()) { + return Status::MemoryLimitExceeded(fmt::format( + "{}: TryConsume failed, bytes={} process whole consumption={} mem limit={}", + _label, bytes, MemInfo::current_mem(), MemInfo::mem_limit())); } + return Status::OK(); + } - if (UNLIKELY(consumption_metric_ != nullptr)) { - RefreshConsumptionFromMetric(); - return; // TODO(yingchun): why return not update tracker? + // Increases consumption of this tracker and its ancestors by 'bytes'. + // up to (but not including) end_tracker. + // This is useful if we want to move tracking between trackers that share a common (i.e. end_tracker) + // ancestor. This happens when we want to update tracking on a particular mem tracker but the consumption + // against the limit recorded in one of its ancestors already happened. 
+ void consume(int64_t bytes, MemTracker* end_tracker = nullptr) { + if (bytes <= 0) { + release(-bytes, end_tracker); + return; } - for (auto& tracker : all_trackers_) { - tracker->consumption_->add(bytes); - if (LIKELY(tracker->consumption_metric_ == nullptr)) { - DCHECK_GE(tracker->consumption_->current_value(), 0); - } + for (auto& tracker : _all_trackers) { + if (tracker == end_tracker) return; + tracker->_consumption->add(bytes); } } - /// Increases the consumption of this tracker and the ancestors up to (but - /// not including) end_tracker. This is useful if we want to move tracking between - /// trackers that share a common (i.e. end_tracker) ancestor. This happens when we want - /// to update tracking on a particular mem tracker but the consumption against - /// the limit recorded in one of its ancestors already happened. - void ConsumeLocal(int64_t bytes, MemTracker* end_tracker) { - DCHECK_GE(bytes, 0); - if (UNLIKELY(bytes < 0)) return; // needed in RELEASE, hits DCHECK in DEBUG - ChangeConsumption(bytes, end_tracker); - } - - /// Same as above, but it decreases the consumption. - void ReleaseLocal(int64_t bytes, MemTracker* end_tracker) { - DCHECK_GE(bytes, 0); - if (UNLIKELY(bytes < 0)) return; // needed in RELEASE, hits DCHECK in DEBUG - ChangeConsumption(-bytes, end_tracker); - } - - /// Increases consumption of this tracker and its ancestors by 'bytes' only if - /// they can all consume 'bytes' without exceeding limit (hard or soft) specified - /// by 'mode'. If any limit would be exceed, no MemTrackers are updated. If the - /// caller can tolerate an allocation failing, it should set mode=SOFT so that - /// other callers that may not tolerate allocation failures have a better chance - /// of success. Returns true if the consumption was successfully updated. + // Increases consumption of this tracker and its ancestors by 'bytes' only if + // they can all consume 'bytes' without exceeding limit. 
If limit would be exceed, + // no MemTrackers are updated. Returns true if the consumption was successfully updated. WARN_UNUSED_RESULT - Status TryConsume(int64_t bytes, MemLimit mode = MemLimit::HARD) { - // DCHECK_GE(bytes, 0); + Status try_consume(int64_t bytes) { if (bytes <= 0) { - Release(-bytes); + release(-bytes); return Status::OK(); } - if (MemInfo::current_mem() + bytes >= MemInfo::mem_limit()) { - return Status::MemoryLimitExceeded(fmt::format( - "{}: TryConsume failed, bytes={} process whole consumption={} mem limit={}", - label_, bytes, MemInfo::current_mem(), MemInfo::mem_limit())); - } - // if (UNLIKELY(bytes == 0)) return true; - // if (UNLIKELY(bytes < 0)) return false; // needed in RELEASE, hits DCHECK in DEBUG - if (UNLIKELY(consumption_metric_ != nullptr)) RefreshConsumptionFromMetric(); + RETURN_IF_ERROR(check_sys_mem_info(bytes)); int i; // Walk the tracker tree top-down. - for (i = all_trackers_.size() - 1; i >= 0; --i) { - MemTracker* tracker = all_trackers_[i]; - const int64_t limit = tracker->GetLimit(mode); - if (limit < 0) { - tracker->consumption_->add(bytes); // No limit at this tracker. + for (i = _all_trackers.size() - 1; i >= 0; --i) { + MemTracker* tracker = _all_trackers[i]; + if (tracker->limit() < 0) { + tracker->_consumption->add(bytes); // No limit at this tracker. } else { // If TryConsume fails, we can try to GC, but we may need to try several times if // there are concurrent consumers because we don't take a lock before trying to - // update consumption_. + // update _consumption. while (true) { - if (LIKELY(tracker->consumption_->try_add(bytes, limit))) break; - - if (UNLIKELY(tracker->GcMemory(limit - bytes))) { - DCHECK_GE(i, 0); + if (LIKELY(tracker->_consumption->try_add(bytes, tracker->limit()))) break; + Status st = tracker->try_gc_memory(bytes); + if (!st) { // Failed for this mem tracker. Roll back the ones that succeeded. 
- for (int j = all_trackers_.size() - 1; j > i; --j) { - all_trackers_[j]->consumption_->add(-bytes); + for (int j = _all_trackers.size() - 1; j > i; --j) { + _all_trackers[j]->_consumption->add(-bytes); } - return Status::MemoryLimitExceeded(fmt::format( - "{}: TryConsume failed, bytes={} consumption={} imit={} " - "attempting to GC", - tracker->label(), bytes, tracker->consumption_->current_value(), - limit)); + return st; } - VLOG_NOTICE << "GC succeeded, TryConsume bytes=" << bytes - << " consumption=" << tracker->consumption_->current_value() - << " limit=" << limit; } } } @@ -218,69 +158,108 @@ class MemTracker : public std::enable_shared_from_this { return Status::OK(); } - /// Decreases consumption of this tracker and its ancestors by 'bytes'. - void Release(int64_t bytes) { - // DCHECK_GE(bytes, 0); + // Decreases consumption of this tracker and its ancestors by 'bytes'. + // up to (but not including) end_tracker. + void release(int64_t bytes, MemTracker* end_tracker = nullptr) { if (bytes < 0) { - Consume(-bytes); + consume(-bytes, end_tracker); return; } - if (bytes == 0) { return; } + for (auto& tracker : _all_trackers) { + if (tracker == end_tracker) return; + tracker->_consumption->add(-bytes); + } + } - // if (UNLIKELY(bytes <= 0)) return; // < 0 needed in RELEASE, hits DCHECK in DEBUG + static void batch_consume(int64_t bytes, + const std::vector>& trackers) { + for (auto& tracker : trackers) { + tracker->consume(bytes); + } + } - if (UNLIKELY(consumption_metric_ != nullptr)) { - RefreshConsumptionFromMetric(); - return; + // When the accumulated untracked memory value exceeds the upper limit, + // the current value is returned and set to 0. + // Thread safety. 
+ int64_t add_untracked_mem(int64_t bytes) { + _untracked_mem += bytes; + if (std::abs(_untracked_mem) >= config::mem_tracker_consume_min_size_bytes) { + return _untracked_mem.exchange(0); } - for (auto& tracker : all_trackers_) { - tracker->consumption_->add(-bytes); - /// If a UDF calls FunctionContext::TrackAllocation() but allocates less than the - /// reported amount, the subsequent call to FunctionContext::Free() may cause the - /// process mem tracker to go negative until it is synced back to the tcmalloc - /// metric. Don't blow up in this case. (Note that this doesn't affect non-process - /// trackers since we can enforce that the reported memory usage is internally - /// consistent.) - if (LIKELY(tracker->consumption_metric_ == nullptr)) { - DCHECK_GE(tracker->consumption_->current_value(), 0) - << std::endl - << tracker->LogUsage(UNLIMITED_DEPTH); - } + return 0; + } + + // In most cases, no need to call flush_untracked_mem on the child tracker, + // because when it is destructed, theoretically all its children have been destructed. + void flush_untracked_mem() { + consume(_untracked_mem.exchange(0)); + for (const auto& tracker_weak : _child_trackers) { + std::shared_ptr tracker = tracker_weak.lock(); + if (tracker) tracker->flush_untracked_mem(); + } + } + + void release_cache(int64_t bytes) { + int64_t consume_bytes = add_untracked_mem(-bytes); + if (consume_bytes != 0) { + release(-consume_bytes); + } + } + + void consume_cache(int64_t bytes) { + int64_t consume_bytes = add_untracked_mem(bytes); + if (consume_bytes != 0) { + consume(consume_bytes); } } - /// Transfer 'bytes' of consumption from this tracker to 'dst', updating - /// all ancestors up to the first shared ancestor. Must not be used if - /// 'dst' has a limit, or an ancestor with a limit, that is not a common - /// ancestor with the tracker, because this does not check memory limits. 
- void TransferTo(MemTracker* dst, int64_t bytes); - - /// Returns true if a valid limit of this tracker or one of its ancestors is - /// exceeded. - bool AnyLimitExceeded(MemLimit mode) { - for (const auto& tracker : limit_trackers_) { - if (tracker->LimitExceeded(mode)) { - return true; + WARN_UNUSED_RESULT + Status try_consume_cache(int64_t bytes) { + if (bytes <= 0) { + release_cache(-bytes); + return Status::OK(); + } + int64_t consume_bytes = add_untracked_mem(bytes); + if (consume_bytes != 0) { + Status st = try_consume(consume_bytes); + if (!st) { + _untracked_mem += consume_bytes; + return st; } } - return false; + return Status::OK(); } - /// If this tracker has a limit, checks the limit and attempts to free up some memory if - /// the hard limit is exceeded by calling any added GC functions. Returns true if the - /// limit is exceeded after calling the GC functions. Returns false if there is no limit - /// or consumption is under the limit. - bool LimitExceeded(MemLimit mode) { - if (UNLIKELY(CheckLimitExceeded(mode))) return LimitExceededSlow(mode); - return false; + // Transfer 'bytes' of consumption from this tracker to 'dst'. + // updating all ancestors up to the first shared ancestor. Must not be used if + // 'dst' has a limit, or an ancestor with a limit, that is not a common + // ancestor with the tracker, because this does not check memory limits. + void transfer_to_relative(MemTracker* dst, int64_t bytes); + + WARN_UNUSED_RESULT + Status try_transfer_to(MemTracker* dst, int64_t bytes) { + // Must release first, then consume + release_cache(bytes); + Status st = dst->try_consume_cache(bytes); + if (!st) { + consume_cache(bytes); + return st; + } + return Status::OK(); } - // Return limit exceeded tracker or null - MemTracker* find_limit_exceeded_tracker() { - for (const auto& tracker : limit_trackers_) { + // Forced transfer, 'dst' may limit exceed, and more ancestor trackers will be updated. 
+ void transfer_to(MemTracker* dst, int64_t bytes) { + release_cache(bytes); + dst->consume_cache(bytes); + } + + // Returns true if a valid limit of this tracker or one of its ancestors is exceeded. + MemTracker* limit_exceeded_tracker() const { + for (const auto& tracker : _limit_trackers) { if (tracker->limit_exceeded()) { return tracker; } @@ -288,66 +267,68 @@ class MemTracker : public std::enable_shared_from_this { return nullptr; } - /// Returns the maximum consumption that can be made without exceeding the limit on - /// this tracker or any of its parents. Returns int64_t::max() if there are no - /// limits and a negative value if any limit is already exceeded. - int64_t SpareCapacity(MemLimit mode) const; - - /// Refresh the memory consumption value from the consumption metric. Only valid to - /// call if this tracker has a consumption metric. - void RefreshConsumptionFromMetric(); - - // TODO(yingchun): following functions are old style which have no MemLimit parameter - bool limit_exceeded() const { return limit_ >= 0 && limit_ < consumption(); } + bool any_limit_exceeded() const { return limit_exceeded_tracker() != nullptr; } - int64_t limit() const { return limit_; } - bool has_limit() const { return limit_ >= 0; } + // Returns the maximum consumption that can be made without exceeding the limit on + // this tracker or any of its parents. Returns int64_t::max() if there are no + // limits and a negative value if any limit is already exceeded. 
+ int64_t spare_capacity() const { + int64_t result = std::numeric_limits::max(); + for (const auto& tracker : _limit_trackers) { + int64_t mem_left = tracker->limit() - tracker->consumption(); + result = std::min(result, mem_left); + } + return result; + } - int64_t soft_limit() const { return soft_limit_; } - int64_t GetLimit(MemLimit mode) const { - if (mode == MemLimit::SOFT) return soft_limit(); - DCHECK_ENUM_EQ(mode, MemLimit::HARD); - return limit(); + // Returns the lowest limit for this tracker and its ancestors. Returns -1 if there is no limit. + int64_t get_lowest_limit() const { + if (_limit_trackers.empty()) return -1; + int64_t min_limit = std::numeric_limits::max(); + for (const auto& tracker : _limit_trackers) { + DCHECK(tracker->has_limit()); + min_limit = std::min(min_limit, tracker->limit()); + } + return min_limit; } - const std::string& label() const { return label_; } - /// Returns the lowest limit for this tracker and its ancestors. Returns - /// -1 if there is no limit. - int64_t GetLowestLimit(MemLimit mode) const; + bool limit_exceeded() const { return _limit >= 0 && _limit < consumption(); } + int64_t limit() const { return _limit; } + void set_limit(int64_t limit) { _limit = limit; } + bool has_limit() const { return _limit >= 0; } - /// Returns the memory 'reserved' by this resource pool mem tracker, which is the sum - /// of the memory reserved by the queries in it (i.e. its child trackers). The mem - /// reserved for a query that is currently executing is its limit_, if set (which - /// should be the common case with admission control). Otherwise, if the query has - /// no limit or the query is finished executing, the current consumption is used. - int64_t GetPoolMemReserved(); + Status check_limit(int64_t bytes) { + if (bytes <= 0) return Status::OK(); + RETURN_IF_ERROR(check_sys_mem_info(bytes)); + int i; + // Walk the tracker tree top-down. 
+ for (i = _all_trackers.size() - 1; i >= 0; --i) { + MemTracker* tracker = _all_trackers[i]; + if (tracker->limit() > 0) { + while (true) { + if (LIKELY(tracker->_consumption->current_value() + bytes < tracker->limit())) + break; + RETURN_IF_ERROR(tracker->try_gc_memory(bytes)); + } + } + } + return Status::OK(); + } - /// Returns the memory consumed in bytes. - int64_t consumption() const { return consumption_->current_value(); } + const std::string& label() const { return _label; } - /// Note that if consumption_ is based on consumption_metric_, this will the max value - /// we've recorded in consumption(), not necessarily the highest value - /// consumption_metric_ has ever reached. - int64_t peak_consumption() const { return consumption_->value(); } + // Returns the memory consumed in bytes. + int64_t consumption() const { return _consumption->current_value(); } + int64_t peak_consumption() const { return _consumption->value(); } - std::shared_ptr parent() const { return parent_; } + std::shared_ptr parent() const { return _parent; } - /// Signature for function that can be called to free some memory after limit is - /// reached. The function should try to free at least 'bytes_to_free' bytes of - /// memory. See the class header for further details on the expected behaviour of - /// these functions. typedef std::function GcFunction; - /// Add a function 'f' to be called if the limit is reached, if none of the other /// previously-added GC functions were successful at freeing up enough memory. /// 'f' does not need to be thread-safe as long as it is added to only one MemTracker. /// Note that 'f' must be valid for the lifetime of this MemTracker. - void AddGcFunction(GcFunction f); - - /// Register this MemTracker's metrics. Each key will be of the form - /// ".". 
- // TODO(yingchun): remove comments - //void RegisterMetrics(MetricGroup* metrics, const std::string& prefix); + void add_gc_function(GcFunction f) { _gc_functions.push_back(f); } /// Logs the usage of this tracker and optionally its children (recursively). /// If 'logged_consumption' is non-nullptr, sets the consumption value logged. @@ -355,251 +336,148 @@ class MemTracker : public std::enable_shared_from_this { /// to include in the dump. If it is zero, then no children are dumped. /// Limiting the recursive depth reduces the cost of dumping, particularly /// for the process MemTracker. - /// TODO: once all memory is accounted in ReservationTracker hierarchy, move - /// reporting there. - std::string LogUsage(int max_recursive_depth, const std::string& prefix = "", - int64_t* logged_consumption = nullptr); - /// Dumping the process MemTracker is expensive. Limiting the recursive depth - /// to two levels limits the level of detail to a one-line summary for each query - /// MemTracker, avoiding all MemTrackers below that level. This provides a summary - /// of process usage with substantially lower cost than the full dump. - static const int PROCESS_MEMTRACKER_LIMITED_DEPTH = 2; - /// Unlimited dumping is useful for query memtrackers or error conditions that - /// are not performance sensitive - static const int UNLIMITED_DEPTH = INT_MAX; - - /// Logs the usage of 'limit' number of queries based on maximum total memory - /// consumption. - std::string LogTopNQueries(int limit); + std::string log_usage(int max_recursive_depth = INT_MAX, int64_t* logged_consumption = nullptr); /// Log the memory usage when memory limit is exceeded and return a status object with /// details of the allocation which caused the limit to be exceeded. /// If 'failed_allocation_size' is greater than zero, logs the allocation size. If /// 'failed_allocation_size' is zero, nothing about the allocation size is logged. /// If 'state' is non-nullptr, logs the error to 'state'. 
- Status MemLimitExceeded(RuntimeState* state, const std::string& details, - int64_t failed_allocation = 0) WARN_UNUSED_RESULT { - return MemLimitExceeded(this, state, details, failed_allocation); + Status mem_limit_exceeded(RuntimeState* state, const std::string& details = std::string(), + int64_t failed_allocation = -1, + Status failed_alloc = Status::OK()) WARN_UNUSED_RESULT; + + // Usually, a negative values means that the statistics are not accurate, + // 1. The released memory is not consumed. + // 2. The same block of memory, tracker A calls consume, and tracker B calls release. + // 3. Repeated releases of MemTacker. When the consume is called on the child MemTracker, + // after the release is called on the parent MemTracker, + // the child ~MemTracker will cause repeated releases. + static void memory_leak_check(MemTracker* tracker) { + tracker->flush_untracked_mem(); + DCHECK_EQ(tracker->_consumption->current_value(), 0) << std::endl << tracker->log_usage(); } - /// Makes MemLimitExceeded callable for nullptr MemTrackers. - static Status MemLimitExceeded(MemTracker* mtracker, RuntimeState* state, - const std::string& details, - int64_t failed_allocation = 0) WARN_UNUSED_RESULT; - - static void update_limits(int64_t bytes, - const std::vector>& trackers) { - for (auto& tracker : trackers) { - tracker->Consume(bytes); + // If an ancestor of this tracker is a Task MemTracker, return that tracker. Otherwise return nullptr. 
+ MemTracker* parent_task_mem_tracker() { + MemTracker* tracker = this; + while (tracker != nullptr && tracker->_level != MemTrackerLevel::TASK) { + tracker = tracker->_parent.get(); } + return tracker; } - static bool limit_exceeded(const std::vector>& trackers) { - for (const auto& tracker : trackers) { - if (tracker->limit_exceeded()) { - // TODO: remove logging - LOG(WARNING) << "exceeded limit: limit=" << tracker->limit() - << " consumption=" << tracker->consumption(); - return true; - } + bool has_virtual_ancestor() { + MemTracker* tracker = this; + while (tracker != nullptr && tracker->_virtual == false) { + tracker = tracker->_parent.get(); } - - return false; + return tracker == nullptr ? false : true; } + std::string id() { return _id; } + std::string debug_string() { std::stringstream msg; - msg << "limit: " << limit_ << "; " - << "consumption: " << consumption_->current_value() << "; " - << "label: " << label_ << "; " - << "all tracker size: " << all_trackers_.size() << "; " - << "limit trackers size: " << limit_trackers_.size() << "; " - << "parent is null: " << ((parent_ == nullptr) ? "true" : "false") << "; "; + msg << "limit: " << _limit << "; " + << "consumption: " << _consumption->current_value() << "; " + << "label: " << _label << "; " + << "all tracker size: " << _all_trackers.size() << "; " + << "limit trackers size: " << _limit_trackers.size() << "; " + << "parent is null: " << ((_parent == nullptr) ? "true" : "false") << "; "; return msg.str(); } - bool is_consumption_metric_null() const { return consumption_metric_ == nullptr; } - static const std::string COUNTER_NAME; private: /// 'byte_limit' < 0 means no limit - /// 'label' is the label used in the usage string (LogUsage()) - /// If 'log_usage_if_zero' is false, this tracker (and its children) will not be - /// included in LogUsage() output if consumption is 0. 
- MemTracker(RuntimeProfile* profile, int64_t byte_limit, const std::string& label, - const std::shared_ptr& parent, bool log_usage_if_zero, MemTrackerLevel); + /// 'label' is the label used in the usage string (log_usage()) + MemTracker(int64_t byte_limit, const std::string& label, + const std::shared_ptr& parent, MemTrackerLevel, RuntimeProfile* profile); private: - friend class PoolMemTrackerRegistry; - - // TODO(HW): remove later - /// Closes this MemTracker. After closing it is invalid to consume memory on this - /// tracker and the tracker's consumption counter (which may be owned by a - /// RuntimeProfile, not this MemTracker) can be safely destroyed. MemTrackers without - /// consumption metrics in the context of a daemon must always be closed. - /// Idempotent: calling multiple times has no effect. - void Close(); - - /// Returns true if the current memory tracker's limit is exceeded. - bool CheckLimitExceeded(MemLimit mode) const { - int64_t limit = GetLimit(mode); - return limit >= 0 && limit < consumption(); + // If consumption is higher than max_consumption, attempts to free memory by calling + // any added GC functions. Returns true if max_consumption is still exceeded. Takes gc_lock. + // Note: If the cache of segment/chunk is released due to insufficient query memory at a certain moment, + // the performance of subsequent queries may be degraded, so the use of gc function should be careful enough. + bool gc_memory(int64_t max_consumption); + + inline Status try_gc_memory(int64_t bytes) { + if (UNLIKELY(gc_memory(_limit - bytes))) { + return Status::MemoryLimitExceeded( + fmt::format("label={} TryConsume failed size={}, used={}, limit={}", label(), + bytes, _consumption->current_value(), _limit)); + } + VLOG_NOTICE << "GC succeeded, TryConsume bytes=" << bytes + << " consumption=" << _consumption->current_value() << " limit=" << _limit; + return Status::OK(); } - /// Slow path for LimitExceeded(). 
- bool LimitExceededSlow(MemLimit mode); + // Walks the MemTracker hierarchy and populates _all_trackers and + // limit_trackers_ + void init(); + void init_virtual(); - /// If consumption is higher than max_consumption, attempts to free memory by calling - /// any added GC functions. Returns true if max_consumption is still exceeded. Takes - /// gc_lock. Updates metrics if initialized. - bool GcMemory(int64_t max_consumption); - - /// Walks the MemTracker hierarchy and populates all_trackers_ and - /// limit_trackers_ - void Init(); - - /// Adds tracker to child_trackers_ - void AddChildTracker(const std::shared_ptr& tracker); + // Adds tracker to _child_trackers + void add_child_tracker(const std::shared_ptr& tracker) { + std::lock_guard l(_child_trackers_lock); + tracker->_child_tracker_it = _child_trackers.insert(_child_trackers.end(), tracker); + } /// Log consumption of all the trackers provided. Returns the sum of consumption in /// 'logged_consumption'. 'max_recursive_depth' specifies the maximum number of levels /// of children to include in the dump. If it is zero, then no children are dumped. - static std::string LogUsage(int max_recursive_depth, const std::string& prefix, - const std::list>& trackers, - int64_t* logged_consumption); - - /// Helper function for LogTopNQueries that iterates through the MemTracker hierarchy - /// and populates 'min_pq' with 'limit' number of elements (that contain state related - /// to query MemTrackers) based on maximum total memory consumption. - void GetTopNQueries(std::priority_queue, - std::vector>, - std::greater>>& min_pq, - int limit); - - /// If an ancestor of this tracker is a query MemTracker, return that tracker. - /// Otherwise return nullptr. - MemTracker* GetQueryMemTracker(); - - /// Increases/Decreases the consumption of this tracker and the ancestors up to (but - /// not including) end_tracker. 
- void ChangeConsumption(int64_t bytes, MemTracker* end_tracker) { - DCHECK(consumption_metric_ == nullptr) << "Should not be called on root."; - for (MemTracker* tracker : all_trackers_) { - if (tracker == end_tracker) return; - DCHECK(!tracker->has_limit()) << tracker->label() << " have limit:" << tracker->limit(); - tracker->consumption_->add(bytes); - } - DCHECK(false) << "end_tracker is not an ancestor"; - } + static std::string log_usage(int max_recursive_depth, + const std::list>& trackers, + int64_t* logged_consumption); - // Creates the root tracker. - static void CreateRootTracker(); + // Creates the process tracker. + static void create_process_tracker(); - /// Lock to protect GcMemory(). This prevents many GCs from occurring at once. - std::mutex gc_lock_; + // Limit on memory consumption, in bytes. If limit_ == -1, there is no consumption limit. + int64_t _limit; - /// Only used if 'is_query_mem_tracker_' is true. - /// 0 if the query is still executing or 1 if it has finished executing. Before - /// it has finished executing, the tracker limit is treated as "reserved memory" - /// for the purpose of admission control - see GetPoolMemReserved(). - std::atomic query_exec_finished_ {0}; + std::string _label; - /// Only valid for MemTrackers returned from GetRequestPoolMemTracker() - std::string pool_name_; + std::string _id; - /// Hard limit on memory consumption, in bytes. May not be exceeded. If limit_ == -1, - /// there is no consumption limit. - const int64_t limit_; + std::shared_ptr _parent; // The parent of this tracker. - /// Soft limit on memory consumption, in bytes. Can be exceeded but callers to - /// TryConsume() can opt not to exceed this limit. If -1, there is no consumption limit. - const int64_t soft_limit_; - - std::string label_; - - /// The parent of this tracker. The pointer is never modified, even after this tracker - /// is unregistered. 
- std::shared_ptr parent_; + MemTrackerLevel _level; - /// in bytes - std::shared_ptr consumption_; + bool _virtual = false; - /// If non-nullptr, used to measure consumption (in bytes) rather than the values provided - /// to Consume()/Release(). Only used for the process tracker, thus parent_ should be - /// nullptr if consumption_metric_ is set. - IntGauge* consumption_metric_; + std::shared_ptr _consumption; // in bytes - /// If non-nullptr, counters from a corresponding ReservationTracker that should be - /// reported in logs and other diagnostics. Owned by this MemTracker. The counters - /// are owned by the fragment's RuntimeProfile. - AtomicPtr reservation_counters_; + // Consume size smaller than mem_tracker_consume_min_size_bytes will continue to accumulate + // to avoid frequent calls to consume/release of MemTracker. + // TODO(zxy) It may be more performant to use thread_local static, which is inherently thread-safe. + // Test after introducing TCMalloc hook + std::atomic _untracked_mem = 0; - std::vector all_trackers_; // this tracker plus all of its ancestors - std::vector limit_trackers_; // all_trackers_ with valid limits + std::vector _all_trackers; // this tracker plus all of its ancestors + std::vector _limit_trackers; // _all_trackers with valid limits // All the child trackers of this tracker. Used for error reporting and // listing only (i.e. updating the consumption of a parent tracker does not // update that of its children). - SpinLock child_trackers_lock_; - std::list> child_trackers_; - - /// Iterator into parent_->child_trackers_ for this object. Stored to have O(1) - /// remove. - std::list>::iterator child_tracker_it_; - - /// Functions to call after the limit is reached to free memory. - std::vector gc_functions_; - - /// If false, this tracker (and its children) will not be included in LogUsage() output - /// if consumption is 0. 
- bool log_usage_if_zero_; - - MemTrackerLevel _level; - - /// The number of times the GcFunctions were called. - IntCounter* num_gcs_metric_; - - /// The number of bytes freed by the last round of calling the GcFunctions (-1 before any - /// GCs are performed). - IntGauge* bytes_freed_by_last_gc_metric_; - - /// The number of bytes over the limit we were the last time LimitExceeded() was called - /// and the limit was exceeded pre-GC. -1 if there is no limit or the limit was never - /// exceeded. - IntGauge* bytes_over_limit_metric_; - - /// Metric for limit_. - IntGauge* limit_metric_; + SpinLock _child_trackers_lock; + std::list> _child_trackers; + // Iterator into parent_->child_trackers_ for this object. Stored to have O(1) remove. + std::list>::iterator _child_tracker_it; + + // Lock to protect gc_memory(). This prevents many GCs from occurring at once. + std::mutex _gc_lock; + // Functions to call after the limit is reached to free memory. + std::vector _gc_functions; }; -/// Global registry for query and pool MemTrackers. Owned by ExecEnv. -class PoolMemTrackerRegistry { -public: - /// Returns a MemTracker object for request pool 'pool_name'. Calling this with the same - /// 'pool_name' will return the same MemTracker object. This is used to track the local - /// memory usage of all requests executing in this pool. If 'create_if_not_present' is - /// true, the first time this is called for a pool, a new MemTracker object is created - /// with the process tracker as its parent. There is no explicit per-pool byte_limit - /// set at any particular impalad, so newly created trackers will always have a limit - /// of -1. - /// TODO(cmy): this function is not used for now. the memtracker returned from here is - /// got from a shared_ptr in `pool_to_mem_trackers_`. 
- /// This funtion is from - /// https://github.com/cloudera/Impala/blob/495397101e5807c701df71ea288f4815d69c2c8a/be/src/runtime/mem-tracker.h#L497 - /// And in impala this function will return a raw pointer. - std::shared_ptr GetRequestPoolMemTracker(const std::string& pool_name, - bool create_if_not_present); - -private: - /// All per-request pool MemTracker objects. It is assumed that request pools will live - /// for the entire duration of the process lifetime so MemTrackers are never removed - /// from this map. Protected by '_pool_to_mem_trackers_lock' - typedef std::unordered_map> PoolTrackersMap; - PoolTrackersMap pool_to_mem_trackers_; - /// IMPALA-3068: Use SpinLock instead of std::mutex so that the lock won't - /// automatically destroy itself as part of process teardown, which could cause races. - SpinLock pool_to_mem_trackers_lock_; -}; +#define RETURN_LIMIT_EXCEEDED(tracker, ...) return tracker->mem_limit_exceeded(__VA_ARGS__); +#define RETURN_IF_LIMIT_EXCEEDED(tracker, state, msg) \ + if (tracker->any_limit_exceeded()) RETURN_LIMIT_EXCEEDED(tracker, state, msg); +#define RETURN_IF_INSTANCE_LIMIT_EXCEEDED(state, msg) \ + if (state->instance_mem_tracker()->any_limit_exceeded()) \ + RETURN_LIMIT_EXCEEDED(state->instance_mem_tracker(), state, msg); } // namespace doris diff --git a/be/src/runtime/mem_tracker_task_pool.cpp b/be/src/runtime/mem_tracker_task_pool.cpp new file mode 100644 index 00000000000000..d6d23f41dba289 --- /dev/null +++ b/be/src/runtime/mem_tracker_task_pool.cpp @@ -0,0 +1,135 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. 
You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include "runtime/mem_tracker_task_pool.h" + +#include "common/config.h" +#include "runtime/exec_env.h" +#include "util/pretty_printer.h" + +namespace doris { + +std::shared_ptr MemTrackerTaskPool::register_task_mem_tracker_impl( + const std::string& task_id, int64_t mem_limit, const std::string& label, + std::shared_ptr parent) { + DCHECK(!task_id.empty()); + // First time this task_id registered, make a new object, otherwise do nothing. + // Combine create_tracker and emplace into one operation to avoid the use of locks + // Name for task MemTrackers. '$0' is replaced with the task id. 
+ _task_mem_trackers.try_emplace_l( + task_id, [](std::shared_ptr) {}, + MemTracker::create_tracker(mem_limit, label, parent, MemTrackerLevel::TASK)); + std::shared_ptr tracker = get_task_mem_tracker(task_id); + return tracker; +} + +std::shared_ptr MemTrackerTaskPool::register_query_mem_tracker( + const std::string& query_id, int64_t mem_limit) { + VLOG_FILE << "Register Query memory tracker, query id: " << query_id + << " limit: " << PrettyPrinter::print(mem_limit, TUnit::BYTES); + return register_task_mem_tracker_impl(query_id, mem_limit, fmt::format("queryId={}", query_id), + ExecEnv::GetInstance()->query_pool_mem_tracker()); +} + +std::shared_ptr MemTrackerTaskPool::register_load_mem_tracker( + const std::string& load_id, int64_t mem_limit) { + VLOG_FILE << "Register Load memory tracker, load id: " << load_id + << " limit: " << PrettyPrinter::print(mem_limit, TUnit::BYTES); + return register_task_mem_tracker_impl(load_id, mem_limit, fmt::format("loadId={}", load_id), + ExecEnv::GetInstance()->load_pool_mem_tracker()); +} + +std::shared_ptr MemTrackerTaskPool::get_task_mem_tracker(const std::string& task_id) { + DCHECK(!task_id.empty()); + std::shared_ptr tracker = nullptr; + // Avoid using locks to resolve erase conflicts + _task_mem_trackers.if_contains(task_id, + [&tracker](std::shared_ptr v) { tracker = v; }); + return tracker; +} + +void MemTrackerTaskPool::logout_task_mem_tracker() { + std::vector expired_tasks; + for (auto it = _task_mem_trackers.begin(); it != _task_mem_trackers.end(); it++) { + // No RuntimeState uses this task MemTracker, it is only referenced by this map, delete it + if (it->second.use_count() == 1) { + if (config::memory_leak_detection && it->second->consumption() == 0) { + // If consumption is not equal to 0 before query mem tracker is destructed, + // there are two possibilities in theory. + // 1. A memory leak occurs. + // 2. 
Some of the memory consumed/released on the query mem tracker is actually released/consume on + // other trackers such as the process mem tracker, and there is no manual transfer between the two trackers. + // + // The second case should be eliminated in theory, but it has not been done so far, so the query memory leak + // cannot be located, and the value of the query pool mem tracker statistics will be inaccurate. + // + // In order to ensure that the query pool mem tracker is the sum of all currently running query mem trackers, + // the effect of the ended query mem tracker on the query pool mem tracker should be cleared, that is, + // the negative number of the current value of consume. + LOG(WARNING) << "Task memory tracker memory leak:" << it->second->debug_string(); + } + it->second->parent()->consume(-it->second->consumption(), + MemTracker::get_process_tracker().get()); + expired_tasks.emplace_back(it->first); + } + } + for (auto tid : expired_tasks) { + DCHECK(_task_mem_trackers[tid].use_count() == 1); + _task_mem_trackers.erase(tid); + VLOG_FILE << "Deregister task memory tracker, task id: " << tid; + } +} + +// TODO(zxy) More observable methods +// /// Logs the usage of 'limit' number of queries based on maximum total memory +// /// consumption. +// std::string MemTracker::LogTopNQueries(int limit) { +// if (limit == 0) return ""; +// priority_queue, std::vector>, +// std::greater>> +// min_pq; +// GetTopNQueries(min_pq, limit); +// std::vector usage_strings(min_pq.size()); +// while (!min_pq.empty()) { +// usage_strings.push_back(min_pq.top().second); +// min_pq.pop(); +// } +// std::reverse(usage_strings.begin(), usage_strings.end()); +// return join(usage_strings, "\n"); +// } + +// /// Helper function for LogTopNQueries that iterates through the MemTracker hierarchy +// /// and populates 'min_pq' with 'limit' number of elements (that contain state related +// /// to query MemTrackers) based on maximum total memory consumption. 
+// void MemTracker::GetTopNQueries( +// priority_queue, std::vector>, +// greater>>& min_pq, +// int limit) { +// list> children; +// { +// lock_guard l(child_trackers_lock_); +// children = child_trackers_; +// } +// for (const auto& child_weak : children) { +// shared_ptr child = child_weak.lock(); +// if (child) { +// child->GetTopNQueries(min_pq, limit); +// } +// } +// } + +} // namespace doris diff --git a/be/src/runtime/mem_tracker_task_pool.h b/be/src/runtime/mem_tracker_task_pool.h new file mode 100644 index 00000000000000..20b0eaf7be1fda --- /dev/null +++ b/be/src/runtime/mem_tracker_task_pool.h @@ -0,0 +1,62 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#pragma once + +#include + +#include "runtime/mem_tracker.h" + +namespace doris { + +// Global task pool for query MemTrackers. Owned by ExecEnv. +class MemTrackerTaskPool { +public: + // Construct a MemTracker object for 'task_id' with 'mem_limit' as the memory limit. + // The MemTracker is a child of the pool MemTracker, Calling this with the same + // 'task_id' will return the same MemTracker object. This is used to track the local + // memory usage of all tasks executing. 
The first time this is called for a task, + // a new MemTracker object is created with the pool tracker as its parent. + // Newly created trackers will always have a limit of -1. + std::shared_ptr register_task_mem_tracker_impl(const std::string& task_id, + int64_t mem_limit, + const std::string& label, + std::shared_ptr parent); + std::shared_ptr register_query_mem_tracker(const std::string& query_id, + int64_t mem_limit); + std::shared_ptr register_load_mem_tracker(const std::string& load_id, + int64_t mem_limit); + + std::shared_ptr get_task_mem_tracker(const std::string& task_id); + + // Remove the mem tracker that has ended the query. + void logout_task_mem_tracker(); + +private: + // All per-task MemTracker objects. + // The life cycle of task memtracker in the process is the same as task runtime state, + // MemTrackers will be removed from this map after query finish or cancel. + using TaskTrackersMap = phmap::parallel_flat_hash_map< + std::string, std::shared_ptr, phmap::priv::hash_default_hash, + phmap::priv::hash_default_eq, + std::allocator>>, 12, + std::mutex>; + + TaskTrackersMap _task_mem_trackers; +}; + +} // namespace doris \ No newline at end of file diff --git a/be/src/runtime/memory/chunk_allocator.cpp b/be/src/runtime/memory/chunk_allocator.cpp index cbc2462953c882..937f9b367f1e71 100644 --- a/be/src/runtime/memory/chunk_allocator.cpp +++ b/be/src/runtime/memory/chunk_allocator.cpp @@ -22,6 +22,7 @@ #include #include "gutil/dynamic_annotations.h" +#include "runtime/mem_tracker.h" #include "runtime/memory/chunk.h" #include "runtime/memory/system_allocator.h" #include "util/bit_util.h" @@ -114,6 +115,8 @@ ChunkAllocator::ChunkAllocator(size_t reserve_limit) : _reserve_bytes_limit(reserve_limit), _reserved_bytes(0), _arenas(CpuInfo::get_max_num_cores()) { + _mem_tracker = + MemTracker::create_tracker(-1, "ChunkAllocator", nullptr, MemTrackerLevel::OVERVIEW); for (int i = 0; i < _arenas.size(); ++i) { _arenas[i].reset(new ChunkArena()); } @@ -128,8 
+131,16 @@ ChunkAllocator::ChunkAllocator(size_t reserve_limit) INT_COUNTER_METRIC_REGISTER(_chunk_allocator_metric_entity, chunk_pool_system_free_cost_ns); } -bool ChunkAllocator::allocate(size_t size, Chunk* chunk) { +Status ChunkAllocator::allocate(size_t size, Chunk* chunk, MemTracker* tracker, bool check_limits) { // fast path: allocate from current core arena + if (tracker) { + if (check_limits) { + RETURN_IF_ERROR(tracker->try_consume_cache(size)); + } else { + tracker->consume_cache(size); + } + } + int core_id = CpuInfo::get_current_core(); chunk->size = size; chunk->core_id = core_id; @@ -138,7 +149,9 @@ bool ChunkAllocator::allocate(size_t size, Chunk* chunk) { DCHECK_GE(_reserved_bytes, 0); _reserved_bytes.fetch_sub(size); chunk_pool_local_core_alloc_count->increment(1); - return true; + // This means the chunk's memory ownership is transferred from ChunkAllocator to MemPool. + if (tracker) _mem_tracker->release_cache(size); + return Status::OK(); } if (_reserved_bytes > size) { // try to allocate from other core's arena @@ -150,7 +163,9 @@ bool ChunkAllocator::allocate(size_t size, Chunk* chunk) { chunk_pool_other_core_alloc_count->increment(1); // reset chunk's core_id to other chunk->core_id = core_id % _arenas.size(); - return true; + // This means the chunk's memory ownership is transferred from ChunkAllocator to MemPool. 
+ if (tracker) _mem_tracker->release_cache(size); + return Status::OK(); } } } @@ -164,15 +179,18 @@ bool ChunkAllocator::allocate(size_t size, Chunk* chunk) { chunk_pool_system_alloc_count->increment(1); chunk_pool_system_alloc_cost_ns->increment(cost_ns); if (chunk->data == nullptr) { - return false; + if (tracker) tracker->release_cache(size); + return Status::MemoryAllocFailed( + fmt::format("ChunkAllocator failed to allocate chunk {} bytes", size)); } - return true; + return Status::OK(); } -void ChunkAllocator::free(const Chunk& chunk) { +void ChunkAllocator::free(const Chunk& chunk, MemTracker* tracker) { if (chunk.core_id == -1) { return; } + if (tracker) tracker->transfer_to(_mem_tracker.get(), chunk.size); int64_t old_reserved_bytes = _reserved_bytes; int64_t new_reserved_bytes = 0; do { @@ -193,8 +211,9 @@ void ChunkAllocator::free(const Chunk& chunk) { _arenas[chunk.core_id]->push_free_chunk(chunk.data, chunk.size); } -bool ChunkAllocator::allocate_align(size_t size, Chunk* chunk) { - return allocate(BitUtil::RoundUpToPowerOfTwo(size), chunk); +Status ChunkAllocator::allocate_align(size_t size, Chunk* chunk, MemTracker* tracker, + bool check_limits) { + return allocate(BitUtil::RoundUpToPowerOfTwo(size), chunk, tracker, check_limits); } } // namespace doris diff --git a/be/src/runtime/memory/chunk_allocator.h b/be/src/runtime/memory/chunk_allocator.h index cfdcb0bb1f46d4..6f3b80e4adafcb 100644 --- a/be/src/runtime/memory/chunk_allocator.h +++ b/be/src/runtime/memory/chunk_allocator.h @@ -28,6 +28,8 @@ namespace doris { struct Chunk; class ChunkArena; class MetricEntity; +class MemTracker; +class Status; // Used to allocate memory with power-of-two length. // This Allocator allocate memory from system and cache free chunks for @@ -63,12 +65,14 @@ class ChunkAllocator { // Allocate a Chunk with a power-of-two length "size". // Return true if success and allocated chunk is saved in "chunk". // Otherwise return false. 
- bool allocate(size_t size, Chunk* chunk); + Status allocate(size_t size, Chunk* chunk, MemTracker* tracker = nullptr, + bool check_limits = false); - bool allocate_align(size_t size, Chunk* chunk); + Status allocate_align(size_t size, Chunk* chunk, MemTracker* tracker = nullptr, + bool check_limits = false); // Free chunk allocated from this allocator - void free(const Chunk& chunk); + void free(const Chunk& chunk, MemTracker* tracker = nullptr); private: static ChunkAllocator* _s_instance; @@ -79,6 +83,8 @@ class ChunkAllocator { std::vector> _arenas; std::shared_ptr _chunk_allocator_metric_entity; + + std::shared_ptr _mem_tracker; }; } // namespace doris diff --git a/be/src/runtime/memory_scratch_sink.h b/be/src/runtime/memory_scratch_sink.h index 19c71f7c00a227..f8e5fceca10832 100644 --- a/be/src/runtime/memory_scratch_sink.h +++ b/be/src/runtime/memory_scratch_sink.h @@ -42,7 +42,6 @@ class RuntimeProfile; class BufferControlBlock; class ExprContext; class ResultWriter; -class MemTracker; class TupleRow; // used to push data to blocking queue diff --git a/be/src/runtime/mysql_table_sink.cpp b/be/src/runtime/mysql_table_sink.cpp index cb7911d9f2b904..0e5042c9b90afa 100644 --- a/be/src/runtime/mysql_table_sink.cpp +++ b/be/src/runtime/mysql_table_sink.cpp @@ -33,7 +33,7 @@ MysqlTableSink::MysqlTableSink(ObjectPool* pool, const RowDescriptor& row_desc, : _pool(pool), _row_desc(row_desc), _t_output_expr(t_exprs), - _mem_tracker(MemTracker::CreateTracker(-1, "MysqlTableSink")) { + _mem_tracker(MemTracker::create_tracker(-1, "MysqlTableSink")) { _name = "MysqlTableSink"; } diff --git a/be/src/runtime/odbc_table_sink.cpp b/be/src/runtime/odbc_table_sink.cpp index b92b1517a0f5af..adbe2e54a1c559 100644 --- a/be/src/runtime/odbc_table_sink.cpp +++ b/be/src/runtime/odbc_table_sink.cpp @@ -28,11 +28,8 @@ namespace doris { OdbcTableSink::OdbcTableSink(ObjectPool* pool, const RowDescriptor& row_desc, - const std::vector& t_exprs) - : _pool(pool), - _row_desc(row_desc), - 
_t_output_expr(t_exprs), - _mem_tracker(MemTracker::CreateTracker(-1, "OdbcTableSink")) { + const std::vector& t_exprs) + : _pool(pool), _row_desc(row_desc), _t_output_expr(t_exprs) { _name = "OOBC_TABLE_SINK"; } @@ -56,7 +53,7 @@ Status OdbcTableSink::init(const TDataSink& t_sink) { Status OdbcTableSink::prepare(RuntimeState* state) { RETURN_IF_ERROR(DataSink::prepare(state)); // Prepare the exprs to run. - RETURN_IF_ERROR(Expr::prepare(_output_expr_ctxs, state, _row_desc, _mem_tracker)); + RETURN_IF_ERROR(Expr::prepare(_output_expr_ctxs, state, _row_desc, _expr_mem_tracker)); std::stringstream title; title << "ODBC_TABLE_SINK (frag_id=" << state->fragment_instance_id() << ")"; // create profile @@ -100,4 +97,4 @@ Status OdbcTableSink::close(RuntimeState* state, Status exec_status) { return Status::OK(); } -} +} // namespace doris diff --git a/be/src/runtime/odbc_table_sink.h b/be/src/runtime/odbc_table_sink.h index 385075b49aa658..3f9c8fd3b25ce6 100644 --- a/be/src/runtime/odbc_table_sink.h +++ b/be/src/runtime/odbc_table_sink.h @@ -32,7 +32,6 @@ class TOdbcTableSink; class RuntimeState; class RuntimeProfile; class ExprContext; -class MemTracker; //This class is a sinker, which put input data to odbc table class OdbcTableSink : public DataSink { @@ -73,9 +72,8 @@ class OdbcTableSink : public DataSink { bool _use_transaction; RuntimeProfile* _profile; - std::shared_ptr _mem_tracker; }; -} +} // namespace doris #endif diff --git a/be/src/runtime/plan_fragment_executor.cpp b/be/src/runtime/plan_fragment_executor.cpp index 04843e3091f4a2..5803250dfc6fad 100644 --- a/be/src/runtime/plan_fragment_executor.cpp +++ b/be/src/runtime/plan_fragment_executor.cpp @@ -108,28 +108,6 @@ Status PlanFragmentExecutor::prepare(const TExecPlanFragmentParams& request, _is_report_success = request.query_options.is_report_success; } - int64_t bytes_limit = request.query_options.mem_limit; - if (bytes_limit <= 0) { - // sometimes the request does not set the query mem limit, we use 
default one. - // TODO(cmy): we should not allow request without query mem limit. - bytes_limit = 2 * 1024 * 1024 * 1024L; - } - - if (bytes_limit > _exec_env->process_mem_tracker()->limit()) { - VLOG_NOTICE << "Query memory limit " << PrettyPrinter::print(bytes_limit, TUnit::BYTES) - << " exceeds process memory limit of " - << PrettyPrinter::print(_exec_env->process_mem_tracker()->limit(), TUnit::BYTES) - << ". Using process memory limit instead"; - bytes_limit = _exec_env->process_mem_tracker()->limit(); - } - // NOTE: this MemTracker only for olap - _mem_tracker = MemTracker::CreateTracker(bytes_limit, - "PlanFragmentExecutor:" + print_id(_query_id) + ":" + - print_id(params.fragment_instance_id), - _exec_env->process_mem_tracker(), true, false, - MemTrackerLevel::TASK); - _runtime_state->set_fragment_mem_tracker(_mem_tracker); - RETURN_IF_ERROR(_runtime_state->create_block_mgr()); // set up desc tbl @@ -231,7 +209,7 @@ Status PlanFragmentExecutor::prepare(const TExecPlanFragmentParams& request, } Status PlanFragmentExecutor::open() { - int64_t mem_limit = _runtime_state->fragment_mem_tracker()->limit(); + int64_t mem_limit = _runtime_state->instance_mem_tracker()->limit(); TAG(LOG(INFO)) .log("PlanFragmentExecutor::open, using query memory limit: " + PrettyPrinter::print(mem_limit, TUnit::BYTES)) @@ -454,7 +432,7 @@ void PlanFragmentExecutor::_collect_node_statistics() { DCHECK(_runtime_state->backend_id() != -1); NodeStatistics* node_statistics = _query_statistics->add_nodes_statistics(_runtime_state->backend_id()); - node_statistics->add_peak_memory(_mem_tracker->peak_consumption()); + node_statistics->add_peak_memory(_runtime_state->instance_mem_tracker()->peak_consumption()); } void PlanFragmentExecutor::report_profile() { @@ -608,7 +586,7 @@ void PlanFragmentExecutor::update_status(const Status& new_status) { _runtime_state->set_mem_limit_exceeded(new_status.get_error_msg()); } _status = new_status; - if (_runtime_state->query_options().query_type == 
TQueryType::EXTERNAL) { + if (_runtime_state->query_type() == TQueryType::EXTERNAL) { TUniqueId fragment_instance_id = _runtime_state->fragment_instance_id(); _exec_env->result_queue_mgr()->update_queue_status(fragment_instance_id, new_status); @@ -696,10 +674,6 @@ void PlanFragmentExecutor::close() { << print_id(_runtime_state->fragment_instance_id()); } - // _mem_tracker init failed - if (_mem_tracker.get() != nullptr) { - _mem_tracker->Release(_mem_tracker->consumption()); - } _closed = true; } diff --git a/be/src/runtime/plan_fragment_executor.h b/be/src/runtime/plan_fragment_executor.h index de11ca87d9dbb4..8d7927a2817c17 100644 --- a/be/src/runtime/plan_fragment_executor.h +++ b/be/src/runtime/plan_fragment_executor.h @@ -147,7 +147,6 @@ class PlanFragmentExecutor { ExecEnv* _exec_env; // not owned ExecNode* _plan; // lives in _runtime_state->obj_pool() TUniqueId _query_id; - std::shared_ptr _mem_tracker; // profile reporting-related report_status_callback _report_status_cb; diff --git a/be/src/runtime/result_file_sink.cpp b/be/src/runtime/result_file_sink.cpp index efe367cda6931f..a26e4a38d70288 100644 --- a/be/src/runtime/result_file_sink.cpp +++ b/be/src/runtime/result_file_sink.cpp @@ -22,7 +22,6 @@ #include "runtime/buffer_control_block.h" #include "runtime/exec_env.h" #include "runtime/file_result_writer.h" -#include "runtime/mem_tracker.h" #include "runtime/mysql_result_writer.h" #include "runtime/result_buffer_mgr.h" #include "runtime/row_batch.h" @@ -110,9 +109,9 @@ Status ResultFileSink::prepare(RuntimeState* state) { _local_bytes_send_counter = ADD_COUNTER(profile(), "LocalBytesSent", TUnit::BYTES); _uncompressed_bytes_counter = ADD_COUNTER(profile(), "UncompressedRowBatchSize", TUnit::BYTES); - _mem_tracker = MemTracker::CreateTracker( - _profile, -1, "ResultFileSink:" + print_id(state->fragment_instance_id()), - state->instance_mem_tracker()); + _mem_tracker = MemTracker::create_tracker( + -1, "ResultFileSink:" + 
print_id(state->fragment_instance_id()), + state->instance_mem_tracker(), MemTrackerLevel::VERBOSE, _profile); // create writer _output_batch = new RowBatch(_output_row_descriptor, 1024, _mem_tracker.get()); _writer.reset(new (std::nothrow) FileResultWriter( diff --git a/be/src/runtime/result_file_sink.h b/be/src/runtime/result_file_sink.h index b68678df4c6700..fcac446dd43776 100644 --- a/be/src/runtime/result_file_sink.h +++ b/be/src/runtime/result_file_sink.h @@ -34,7 +34,6 @@ class RuntimeProfile; class BufferControlBlock; class ExprContext; class ResultWriter; -class MemTracker; struct ResultFileOptions; class ResultFileSink : public DataStreamSender { diff --git a/be/src/runtime/result_sink.h b/be/src/runtime/result_sink.h index b94f4e0830b988..1150d8295e920d 100644 --- a/be/src/runtime/result_sink.h +++ b/be/src/runtime/result_sink.h @@ -33,7 +33,6 @@ class RuntimeProfile; class BufferControlBlock; class ExprContext; class ResultWriter; -class MemTracker; struct ResultFileOptions; namespace vectorized { diff --git a/be/src/runtime/row_batch.cpp b/be/src/runtime/row_batch.cpp index cdbbebbfedbdb0..1b6deee4ea538a 100644 --- a/be/src/runtime/row_batch.cpp +++ b/be/src/runtime/row_batch.cpp @@ -57,7 +57,7 @@ RowBatch::RowBatch(const RowDescriptor& row_desc, int capacity, MemTracker* mem_ _tuple_ptrs_size = _capacity * _num_tuples_per_row * sizeof(Tuple*); DCHECK_GT(_tuple_ptrs_size, 0); // TODO: switch to Init() pattern so we can check memory limit and return Status. - _mem_tracker->Consume(_tuple_ptrs_size); + _mem_tracker->consume(_tuple_ptrs_size); _tuple_ptrs = (Tuple**)(malloc(_tuple_ptrs_size)); DCHECK(_tuple_ptrs != nullptr); } @@ -85,7 +85,7 @@ RowBatch::RowBatch(const RowDescriptor& row_desc, const PRowBatch& input_batch, _tuple_ptrs_size = _num_rows * _num_tuples_per_row * sizeof(Tuple*); DCHECK_GT(_tuple_ptrs_size, 0); // TODO: switch to Init() pattern so we can check memory limit and return Status. 
- _mem_tracker->Consume(_tuple_ptrs_size); + _mem_tracker->consume(_tuple_ptrs_size); _tuple_ptrs = (Tuple**)(malloc(_tuple_ptrs_size)); DCHECK(_tuple_ptrs != nullptr); @@ -227,7 +227,7 @@ void RowBatch::clear() { } DCHECK(_tuple_ptrs != nullptr); free(_tuple_ptrs); - _mem_tracker->Release(_tuple_ptrs_size); + _mem_tracker->release(_tuple_ptrs_size); _tuple_ptrs = nullptr; _cleared = true; } diff --git a/be/src/runtime/runtime_filter_mgr.cpp b/be/src/runtime/runtime_filter_mgr.cpp index b5302aeaceb89e..80bdae034f535f 100644 --- a/be/src/runtime/runtime_filter_mgr.cpp +++ b/be/src/runtime/runtime_filter_mgr.cpp @@ -46,8 +46,9 @@ RuntimeFilterMgr::RuntimeFilterMgr(const UniqueId& query_id, RuntimeState* state RuntimeFilterMgr::~RuntimeFilterMgr() {} Status RuntimeFilterMgr::init() { - DCHECK(_state->instance_mem_tracker().get() != nullptr); - _tracker = _state->instance_mem_tracker().get(); + DCHECK(_state->instance_mem_tracker() != nullptr); + _tracker = MemTracker::create_tracker(-1, "RuntimeFilterMgr", _state->instance_mem_tracker(), + MemTrackerLevel::TASK); return Status::OK(); } @@ -102,7 +103,7 @@ Status RuntimeFilterMgr::regist_filter(const RuntimeFilterRole role, const TRunt RuntimeFilterMgrVal filter_mgr_val; filter_mgr_val.role = role; - RETURN_IF_ERROR(IRuntimeFilter::create(_state, _tracker, &_pool, &desc, &options, + RETURN_IF_ERROR(IRuntimeFilter::create(_state, _tracker.get(), &_pool, &desc, &options, role, node_id, &filter_mgr_val.filter)); filter_map->emplace(key, filter_mgr_val); @@ -150,7 +151,7 @@ Status RuntimeFilterMergeControllerEntity::_init_with_desc( cntVal->runtime_filter_desc = *runtime_filter_desc; cntVal->target_info = *target_info; cntVal->pool.reset(new ObjectPool()); - cntVal->tracker = MemTracker::CreateTracker(); + cntVal->tracker = MemTracker::create_tracker(); cntVal->filter = cntVal->pool->add( new IRuntimeFilter(nullptr, cntVal->tracker.get(), cntVal->pool.get())); diff --git a/be/src/runtime/runtime_filter_mgr.h 
b/be/src/runtime/runtime_filter_mgr.h index 653ce675b2356a..9d2bd532fd87f2 100644 --- a/be/src/runtime/runtime_filter_mgr.h +++ b/be/src/runtime/runtime_filter_mgr.h @@ -91,7 +91,7 @@ class RuntimeFilterMgr { std::map _producer_map; RuntimeState* _state; - MemTracker* _tracker; + std::shared_ptr _tracker; ObjectPool _pool; TNetworkAddress _merge_addr; diff --git a/be/src/runtime/runtime_state.cpp b/be/src/runtime/runtime_state.cpp index ccbf2a0f11334b..6fd6dc70a46189 100644 --- a/be/src/runtime/runtime_state.cpp +++ b/be/src/runtime/runtime_state.cpp @@ -36,6 +36,7 @@ #include "runtime/initial_reservations.h" #include "runtime/load_path_mgr.h" #include "runtime/mem_tracker.h" +#include "runtime/mem_tracker_task_pool.h" #include "runtime/runtime_filter_mgr.h" #include "util/cpu_info.h" #include "util/disk_info.h" @@ -52,8 +53,7 @@ namespace doris { RuntimeState::RuntimeState(const TUniqueId& fragment_instance_id, const TQueryOptions& query_options, const TQueryGlobals& query_globals, ExecEnv* exec_env) - : _fragment_mem_tracker(nullptr), - _profile("Fragment " + print_id(fragment_instance_id)), + : _profile("Fragment " + print_id(fragment_instance_id)), _obj_pool(new ObjectPool()), _runtime_filter_mgr(new RuntimeFilterMgr(TUniqueId(), this)), _data_stream_recvrs_pool(new ObjectPool()), @@ -79,8 +79,7 @@ RuntimeState::RuntimeState(const TUniqueId& fragment_instance_id, RuntimeState::RuntimeState(const TPlanFragmentExecParams& fragment_exec_params, const TQueryOptions& query_options, const TQueryGlobals& query_globals, ExecEnv* exec_env) - : _fragment_mem_tracker(nullptr), - _profile("Fragment " + print_id(fragment_exec_params.fragment_instance_id)), + : _profile("Fragment " + print_id(fragment_exec_params.fragment_instance_id)), _obj_pool(new ObjectPool()), _runtime_filter_mgr(new RuntimeFilterMgr(fragment_exec_params.query_id, this)), _data_stream_recvrs_pool(new ObjectPool()), @@ -206,42 +205,40 @@ Status RuntimeState::init(const TUniqueId& fragment_instance_id, 
const TQueryOpt Status RuntimeState::init_mem_trackers(const TUniqueId& query_id) { bool has_query_mem_tracker = _query_options.__isset.mem_limit && (_query_options.mem_limit > 0); int64_t bytes_limit = has_query_mem_tracker ? _query_options.mem_limit : -1; - // we do not use global query-map for now, to avoid mem-exceeded different fragments - // running on the same machine. - // TODO(lingbin): open it later. note that open with BufferedBlockMgr's BlockMgrsMap - // at the same time. - - // _query_mem_tracker = MemTracker::get_query_mem_tracker( - // query_id, bytes_limit, _exec_env->process_mem_tracker()); - + if (bytes_limit > MemTracker::get_process_tracker()->limit()) { + VLOG_NOTICE << "Query memory limit " << PrettyPrinter::print(bytes_limit, TUnit::BYTES) + << " exceeds process memory limit of " + << PrettyPrinter::print(MemTracker::get_process_tracker()->limit(), TUnit::BYTES) + << ". Using process memory limit instead"; + bytes_limit = MemTracker::get_process_tracker()->limit(); + } auto mem_tracker_counter = ADD_COUNTER(&_profile, "MemoryLimit", TUnit::BYTES); mem_tracker_counter->set(bytes_limit); - _query_mem_tracker = - MemTracker::CreateTracker(bytes_limit, "RuntimeState:query:" + print_id(query_id), - _exec_env->process_mem_tracker(), true, false); - _instance_mem_tracker = - MemTracker::CreateTracker(&_profile, -1, "RuntimeState:instance:", _query_mem_tracker); - - /* - // TODO: this is a stopgap until we implement ExprContext - _udf_mem_tracker.reset( - new MemTracker(-1, "UDFs", _instance_mem_tracker.get())); - _udf_pool.reset(new MemPool(_udf_mem_tracker.get())); - */ - // _udf_pool.reset(new MemPool(_instance_mem_tracker.get())); + if (query_type() == TQueryType::SELECT) { + _query_mem_tracker = _exec_env->task_pool_mem_tracker_registry()->register_query_mem_tracker( + print_id(query_id), bytes_limit); + } else if (query_type() == TQueryType::LOAD) { + _query_mem_tracker = _exec_env->task_pool_mem_tracker_registry()->register_load_mem_tracker( + 
print_id(query_id), bytes_limit); + } else { + DCHECK(false); + } + + _instance_mem_tracker = MemTracker::create_tracker( + bytes_limit, "RuntimeState:instance:" + print_id(_fragment_instance_id), _query_mem_tracker, + MemTrackerLevel::INSTANCE, &_profile); RETURN_IF_ERROR(init_buffer_poolstate()); _initial_reservations = _obj_pool->add( - new InitialReservations(_obj_pool.get(), _buffer_reservation, _query_mem_tracker, + new InitialReservations(_obj_pool.get(), _buffer_reservation, nullptr, _query_options.initial_reservation_total_claims)); RETURN_IF_ERROR(_initial_reservations->Init(_query_id, min_reservation())); DCHECK_EQ(0, _initial_reservation_refcnt.load()); if (_instance_buffer_reservation != nullptr) { - _instance_buffer_reservation->InitChildTracker(&_profile, _buffer_reservation, - _instance_mem_tracker.get(), + _instance_buffer_reservation->InitChildTracker(&_profile, _buffer_reservation, nullptr, std::numeric_limits::max()); } @@ -251,13 +248,13 @@ Status RuntimeState::init_mem_trackers(const TUniqueId& query_id) { } Status RuntimeState::init_instance_mem_tracker() { - _instance_mem_tracker = MemTracker::CreateTracker(-1, "RuntimeState"); + _instance_mem_tracker = MemTracker::create_tracker(-1, "RuntimeState"); return Status::OK(); } Status RuntimeState::init_buffer_poolstate() { ExecEnv* exec_env = ExecEnv::GetInstance(); - int64_t mem_limit = _query_mem_tracker->GetLowestLimit(MemLimit::HARD); + int64_t mem_limit = _query_mem_tracker->get_lowest_limit(); int64_t max_reservation; if (query_options().__isset.buffer_pool_limit && query_options().buffer_pool_limit > 0) { max_reservation = query_options().buffer_pool_limit; @@ -273,8 +270,8 @@ Status RuntimeState::init_buffer_poolstate() { VLOG_QUERY << "Buffer pool limit for " << print_id(_query_id) << ": " << max_reservation; _buffer_reservation = _obj_pool->add(new ReservationTracker); - _buffer_reservation->InitChildTracker(nullptr, exec_env->buffer_reservation(), - _query_mem_tracker.get(), 
max_reservation); + _buffer_reservation->InitChildTracker(nullptr, exec_env->buffer_reservation(), nullptr, + max_reservation); return Status::OK(); } @@ -330,46 +327,13 @@ void RuntimeState::get_unreported_errors(std::vector* new_errors) { } } -Status RuntimeState::set_mem_limit_exceeded(MemTracker* tracker, int64_t failed_allocation_size, - const std::string* msg) { - DCHECK_GE(failed_allocation_size, 0); +Status RuntimeState::set_mem_limit_exceeded(const std::string& msg) { { std::lock_guard l(_process_status_lock); if (_process_status.ok()) { - if (msg != nullptr) { - _process_status = Status::MemoryLimitExceeded(*msg); - } else { - _process_status = Status::MemoryLimitExceeded("Memory limit exceeded"); - } - } else { - return _process_status; + _process_status = Status::MemoryLimitExceeded(msg); } } - - DCHECK(_query_mem_tracker.get() != nullptr); - std::stringstream ss; - ss << "Memory Limit Exceeded\n"; - if (failed_allocation_size != 0) { - DCHECK(tracker != nullptr); - ss << " " << tracker->label() << " could not allocate " - << PrettyPrinter::print(failed_allocation_size, TUnit::BYTES) - << " without exceeding limit." << std::endl; - } - - // if (_exec_env->process_mem_tracker()->LimitExceeded()) { - // ss << _exec_env->process_mem_tracker()->LogUsage(); - // } else { - // ss << _query_mem_tracker->LogUsage(); - // } - // log_error(ErrorMsg(TErrorCode::GENERAL, ss.str())); - log_error(ss.str()); - // Add warning about missing stats except for compute stats child queries. 
- // if (!query_ctx().__isset.parent_query_id && - // query_ctx().__isset.tables_missing_stats && - // !query_ctx().tables_missing_stats.empty()) { - // LogError(ErrorMsg(TErrorCode::GENERAL, - // GetTablesMissingStatsWarning(query_ctx().tables_missing_stats))); - // } DCHECK(_process_status.is_mem_limit_exceeded()); return _process_status; } @@ -377,7 +341,7 @@ Status RuntimeState::set_mem_limit_exceeded(MemTracker* tracker, int64_t failed_ Status RuntimeState::check_query_state(const std::string& msg) { // TODO: it would be nice if this also checked for cancellation, but doing so breaks // cases where we use Status::Cancelled("Cancelled") to indicate that the limit was reached. - RETURN_IF_LIMIT_EXCEEDED(this, msg); + RETURN_IF_LIMIT_EXCEEDED(_instance_mem_tracker, this, msg); return query_status(); } @@ -421,7 +385,7 @@ Status RuntimeState::create_error_log_file() { Status RuntimeState::append_error_msg_to_file(std::function line, std::function error_msg, bool* stop_processing, bool is_summary) { *stop_processing = false; - if (_query_options.query_type != TQueryType::LOAD) { + if (query_type() != TQueryType::LOAD) { return Status::OK(); } // If file havn't been opened, open it here @@ -493,12 +457,6 @@ void RuntimeState::export_load_error(const std::string& err_msg) { } } -// TODO chenhao , check scratch_limit, disable_spilling and file_group -// before spillng -Status RuntimeState::StartSpilling(MemTracker* mem_tracker) { - return Status::InternalError("Mem limit exceeded."); -} - int64_t RuntimeState::get_load_mem_limit() { if (_query_options.__isset.load_mem_limit && _query_options.load_mem_limit > 0) { return _query_options.load_mem_limit; diff --git a/be/src/runtime/runtime_state.h b/be/src/runtime/runtime_state.h index 449b4c2a1738ac..e058706928a086 100644 --- a/be/src/runtime/runtime_state.h +++ b/be/src/runtime/runtime_state.h @@ -84,7 +84,6 @@ class RuntimeState { // The instance tracker is tied to our profile. // Specific parts of the fragment (i.e. 
exec nodes, sinks, data stream senders, etc) // will add a fourth level when they are initialized. - // This function also initializes a user function mem tracker (in the fourth level). Status init_mem_trackers(const TUniqueId& query_id); // for ut only @@ -113,6 +112,7 @@ class RuntimeState { int max_errors() const { return _query_options.max_errors; } int max_io_buffers() const { return _query_options.max_io_buffers; } int num_scanner_threads() const { return _query_options.num_scanner_threads; } + TQueryType::type query_type() const { return _query_options.query_type; } int64_t timestamp_ms() const { return _timestamp_ms; } const std::string& timezone() const { return _timezone; } const cctz::time_zone& timezone_obj() const { return _timezone_obj; } @@ -121,8 +121,6 @@ class RuntimeState { const TUniqueId& query_id() const { return _query_id; } const TUniqueId& fragment_instance_id() const { return _fragment_instance_id; } ExecEnv* exec_env() { return _exec_env; } - const std::vector>& mem_trackers() { return _mem_trackers; } - std::shared_ptr fragment_mem_tracker() { return _fragment_mem_tracker; } std::shared_ptr query_mem_tracker() { return _query_mem_tracker; } std::shared_ptr instance_mem_tracker() { return _instance_mem_tracker; } ThreadResourceMgr::ResourcePool* resource_pool() { return _resource_pool; } @@ -158,22 +156,6 @@ class RuntimeState { return _process_status; }; - // MemPool* udf_pool() { - // return _udf_pool.get(); - // }; - - // Create and return a stream receiver for _fragment_instance_id - // from the data stream manager. The receiver is added to _data_stream_recvrs_pool. 
- DataStreamRecvr* create_recvr(const RowDescriptor& row_desc, PlanNodeId dest_node_id, - int num_senders, int buffer_size, RuntimeProfile* profile); - - // Sets the fragment memory limit and adds it to _mem_trackers - void set_fragment_mem_tracker(std::shared_ptr tracker) { - DCHECK(_fragment_mem_tracker == nullptr); - _fragment_mem_tracker = tracker; - _mem_trackers.push_back(tracker); - } - // Appends error to the _error_log if there is space bool log_error(const std::string& error); @@ -226,19 +208,11 @@ class RuntimeState { _process_status = status; } - // Sets query_status_ to MEM_LIMIT_EXCEEDED and logs all the registered trackers. - // Subsequent calls to this will be no-ops. Returns query_status_. - // If 'failed_allocation_size' is not 0, then it is the size of the allocation (in - // bytes) that would have exceeded the limit allocated for 'tracker'. - // This value and tracker are only used for error reporting. + // Sets _process_status to MEM_LIMIT_EXCEEDED. + // Subsequent calls to this will be no-ops. Returns _process_status. // If 'msg' is non-nullptr, it will be appended to query_status_ in addition to the // generic "Memory limit exceeded" error. - Status set_mem_limit_exceeded(MemTracker* tracker = nullptr, int64_t failed_allocation_size = 0, - const std::string* msg = nullptr); - - Status set_mem_limit_exceeded(const std::string& msg) { - return set_mem_limit_exceeded(nullptr, 0, &msg); - } + Status set_mem_limit_exceeded(const std::string& msg = "Memory limit exceeded"); // Returns a non-OK status if query execution should stop (e.g., the query was cancelled // or a mem limit was exceeded). Exec nodes should check this periodically so execution @@ -397,12 +371,6 @@ class RuntimeState { static const int DEFAULT_BATCH_SIZE = 2048; - // all mem limits that apply to this query - std::vector> _mem_trackers; - - // Fragment memory limit. 
Also contained in _mem_trackers - std::shared_ptr _fragment_mem_tracker; - // MemTracker that is shared by all fragment instances running on this host. // The query mem tracker must be released after the _instance_mem_tracker. std::shared_ptr _query_mem_tracker; diff --git a/be/src/runtime/spill_sorter.cc b/be/src/runtime/spill_sorter.cc index a461ebe7faff9a..79c3b17c2844de 100644 --- a/be/src/runtime/spill_sorter.cc +++ b/be/src/runtime/spill_sorter.cc @@ -638,8 +638,6 @@ Status SpillSorter::Run::prepare_read() { _pin_next_fixed_len_block = _pin_next_var_len_block = false; _num_tuples_returned = 0; - // _buffered_batch.reset(new RowBatch(*_sorter->_output_row_desc, - // _sorter->_state->batch_size(), _sorter->_mem_tracker)); _buffered_batch.reset(new RowBatch(*_sorter->_output_row_desc, _sorter->_state->batch_size(), _sorter->_mem_tracker.get())); diff --git a/be/src/runtime/tablets_channel.cpp b/be/src/runtime/tablets_channel.cpp index e8e29902675efc..7c309a0260578b 100644 --- a/be/src/runtime/tablets_channel.cpp +++ b/be/src/runtime/tablets_channel.cpp @@ -35,7 +35,7 @@ TabletsChannel::TabletsChannel(const TabletsChannelKey& key, const std::shared_ptr& mem_tracker, bool is_high_priority) : _key(key), _state(kInitialized), _closed_senders(64), _is_high_priority(is_high_priority) { - _mem_tracker = MemTracker::CreateTracker(-1, "TabletsChannel", mem_tracker); + _mem_tracker = MemTracker::create_tracker(-1, "TabletsChannel:" + std::to_string(key.index_id), mem_tracker); static std::once_flag once_flag; std::call_once(once_flag, [] { REGISTER_HOOK_METRIC(tablet_writer_count, [&]() { return _s_tablet_writer_count.load(); }); @@ -199,8 +199,6 @@ Status TabletsChannel::close(int sender_id, int64_t backend_id, bool* finished, // tablet_vec will only contains success tablet, and then let FE judge it. 
writer->close_wait(tablet_vec, (_broken_tablets.find(writer->tablet_id()) != _broken_tablets.end())); } - // TODO(gaodayue) clear and destruct all delta writers to make sure all memory are freed - // DCHECK_EQ(_mem_tracker->consumption(), 0); } return Status::OK(); } diff --git a/be/src/runtime/vectorized_row_batch.cpp b/be/src/runtime/vectorized_row_batch.cpp index 1fcdcd93582fc8..f26822833ce96c 100644 --- a/be/src/runtime/vectorized_row_batch.cpp +++ b/be/src/runtime/vectorized_row_batch.cpp @@ -29,7 +29,7 @@ VectorizedRowBatch::VectorizedRowBatch(const TabletSchema* schema, _selected_in_use = false; _size = 0; - _tracker = MemTracker::CreateTracker(-1, "VectorizedRowBatch", parent_tracker); + _tracker = MemTracker::create_tracker(-1, "VectorizedRowBatch", parent_tracker); _mem_pool.reset(new MemPool(_tracker.get())); _selected = reinterpret_cast(new char[sizeof(uint16_t) * _capacity]); diff --git a/be/src/service/doris_main.cpp b/be/src/service/doris_main.cpp index 6fcd2589aad2ac..6564dfe8d0fa91 100644 --- a/be/src/service/doris_main.cpp +++ b/be/src/service/doris_main.cpp @@ -483,6 +483,9 @@ int main(int argc, char** argv) { #if !defined(ADDRESS_SANITIZER) && !defined(LEAK_SANITIZER) && !defined(THREAD_SANITIZER) doris::MemInfo::refresh_current_mem(); #endif + // TODO(zxy) 10s is too long to clear the expired task mem tracker. + // It should be actively triggered at the end of query/load. 
+ doris::ExecEnv::GetInstance()->task_pool_mem_tracker_registry()->logout_task_mem_tracker(); sleep(10); } diff --git a/be/src/service/http_service.cpp b/be/src/service/http_service.cpp index c50d14ad725eb8..c5ecde96aedd8a 100644 --- a/be/src/service/http_service.cpp +++ b/be/src/service/http_service.cpp @@ -55,7 +55,7 @@ HttpService::HttpService(ExecEnv* env, int port, int num_threads) HttpService::~HttpService() {} Status HttpService::start() { - add_default_path_handlers(_web_page_handler.get(), _env->process_mem_tracker()); + add_default_path_handlers(_web_page_handler.get(), MemTracker::get_process_tracker()); // register load MiniLoadAction* miniload_action = _pool.add(new MiniLoadAction(_env)); diff --git a/be/src/util/doris_metrics.h b/be/src/util/doris_metrics.h index 8015dcaefac3f1..e99f5980fcd32f 100644 --- a/be/src/util/doris_metrics.h +++ b/be/src/util/doris_metrics.h @@ -183,6 +183,7 @@ class DorisMetrics { UIntGauge* compaction_mem_consumption; UIntGauge* load_mem_consumption; + UIntGauge* load_channel_mem_consumption; UIntGauge* query_mem_consumption; UIntGauge* schema_change_mem_consumption; UIntGauge* tablet_meta_mem_consumption; diff --git a/be/src/util/mem_info.h b/be/src/util/mem_info.h index 6ae8669f868d50..b2c556e60f4c22 100644 --- a/be/src/util/mem_info.h +++ b/be/src/util/mem_info.h @@ -34,6 +34,8 @@ class MemInfo { // Initialize MemInfo. static void init(); + static inline bool initialized() { return _s_initialized; } + // Get total physical memory in bytes (if has cgroups memory limits, return the limits). 
static inline int64_t physical_mem() { DCHECK(_s_initialized); diff --git a/be/src/vec/exec/join/vhash_join_node.cpp b/be/src/vec/exec/join/vhash_join_node.cpp index 5c20a19487711d..c33bcb2c7103a5 100644 --- a/be/src/vec/exec/join/vhash_join_node.cpp +++ b/be/src/vec/exec/join/vhash_join_node.cpp @@ -59,7 +59,7 @@ struct ProcessHashTableBuild { Defer defer {[&]() { int64_t bucket_size = hash_table_ctx.hash_table.get_buffer_size_in_cells(); int64_t bucket_bytes = hash_table_ctx.hash_table.get_buffer_size_in_bytes(); - _join_node->_mem_tracker->Consume(bucket_bytes - old_bucket_bytes); + _join_node->_mem_tracker->consume(bucket_bytes - old_bucket_bytes); _join_node->_mem_used += bucket_bytes - old_bucket_bytes; COUNTER_SET(_join_node->_build_buckets_counter, bucket_size); }}; @@ -732,7 +732,7 @@ Status HashJoinNode::close(RuntimeState* state) { if (_vother_join_conjunct_ptr) (*_vother_join_conjunct_ptr)->close(state); - _mem_tracker->Release(_mem_used); + _mem_tracker->release(_mem_used); return ExecNode::close(state); } @@ -899,9 +899,9 @@ Status HashJoinNode::_hash_table_build(RuntimeState* state) { RETURN_IF_CANCELLED(state); RETURN_IF_ERROR(child(1)->get_next(state, &block, &eos)); - _mem_tracker->Consume(block.allocated_bytes()); + _mem_tracker->consume(block.allocated_bytes()); _mem_used += block.allocated_bytes(); - RETURN_IF_LIMIT_EXCEEDED(state, "Hash join, while getting next from the child 1."); + RETURN_IF_INSTANCE_LIMIT_EXCEEDED(state, "Hash join, while getting next from the child 1."); if (block.rows() != 0) { mutable_block.merge(block); } @@ -912,7 +912,7 @@ Status HashJoinNode::_hash_table_build(RuntimeState* state) { // TODO:: Rethink may we should do the proess after we recevie all build blocks ? // which is better. 
RETURN_IF_ERROR(_process_build_block(state, _build_blocks[index], index)); - RETURN_IF_LIMIT_EXCEEDED(state, "Hash join, while constructing the hash table."); + RETURN_IF_INSTANCE_LIMIT_EXCEEDED(state, "Hash join, while constructing the hash table."); mutable_block = MutableBlock(); ++index; @@ -922,7 +922,7 @@ Status HashJoinNode::_hash_table_build(RuntimeState* state) { _build_blocks.emplace_back(mutable_block.to_block()); RETURN_IF_ERROR(_process_build_block(state, _build_blocks[index], index)); - RETURN_IF_LIMIT_EXCEEDED(state, "Hash join, while constructing the hash table."); + RETURN_IF_INSTANCE_LIMIT_EXCEEDED(state, "Hash join, while constructing the hash table."); return std::visit( [&](auto&& arg) -> Status { diff --git a/be/src/vec/exec/vaggregation_node.cpp b/be/src/vec/exec/vaggregation_node.cpp index 76b4b349efc4d0..2f8df6b934e400 100644 --- a/be/src/vec/exec/vaggregation_node.cpp +++ b/be/src/vec/exec/vaggregation_node.cpp @@ -355,7 +355,7 @@ Status AggregationNode::open(RuntimeState* state) { } RETURN_IF_ERROR(_executor.execute(&block)); _executor.update_memusage(); - RETURN_IF_LIMIT_EXCEEDED(state, "aggregator, while execute open."); + RETURN_IF_INSTANCE_LIMIT_EXCEEDED(state, "aggregator, while execute open."); } return Status::OK(); @@ -395,7 +395,7 @@ Status AggregationNode::get_next(RuntimeState* state, Block* block, bool* eos) { } _executor.update_memusage(); - RETURN_IF_LIMIT_EXCEEDED(state, "aggregator, while execute get_next."); + RETURN_IF_INSTANCE_LIMIT_EXCEEDED(state, "aggregator, while execute get_next."); return Status::OK(); } @@ -555,7 +555,7 @@ Status AggregationNode::_merge_without_key(Block* block) { } void AggregationNode::_update_memusage_without_key() { - mem_tracker()->Consume(_agg_arena_pool.size() - _mem_usage_record.used_in_arena); + mem_tracker()->consume(_agg_arena_pool.size() - _mem_usage_record.used_in_arena); _mem_usage_record.used_in_arena = _agg_arena_pool.size(); } @@ -1078,8 +1078,8 @@ void 
AggregationNode::_update_memusage_with_serialized_key() { std::visit( [&](auto&& agg_method) -> void { auto& data = agg_method.data; - mem_tracker()->Consume(_agg_arena_pool.size() - _mem_usage_record.used_in_arena); - mem_tracker()->Consume(data.get_buffer_size_in_bytes() - + mem_tracker()->consume(_agg_arena_pool.size() - _mem_usage_record.used_in_arena); + mem_tracker()->consume(data.get_buffer_size_in_bytes() - _mem_usage_record.used_in_state); _mem_usage_record.used_in_state = data.get_buffer_size_in_bytes(); _mem_usage_record.used_in_arena = _agg_arena_pool.size(); @@ -1103,7 +1103,7 @@ void AggregationNode::_close_with_serialized_key() { } void AggregationNode::release_tracker() { - mem_tracker()->Release(_mem_usage_record.used_in_state + _mem_usage_record.used_in_arena); + mem_tracker()->release(_mem_usage_record.used_in_state + _mem_usage_record.used_in_arena); } } // namespace doris::vectorized diff --git a/be/src/vec/exec/vcross_join_node.cpp b/be/src/vec/exec/vcross_join_node.cpp index 6d48527f73df2f..e1c14f00272d3c 100644 --- a/be/src/vec/exec/vcross_join_node.cpp +++ b/be/src/vec/exec/vcross_join_node.cpp @@ -44,7 +44,7 @@ Status VCrossJoinNode::close(RuntimeState* state) { if (is_closed()) { return Status::OK(); } - _mem_tracker->Release(_total_mem_usage); + _mem_tracker->release(_total_mem_usage); VBlockingJoinNode::close(state); return Status::OK(); } @@ -67,10 +67,10 @@ Status VCrossJoinNode::construct_build_side(RuntimeState* state) { _build_rows += rows; _total_mem_usage += mem_usage; _build_blocks.emplace_back(std::move(block)); - _mem_tracker->Consume(mem_usage); + _mem_tracker->consume(mem_usage); } // to prevent use too many memory - RETURN_IF_LIMIT_EXCEEDED(state, "Cross join, while getting next from the child 1."); + RETURN_IF_INSTANCE_LIMIT_EXCEEDED(state, "Cross join, while getting next from the child 1."); if (eos) { break; diff --git a/be/src/vec/exec/volap_scan_node.cpp b/be/src/vec/exec/volap_scan_node.cpp index 
ba9838b7553ddf..88dfa3e4ab1d26 100644 --- a/be/src/vec/exec/volap_scan_node.cpp +++ b/be/src/vec/exec/volap_scan_node.cpp @@ -84,7 +84,7 @@ void VOlapScanNode::transfer_thread(RuntimeState* state) { _free_blocks.emplace_back(block); _buffered_bytes += block->allocated_bytes(); } - _mem_tracker->Consume(_buffered_bytes); + _mem_tracker->consume(_buffered_bytes); // read from scanner while (LIKELY(status.ok())) { @@ -349,8 +349,9 @@ Status VOlapScanNode::start_scan_thread(RuntimeState* state) { ++j, ++i) { scanner_ranges.push_back(cond_ranges[i].get()); } - VOlapScanner* scanner = new VOlapScanner(state, this, _olap_scan_node.is_preaggregation, - _need_agg_finalize, *scan_range); + VOlapScanner* scanner = + new VOlapScanner(state, this, _olap_scan_node.is_preaggregation, + _need_agg_finalize, *scan_range, _scanner_mem_tracker); // add scanner to pool before doing prepare. // so that scanner can be automatically deconstructed if prepare failed. _scanner_pool.add(scanner); @@ -400,7 +401,7 @@ Status VOlapScanNode::close(RuntimeState* state) { std::default_delete()); std::for_each(_scan_blocks.begin(), _scan_blocks.end(), std::default_delete()); std::for_each(_free_blocks.begin(), _free_blocks.end(), std::default_delete()); - _mem_tracker->Release(_buffered_bytes); + _mem_tracker->release(_buffered_bytes); // OlapScanNode terminate by exception // so that initiative close the Scanner diff --git a/be/src/vec/exec/volap_scanner.cpp b/be/src/vec/exec/volap_scanner.cpp index fbdbd1151d8721..6a6e771a5660e1 100644 --- a/be/src/vec/exec/volap_scanner.cpp +++ b/be/src/vec/exec/volap_scanner.cpp @@ -33,8 +33,9 @@ namespace doris::vectorized { VOlapScanner::VOlapScanner(RuntimeState* runtime_state, VOlapScanNode* parent, bool aggregation, - bool need_agg_finalize, const TPaloScanRange& scan_range) - : OlapScanner(runtime_state, parent, aggregation, need_agg_finalize, scan_range) {} + bool need_agg_finalize, const TPaloScanRange& scan_range, + std::shared_ptr tracker) + : 
OlapScanner(runtime_state, parent, aggregation, need_agg_finalize, scan_range, tracker) {} Status VOlapScanner::get_block(RuntimeState* state, vectorized::Block* block, bool* eof) { // only empty block should be here diff --git a/be/src/vec/exec/volap_scanner.h b/be/src/vec/exec/volap_scanner.h index 0c1c4adf854aee..b6ef7e32ff8250 100644 --- a/be/src/vec/exec/volap_scanner.h +++ b/be/src/vec/exec/volap_scanner.h @@ -33,7 +33,8 @@ class VOlapScanNode; class VOlapScanner : public OlapScanner { public: VOlapScanner(RuntimeState* runtime_state, VOlapScanNode* parent, bool aggregation, - bool need_agg_finalize, const TPaloScanRange& scan_range); + bool need_agg_finalize, const TPaloScanRange& scan_range, + std::shared_ptr tracker); Status get_block(RuntimeState* state, vectorized::Block* block, bool* eof); diff --git a/be/src/vec/exec/vset_operation_node.cpp b/be/src/vec/exec/vset_operation_node.cpp index 63a1ccbc16d939..3f5ffc7a8085a8 100644 --- a/be/src/vec/exec/vset_operation_node.cpp +++ b/be/src/vec/exec/vset_operation_node.cpp @@ -40,7 +40,7 @@ struct HashTableBuild { Defer defer {[&]() { int64_t bucket_bytes = hash_table_ctx.hash_table.get_buffer_size_in_bytes(); - _operation_node->_mem_tracker->Consume(bucket_bytes - old_bucket_bytes); + _operation_node->_mem_tracker->consume(bucket_bytes - old_bucket_bytes); _operation_node->_mem_used += bucket_bytes - old_bucket_bytes; }}; @@ -85,7 +85,7 @@ Status VSetOperationNode::close(RuntimeState* state) { for (auto& exprs : _child_expr_lists) { VExpr::close(exprs, state); } - _mem_tracker->Release(_mem_used); + _mem_tracker->release(_mem_used); return ExecNode::close(state); } @@ -240,10 +240,10 @@ Status VSetOperationNode::hash_table_build(RuntimeState* state) { RETURN_IF_ERROR(child(0)->get_next(state, &block, &eos)); size_t allocated_bytes = block.allocated_bytes(); - _mem_tracker->Consume(allocated_bytes); + _mem_tracker->consume(allocated_bytes); _mem_used += allocated_bytes; - RETURN_IF_LIMIT_EXCEEDED(state, "Set 
Operation Node, while getting next from the child 0."); + RETURN_IF_INSTANCE_LIMIT_EXCEEDED(state, "Set Operation Node, while getting next from the child 0."); if (block.rows() != 0) { mutable_block.merge(block); } // make one block for each 4 gigabytes @@ -253,7 +253,7 @@ Status VSetOperationNode::hash_table_build(RuntimeState* state) { // TODO:: Rethink may we should do the proess after we recevie all build blocks ? // which is better. RETURN_IF_ERROR(process_build_block(_build_blocks[index], index)); - RETURN_IF_LIMIT_EXCEEDED(state, "Set Operation Node, while constructing the hash table."); + RETURN_IF_INSTANCE_LIMIT_EXCEEDED(state, "Set Operation Node, while constructing the hash table."); mutable_block = MutableBlock(); ++index; last_mem_used = _mem_used; @@ -262,7 +262,7 @@ Status VSetOperationNode::hash_table_build(RuntimeState* state) { _build_blocks.emplace_back(mutable_block.to_block()); RETURN_IF_ERROR(process_build_block(_build_blocks[index], index)); - RETURN_IF_LIMIT_EXCEEDED(state, "Set Operation Node, while constructing the hash table."); + RETURN_IF_INSTANCE_LIMIT_EXCEEDED(state, "Set Operation Node, while constructing the hash table."); return Status::OK(); } diff --git a/be/src/vec/exec/vsort_node.cpp b/be/src/vec/exec/vsort_node.cpp index 734af91baac45a..919c5a040e7c50 100644 --- a/be/src/vec/exec/vsort_node.cpp +++ b/be/src/vec/exec/vsort_node.cpp @@ -102,7 +102,7 @@ Status VSortNode::close(RuntimeState* state) { if (is_closed()) { return Status::OK(); } - _mem_tracker->Release(_total_mem_usage); + _mem_tracker->release(_total_mem_usage); _vsort_exec_exprs.close(state); ExecNode::close(state); return Status::OK(); @@ -159,7 +159,7 @@ Status VSortNode::sort_input(RuntimeState* state) { _sorted_blocks.emplace_back(std::move(block)); } - _mem_tracker->Consume(mem_usage); + _mem_tracker->consume(mem_usage); RETURN_IF_CANCELLED(state); RETURN_IF_ERROR(state->check_query_state("vsort, while sorting input.")); } diff --git 
a/be/src/vec/olap/vgeneric_iterators.cpp b/be/src/vec/olap/vgeneric_iterators.cpp index 8145cc3d4674ba..b31d5117e57ecf 100644 --- a/be/src/vec/olap/vgeneric_iterators.cpp +++ b/be/src/vec/olap/vgeneric_iterators.cpp @@ -269,7 +269,7 @@ class VMergeIterator : public RowwiseIterator { VMergeIterator(std::vector& iters, std::shared_ptr parent, int sequence_id_idx) : _origin_iters(iters),_sequence_id_idx(sequence_id_idx) { // use for count the mem use of Block use in Merge - _mem_tracker = MemTracker::CreateTracker(-1, "VMergeIterator", parent, false); + _mem_tracker = MemTracker::create_tracker(-1, "VMergeIterator", parent); } ~VMergeIterator() override { @@ -361,7 +361,7 @@ class VUnionIterator : public RowwiseIterator { // Client should not use iterators any more. VUnionIterator(std::vector& v, std::shared_ptr parent) : _origin_iters(v.begin(), v.end()) { - _mem_tracker = MemTracker::CreateTracker(-1, "VUnionIterator", parent, false); + _mem_tracker = MemTracker::create_tracker(-1, "VUnionIterator", parent); } ~VUnionIterator() override { diff --git a/be/src/vec/runtime/vdata_stream_recvr.cpp b/be/src/vec/runtime/vdata_stream_recvr.cpp index e3eb3d4e8f2f53..0ace1a7b8cf519 100644 --- a/be/src/vec/runtime/vdata_stream_recvr.cpp +++ b/be/src/vec/runtime/vdata_stream_recvr.cpp @@ -123,7 +123,7 @@ void VDataStreamRecvr::SenderQueue::add_block(const PBlock& pblock, int be_numbe SCOPED_TIMER(_recvr->_deserialize_row_batch_timer); block = new Block(pblock); } - _recvr->_mem_tracker->Consume(block->bytes()); + _recvr->_mem_tracker->consume(block->bytes()); VLOG_ROW << "added #rows=" << block->rows() << " batch_size=" << block_byte_size << "\n"; _block_queue.emplace_back(block_byte_size, block); @@ -162,7 +162,7 @@ void VDataStreamRecvr::SenderQueue::add_block(Block* block, bool use_move) { std::unique_lock l(_lock); size_t block_size = nblock->bytes(); _block_queue.emplace_back(block_size, nblock); - _recvr->_mem_tracker->Consume(nblock->bytes()); + 
_recvr->_mem_tracker->consume(nblock->bytes()); _data_arrival_cv.notify_one(); if (_recvr->exceeds_limit(block_size)) { @@ -260,8 +260,9 @@ VDataStreamRecvr::VDataStreamRecvr( _num_buffered_bytes(0), _profile(profile), _sub_plan_query_statistics_recvr(sub_plan_query_statistics_recvr) { - _mem_tracker = MemTracker::CreateTracker( - _profile, -1, "VDataStreamRecvr:" + print_id(_fragment_instance_id), parent_tracker); + _mem_tracker = + MemTracker::create_tracker(-1, "VDataStreamRecvr:" + print_id(_fragment_instance_id), + parent_tracker, MemTrackerLevel::VERBOSE, _profile); // Create one queue per sender if is_merging is true. int num_queues = is_merging ? num_senders : 1; @@ -330,9 +331,9 @@ Status VDataStreamRecvr::get_next(Block* block, bool* eos) { } if (LIKELY(_mem_tracker->consumption() >= block->bytes())) { - _mem_tracker->Release(block->bytes()); + _mem_tracker->release(block->bytes()); } else { - _mem_tracker->Release(_mem_tracker->consumption()); + _mem_tracker->release(_mem_tracker->consumption()); } return Status::OK(); } @@ -362,7 +363,7 @@ void VDataStreamRecvr::close() { _mgr = nullptr; _merger.reset(); - _mem_tracker->Release(_mem_tracker->consumption()); + _mem_tracker->release(_mem_tracker->consumption()); } } // namespace doris::vectorized diff --git a/be/src/vec/sink/vdata_stream_sender.cpp b/be/src/vec/sink/vdata_stream_sender.cpp index cd830e5fbff41e..cc8555ca67c1ae 100644 --- a/be/src/vec/sink/vdata_stream_sender.cpp +++ b/be/src/vec/sink/vdata_stream_sender.cpp @@ -339,9 +339,9 @@ Status VDataStreamSender::prepare(RuntimeState* state) { _dest_node_id, instances); _profile = _pool->add(new RuntimeProfile(std::move(title))); SCOPED_TIMER(_profile->total_time_counter()); - _mem_tracker = MemTracker::CreateTracker( - _profile, -1, "VDataStreamSender:" + print_id(state->fragment_instance_id()), - state->instance_mem_tracker()); + _mem_tracker = MemTracker::create_tracker( + -1, "VDataStreamSender:" + print_id(state->fragment_instance_id()), + 
state->instance_mem_tracker(), MemTrackerLevel::VERBOSE, _profile); if (_part_type == TPartitionType::UNPARTITIONED || _part_type == TPartitionType::RANDOM) { std::random_device rd; diff --git a/be/src/vec/sink/vmysql_table_sink.cpp b/be/src/vec/sink/vmysql_table_sink.cpp index 3ba7415940eb57..48e4501fd3cabd 100644 --- a/be/src/vec/sink/vmysql_table_sink.cpp +++ b/be/src/vec/sink/vmysql_table_sink.cpp @@ -32,7 +32,7 @@ VMysqlTableSink::VMysqlTableSink(ObjectPool* pool, const RowDescriptor& row_desc : _pool(pool), _row_desc(row_desc), _t_output_expr(t_exprs), - _mem_tracker(MemTracker::CreateTracker(-1, "VMysqlTableSink")) { + _mem_tracker(MemTracker::create_tracker(-1, "VMysqlTableSink")) { _name = "VMysqlTableSink"; } diff --git a/be/test/exec/hash_table_test.cpp b/be/test/exec/hash_table_test.cpp index df5d29da52dd9c..6746fed3e1e18f 100644 --- a/be/test/exec/hash_table_test.cpp +++ b/be/test/exec/hash_table_test.cpp @@ -47,8 +47,8 @@ namespace doris { class HashTableTest : public testing::Test { public: HashTableTest() { - _tracker = MemTracker::CreateTracker(-1, "root"); - _pool_tracker = MemTracker::CreateTracker(-1, "mem-pool", _tracker); + _tracker = MemTracker::create_tracker(-1, "root"); + _pool_tracker = MemTracker::create_tracker(-1, "mem-pool", _tracker); _mem_pool.reset(new MemPool(_pool_tracker.get())); _state = _pool.add(new RuntimeState(TQueryGlobals())); _state->init_instance_mem_tracker(); @@ -196,7 +196,7 @@ TEST_F(HashTableTest, SetupTest) { // The hash table is rehashed a few times and the scans/finds are tested again. 
TEST_F(HashTableTest, BasicTest) { std::shared_ptr hash_table_tracker = - MemTracker::CreateTracker(-1, "hash-table-basic-tracker", _tracker); + MemTracker::create_tracker(-1, "hash-table-basic-tracker", _tracker); TupleRow* build_rows[5]; TupleRow* scan_rows[5] = {0}; @@ -260,7 +260,7 @@ TEST_F(HashTableTest, BasicTest) { // This tests makes sure we can scan ranges of buckets TEST_F(HashTableTest, ScanTest) { std::shared_ptr hash_table_tracker = - MemTracker::CreateTracker(-1, "hash-table-scan-tracker", _tracker); + MemTracker::create_tracker(-1, "hash-table-scan-tracker", _tracker); std::vector is_null_safe = {false}; int initial_seed = 1; @@ -314,7 +314,7 @@ TEST_F(HashTableTest, GrowTableTest) { int expected_size = 0; std::shared_ptr mem_tracker = - MemTracker::CreateTracker(1024 * 1024, "hash-table-grow-tracker", _tracker); + MemTracker::create_tracker(1024 * 1024, "hash-table-grow-tracker", _tracker); std::vector is_null_safe = {false}; int initial_seed = 1; int64_t num_buckets = 4; @@ -355,7 +355,7 @@ TEST_F(HashTableTest, GrowTableTest2) { int build_row_val = 0; std::shared_ptr mem_tracker = - MemTracker::CreateTracker(1024 * 1024 * 1024, "hash-table-grow2-tracker", _tracker); + MemTracker::create_tracker(1024 * 1024 * 1024, "hash-table-grow2-tracker", _tracker); std::vector is_null_safe = {false}; int initial_seed = 1; int64_t num_buckets = 4; diff --git a/be/test/exec/tablet_sink_test.cpp b/be/test/exec/tablet_sink_test.cpp index 3d55699a6f6a8c..e59e972097ff78 100644 --- a/be/test/exec/tablet_sink_test.cpp +++ b/be/test/exec/tablet_sink_test.cpp @@ -57,6 +57,7 @@ class OlapTableSinkTest : public testing::Test { _env->_internal_client_cache = new BrpcClientCache(); _env->_function_client_cache = new BrpcClientCache(); _env->_buffer_reservation = new ReservationTracker(); + _env->_task_pool_mem_tracker_registry.reset(new MemTrackerTaskPool()); ThreadPoolBuilder("SendBatchThreadPool") .set_min_threads(1) .set_max_threads(5) diff --git 
a/be/test/exprs/bloom_filter_predicate_test.cpp b/be/test/exprs/bloom_filter_predicate_test.cpp index ca6e5a9f0d4795..1cba866e7d1fe8 100644 --- a/be/test/exprs/bloom_filter_predicate_test.cpp +++ b/be/test/exprs/bloom_filter_predicate_test.cpp @@ -31,7 +31,7 @@ class BloomFilterPredicateTest : public testing::Test { }; TEST_F(BloomFilterPredicateTest, bloom_filter_func_int_test) { - auto tracker = MemTracker::CreateTracker(); + auto tracker = MemTracker::create_tracker(); std::unique_ptr func( create_bloom_filter(tracker.get(), PrimitiveType::TYPE_INT)); ASSERT_TRUE(func->init(1024, 0.05).ok()); @@ -53,7 +53,7 @@ TEST_F(BloomFilterPredicateTest, bloom_filter_func_int_test) { } TEST_F(BloomFilterPredicateTest, bloom_filter_func_stringval_test) { - auto tracker = MemTracker::CreateTracker(); + auto tracker = MemTracker::create_tracker(); std::unique_ptr func( create_bloom_filter(tracker.get(), PrimitiveType::TYPE_VARCHAR)); ASSERT_TRUE(func->init(1024, 0.05).ok()); @@ -104,7 +104,7 @@ TEST_F(BloomFilterPredicateTest, bloom_filter_func_stringval_test) { } TEST_F(BloomFilterPredicateTest, bloom_filter_size_test) { - auto tracker = MemTracker::CreateTracker(); + auto tracker = MemTracker::create_tracker(); std::unique_ptr func( create_bloom_filter(tracker.get(), PrimitiveType::TYPE_VARCHAR)); int length = 4096; diff --git a/be/test/olap/bloom_filter_column_predicate_test.cpp b/be/test/olap/bloom_filter_column_predicate_test.cpp index 24abea12151ea9..7921fc88de9dda 100644 --- a/be/test/olap/bloom_filter_column_predicate_test.cpp +++ b/be/test/olap/bloom_filter_column_predicate_test.cpp @@ -95,7 +95,7 @@ TEST_F(TestBloomFilterColumnPredicate, FLOAT_COLUMN) { return_columns.push_back(i); } - auto tracker = MemTracker::CreateTracker(-1, "OlapScanner"); + auto tracker = MemTracker::create_tracker(-1, "OlapScanner"); std::shared_ptr bloom_filter( create_bloom_filter(tracker.get(), PrimitiveType::TYPE_FLOAT)); diff --git a/be/test/olap/cumulative_compaction_policy_test.cpp 
b/be/test/olap/cumulative_compaction_policy_test.cpp index 950fcbf437330b..929149a0b00d29 100644 --- a/be/test/olap/cumulative_compaction_policy_test.cpp +++ b/be/test/olap/cumulative_compaction_policy_test.cpp @@ -1043,7 +1043,7 @@ TEST_F(TestSizeBasedCumulativeCompactionPolicy, _pick_missing_version_cumulative rowsets.push_back(_tablet->get_rowset_by_version({2, 2})); rowsets.push_back(_tablet->get_rowset_by_version({4, 4})); std::shared_ptr mem_tracker(new MemTracker()); - CumulativeCompaction compaction(_tablet, "label", mem_tracker); + CumulativeCompaction compaction(_tablet, mem_tracker); compaction.find_longest_consecutive_version(&rowsets, nullptr); ASSERT_EQ(3, rowsets.size()); ASSERT_EQ(2, rowsets[2]->end_version()); diff --git a/be/test/olap/generic_iterators_test.cpp b/be/test/olap/generic_iterators_test.cpp index b73ad0271e6531..c21f6b66a89f3c 100644 --- a/be/test/olap/generic_iterators_test.cpp +++ b/be/test/olap/generic_iterators_test.cpp @@ -83,8 +83,8 @@ TEST(GenericIteratorsTest, Union) { inputs.push_back(new_auto_increment_iterator(schema, 200)); inputs.push_back(new_auto_increment_iterator(schema, 300)); - auto iter = new_union_iterator(inputs, - MemTracker::CreateTracker(-1, "UnionIterator", nullptr, false)); + auto iter = + new_union_iterator(inputs, MemTracker::create_tracker(-1, "UnionIterator", nullptr)); StorageReadOptions opts; auto st = iter->init(opts); ASSERT_TRUE(st.ok()); @@ -123,8 +123,8 @@ TEST(GenericIteratorsTest, Merge) { inputs.push_back(new_auto_increment_iterator(schema, 200)); inputs.push_back(new_auto_increment_iterator(schema, 300)); - auto iter = new_merge_iterator( - std::move(inputs), MemTracker::CreateTracker(-1, "MergeIterator", nullptr, false), -1); + auto iter = new_merge_iterator(std::move(inputs), + MemTracker::create_tracker(-1, "MergeIterator", nullptr), -1); StorageReadOptions opts; auto st = iter->init(opts); ASSERT_TRUE(st.ok()); diff --git a/be/test/olap/rowset/beta_rowset_test.cpp 
b/be/test/olap/rowset/beta_rowset_test.cpp index c3b11efc7687bf..072f46393aea1d 100644 --- a/be/test/olap/rowset/beta_rowset_test.cpp +++ b/be/test/olap/rowset/beta_rowset_test.cpp @@ -175,8 +175,7 @@ TEST_F(BetaRowsetTest, BasicFunctionTest) { // k2 := k1 * 10 // k3 := 4096 * i + rid for (int i = 0; i < num_segments; ++i) { - auto tracker = std::make_shared(); - MemPool mem_pool(tracker.get()); + MemPool mem_pool("BetaRowsetTest"); for (int rid = 0; rid < rows_per_segment; ++rid) { uint32_t k1 = rid * 10 + i; uint32_t k2 = k1 * 10; diff --git a/be/test/runtime/mem_limit_test.cpp b/be/test/runtime/mem_limit_test.cpp index b2c4017ea1e47f..378b9c5d083e5b 100644 --- a/be/test/runtime/mem_limit_test.cpp +++ b/be/test/runtime/mem_limit_test.cpp @@ -24,121 +24,121 @@ namespace doris { TEST(MemTrackerTest, SingleTrackerNoLimit) { - auto t = MemTracker::CreateTracker(); + auto t = MemTracker::create_tracker(); EXPECT_FALSE(t->has_limit()); - t->Consume(10); + t->consume(10); EXPECT_EQ(t->consumption(), 10); - t->Consume(10); + t->consume(10); EXPECT_EQ(t->consumption(), 20); - t->Release(15); + t->release(15); EXPECT_EQ(t->consumption(), 5); - EXPECT_FALSE(t->LimitExceeded(MemLimit::HARD)); - t->Release(5); + EXPECT_FALSE(t->limit_exceeded()); + t->release(5); } TEST(MemTestTest, SingleTrackerWithLimit) { - auto t = MemTracker::CreateTracker(11, "limit tracker"); + auto t = MemTracker::create_tracker(11, "limit tracker"); EXPECT_TRUE(t->has_limit()); - t->Consume(10); + t->consume(10); EXPECT_EQ(t->consumption(), 10); - EXPECT_FALSE(t->LimitExceeded(MemLimit::HARD)); - t->Consume(10); + EXPECT_FALSE(t->limit_exceeded()); + t->consume(10); EXPECT_EQ(t->consumption(), 20); - EXPECT_TRUE(t->LimitExceeded(MemLimit::HARD)); - t->Release(15); + EXPECT_TRUE(t->limit_exceeded()); + t->release(15); EXPECT_EQ(t->consumption(), 5); - EXPECT_FALSE(t->LimitExceeded(MemLimit::HARD)); - t->Release(5); + EXPECT_FALSE(t->limit_exceeded()); + t->release(5); } TEST(MemTestTest, 
TrackerHierarchy) { - auto p = MemTracker::CreateTracker(100); - auto c1 = MemTracker::CreateTracker(80, "c1", p); - auto c2 = MemTracker::CreateTracker(50, "c2", p); + auto p = MemTracker::create_tracker(100); + auto c1 = MemTracker::create_tracker(80, "c1", p); + auto c2 = MemTracker::create_tracker(50, "c2", p); // everything below limits - c1->Consume(60); + c1->consume(60); EXPECT_EQ(c1->consumption(), 60); - EXPECT_FALSE(c1->LimitExceeded(MemLimit::HARD)); - EXPECT_FALSE(c1->AnyLimitExceeded(MemLimit::HARD)); + EXPECT_FALSE(c1->limit_exceeded()); + EXPECT_FALSE(c1->any_limit_exceeded()); EXPECT_EQ(c2->consumption(), 0); - EXPECT_FALSE(c2->LimitExceeded(MemLimit::HARD)); - EXPECT_FALSE(c2->AnyLimitExceeded(MemLimit::HARD)); + EXPECT_FALSE(c2->limit_exceeded()); + EXPECT_FALSE(c2->any_limit_exceeded()); EXPECT_EQ(p->consumption(), 60); - EXPECT_FALSE(p->LimitExceeded(MemLimit::HARD)); - EXPECT_FALSE(p->AnyLimitExceeded(MemLimit::HARD)); + EXPECT_FALSE(p->limit_exceeded()); + EXPECT_FALSE(p->any_limit_exceeded()); // p goes over limit - c2->Consume(50); + c2->consume(50); EXPECT_EQ(c1->consumption(), 60); - EXPECT_FALSE(c1->LimitExceeded(MemLimit::HARD)); - EXPECT_TRUE(c1->AnyLimitExceeded(MemLimit::HARD)); + EXPECT_FALSE(c1->limit_exceeded()); + EXPECT_TRUE(c1->any_limit_exceeded()); EXPECT_EQ(c2->consumption(), 50); - EXPECT_FALSE(c2->LimitExceeded(MemLimit::HARD)); - EXPECT_TRUE(c2->AnyLimitExceeded(MemLimit::HARD)); + EXPECT_FALSE(c2->limit_exceeded()); + EXPECT_TRUE(c2->any_limit_exceeded()); EXPECT_EQ(p->consumption(), 110); - EXPECT_TRUE(p->LimitExceeded(MemLimit::HARD)); + EXPECT_TRUE(p->limit_exceeded()); // c2 goes over limit, p drops below limit - c1->Release(20); - c2->Consume(10); + c1->release(20); + c2->consume(10); EXPECT_EQ(c1->consumption(), 40); - EXPECT_FALSE(c1->LimitExceeded(MemLimit::HARD)); - EXPECT_FALSE(c1->AnyLimitExceeded(MemLimit::HARD)); + EXPECT_FALSE(c1->limit_exceeded()); + EXPECT_FALSE(c1->any_limit_exceeded()); 
EXPECT_EQ(c2->consumption(), 60); - EXPECT_TRUE(c2->LimitExceeded(MemLimit::HARD)); - EXPECT_TRUE(c2->AnyLimitExceeded(MemLimit::HARD)); + EXPECT_TRUE(c2->limit_exceeded()); + EXPECT_TRUE(c2->any_limit_exceeded()); EXPECT_EQ(p->consumption(), 100); - EXPECT_FALSE(p->LimitExceeded(MemLimit::HARD)); - c1->Release(40); - c2->Release(60); + EXPECT_FALSE(p->limit_exceeded()); + c1->release(40); + c2->release(60); } TEST(MemTestTest, TrackerHierarchyTryConsume) { - auto p = MemTracker::CreateTracker(100); - auto c1 = MemTracker::CreateTracker(80, "c1", p); - auto c2 = MemTracker::CreateTracker(50, "c2", p); + auto p = MemTracker::create_tracker(100); + auto c1 = MemTracker::create_tracker(80, "c1", p); + auto c2 = MemTracker::create_tracker(50, "c2", p); // everything below limits - bool consumption = c1->TryConsume(60).ok(); + bool consumption = c1->try_consume(60).ok(); EXPECT_EQ(consumption, true); EXPECT_EQ(c1->consumption(), 60); - EXPECT_FALSE(c1->LimitExceeded(MemLimit::HARD)); - EXPECT_FALSE(c1->AnyLimitExceeded(MemLimit::HARD)); + EXPECT_FALSE(c1->limit_exceeded()); + EXPECT_FALSE(c1->any_limit_exceeded()); EXPECT_EQ(c2->consumption(), 0); - EXPECT_FALSE(c2->LimitExceeded(MemLimit::HARD)); - EXPECT_FALSE(c2->AnyLimitExceeded(MemLimit::HARD)); + EXPECT_FALSE(c2->limit_exceeded()); + EXPECT_FALSE(c2->any_limit_exceeded()); EXPECT_EQ(p->consumption(), 60); - EXPECT_FALSE(p->LimitExceeded(MemLimit::HARD)); - EXPECT_FALSE(p->AnyLimitExceeded(MemLimit::HARD)); + EXPECT_FALSE(p->limit_exceeded()); + EXPECT_FALSE(p->any_limit_exceeded()); // p goes over limit - consumption = c2->TryConsume(50).ok(); + consumption = c2->try_consume(50).ok(); EXPECT_EQ(consumption, false); EXPECT_EQ(c1->consumption(), 60); - EXPECT_FALSE(c1->LimitExceeded(MemLimit::HARD)); - EXPECT_FALSE(c1->AnyLimitExceeded(MemLimit::HARD)); + EXPECT_FALSE(c1->limit_exceeded()); + EXPECT_FALSE(c1->any_limit_exceeded()); EXPECT_EQ(c2->consumption(), 0); - EXPECT_FALSE(c2->LimitExceeded(MemLimit::HARD)); - 
EXPECT_FALSE(c2->AnyLimitExceeded(MemLimit::HARD)); + EXPECT_FALSE(c2->limit_exceeded()); + EXPECT_FALSE(c2->any_limit_exceeded()); EXPECT_EQ(p->consumption(), 60); - EXPECT_FALSE(p->LimitExceeded(MemLimit::HARD)); - EXPECT_FALSE(p->AnyLimitExceeded(MemLimit::HARD)); + EXPECT_FALSE(p->limit_exceeded()); + EXPECT_FALSE(p->any_limit_exceeded()); // c2 goes over limit, p drops below limit - c1->Release(20); - c2->Consume(10); + c1->release(20); + c2->consume(10); EXPECT_EQ(c1->consumption(), 40); - EXPECT_FALSE(c1->LimitExceeded(MemLimit::HARD)); - EXPECT_FALSE(c1->AnyLimitExceeded(MemLimit::HARD)); + EXPECT_FALSE(c1->limit_exceeded()); + EXPECT_FALSE(c1->any_limit_exceeded()); EXPECT_EQ(c2->consumption(), 10); - EXPECT_FALSE(c2->LimitExceeded(MemLimit::HARD)); - EXPECT_FALSE(c2->AnyLimitExceeded(MemLimit::HARD)); + EXPECT_FALSE(c2->limit_exceeded()); + EXPECT_FALSE(c2->any_limit_exceeded()); EXPECT_EQ(p->consumption(), 50); - EXPECT_FALSE(p->LimitExceeded(MemLimit::HARD)); + EXPECT_FALSE(p->limit_exceeded()); - c1->Release(40); - c2->Release(10); + c1->release(40); + c2->release(10); } } // end namespace doris diff --git a/be/test/runtime/memory/chunk_allocator_test.cpp b/be/test/runtime/memory/chunk_allocator_test.cpp index 1b009d90e24b4c..78c623ef20d830 100644 --- a/be/test/runtime/memory/chunk_allocator_test.cpp +++ b/be/test/runtime/memory/chunk_allocator_test.cpp @@ -20,6 +20,7 @@ #include #include "common/config.h" +#include "common/status.h" #include "runtime/memory/chunk.h" #include "util/cpu_info.h" #include "util/doris_metrics.h" @@ -30,7 +31,7 @@ TEST(ChunkAllocatorTest, Normal) { config::use_mmap_allocate_chunk = true; for (size_t size = 4096; size <= 1024 * 1024; size <<= 1) { Chunk chunk; - ASSERT_TRUE(ChunkAllocator::instance()->allocate(size, &chunk)); + ASSERT_TRUE(ChunkAllocator::instance()->allocate(size, &chunk).ok()); ASSERT_NE(nullptr, chunk.data); ASSERT_EQ(size, chunk.size); ChunkAllocator::instance()->free(chunk); diff --git 
a/be/test/runtime/memory_scratch_sink_test.cpp b/be/test/runtime/memory_scratch_sink_test.cpp index b2443aced80aa7..e20f1023fce790 100644 --- a/be/test/runtime/memory_scratch_sink_test.cpp +++ b/be/test/runtime/memory_scratch_sink_test.cpp @@ -115,7 +115,7 @@ void MemoryScratchSinkTest::init_runtime_state() { _state = new RuntimeState(query_id, query_options, TQueryGlobals(), _env->exec_env()); _state->init_instance_mem_tracker(); _mem_tracker = - MemTracker::CreateTracker(-1, "MemoryScratchSinkTest", _state->instance_mem_tracker()); + MemTracker::create_tracker(-1, "MemoryScratchSinkTest", _state->instance_mem_tracker()); _state->set_desc_tbl(_desc_tbl); _state->_load_dir = "./test_run/output/"; _state->init_mem_trackers(TUniqueId()); diff --git a/be/test/runtime/test_env.cc b/be/test/runtime/test_env.cc index eafaaed4badc80..6068e529863614 100644 --- a/be/test/runtime/test_env.cc +++ b/be/test/runtime/test_env.cc @@ -31,15 +31,14 @@ namespace doris { TestEnv::TestEnv() - : _block_mgr_parent_tracker(MemTracker::CreateTracker(-1, "BufferedBlockMgr2")), - _io_mgr_tracker(MemTracker::CreateTracker(-1, "DiskIoMgr")) { + : _block_mgr_parent_tracker(MemTracker::create_tracker(-1, "BufferedBlockMgr2")) { // Some code will use ExecEnv::GetInstance(), so init the global ExecEnv singleton _exec_env = ExecEnv::GetInstance(); _exec_env->_thread_mgr = new ThreadResourceMgr(2); _exec_env->_buffer_reservation = new ReservationTracker(); - _exec_env->_mem_tracker = MemTracker::CreateTracker(-1, "TestEnv"); + _exec_env->_task_pool_mem_tracker_registry.reset(new MemTrackerTaskPool()); _exec_env->_disk_io_mgr = new DiskIoMgr(1, 1, 1, 10); - _exec_env->disk_io_mgr()->init(_io_mgr_tracker); + _exec_env->disk_io_mgr()->init(-1); _exec_env->_scan_thread_pool = new PriorityThreadPool(1, 16); _exec_env->_result_queue_mgr = new ResultQueueMgr(); // TODO may need rpc support, etc. 
diff --git a/be/test/util/arrow/arrow_work_flow_test.cpp b/be/test/util/arrow/arrow_work_flow_test.cpp index 658a5ac3a6f698..5a5f2dc36c14e6 100644 --- a/be/test/util/arrow/arrow_work_flow_test.cpp +++ b/be/test/util/arrow/arrow_work_flow_test.cpp @@ -91,6 +91,7 @@ void ArrowWorkFlowTest::init_runtime_state() { _exec_env->_result_queue_mgr = new ResultQueueMgr(); _exec_env->_thread_mgr = new ThreadResourceMgr(); _exec_env->_buffer_reservation = new ReservationTracker(); + _exec_env->_task_pool_mem_tracker_registry.reset(new MemTrackerTaskPool()); TQueryOptions query_options; query_options.batch_size = 1024; TUniqueId query_id; @@ -99,7 +100,7 @@ void ArrowWorkFlowTest::init_runtime_state() { _state = new RuntimeState(query_id, query_options, TQueryGlobals(), _exec_env); _state->init_instance_mem_tracker(); _mem_tracker = - MemTracker::CreateTracker(-1, "ArrowWorkFlowTest", _state->instance_mem_tracker()); + MemTracker::create_tracker(-1, "ArrowWorkFlowTest", _state->instance_mem_tracker()); _state->set_desc_tbl(_desc_tbl); _state->_load_dir = "./test_run/output/"; _state->init_mem_trackers(TUniqueId()); diff --git a/be/test/vec/core/block_test.cpp b/be/test/vec/core/block_test.cpp index 3d8366d1d352c2..92d9b28c651c39 100644 --- a/be/test/vec/core/block_test.cpp +++ b/be/test/vec/core/block_test.cpp @@ -57,7 +57,7 @@ TEST(BlockTest, RowBatchCovertToBlock) { auto tuple_desc = const_cast(schema_scanner.tuple_desc()); RowDescriptor row_desc(tuple_desc, false); - auto tracker_ptr = MemTracker::CreateTracker(-1, "BlockTest", nullptr, false); + auto tracker_ptr = MemTracker::create_tracker(-1, "BlockTest", nullptr); RowBatch row_batch(row_desc, 1024, tracker_ptr.get()); int16_t k1 = -100; diff --git a/be/test/vec/exec/vgeneric_iterators_test.cpp b/be/test/vec/exec/vgeneric_iterators_test.cpp index 405c9a9103e1d2..94864673a250e9 100644 --- a/be/test/vec/exec/vgeneric_iterators_test.cpp +++ b/be/test/vec/exec/vgeneric_iterators_test.cpp @@ -103,7 +103,7 @@ 
TEST(VGenericIteratorsTest, Union) { inputs.push_back(vectorized::new_auto_increment_iterator(schema, 200)); inputs.push_back(vectorized::new_auto_increment_iterator(schema, 300)); - auto iter = vectorized::new_union_iterator(inputs, MemTracker::CreateTracker(-1, "VUnionIterator", nullptr, false)); + auto iter = vectorized::new_union_iterator(inputs, MemTracker::create_tracker(-1, "VUnionIterator", nullptr)); StorageReadOptions opts; auto st = iter->init(opts); ASSERT_TRUE(st.ok()); @@ -149,7 +149,7 @@ TEST(VGenericIteratorsTest, Merge) { inputs.push_back(vectorized::new_auto_increment_iterator(schema, 200)); inputs.push_back(vectorized::new_auto_increment_iterator(schema, 300)); - auto iter = vectorized::new_merge_iterator(inputs, MemTracker::CreateTracker(-1, "VMergeIterator", nullptr, false), -1); + auto iter = vectorized::new_merge_iterator(inputs, MemTracker::create_tracker(-1, "VMergeIterator", nullptr), -1); StorageReadOptions opts; auto st = iter->init(opts); ASSERT_TRUE(st.ok()); @@ -276,7 +276,7 @@ TEST(VGenericIteratorsTest, MergeWithSeqColumn) { inputs.push_back(new SeqColumnUtIterator(schema, num_rows, rows_begin, seq_column_id, seq_id_in_every_file)); } - auto iter = vectorized::new_merge_iterator(inputs, MemTracker::CreateTracker(-1, "VMergeIterator", nullptr, false), seq_column_id); + auto iter = vectorized::new_merge_iterator(inputs, MemTracker::create_tracker(-1, "VMergeIterator", nullptr), seq_column_id); StorageReadOptions opts; auto st = iter->init(opts); ASSERT_TRUE(st.ok()); diff --git a/be/test/vec/exprs/vexpr_test.cpp b/be/test/vec/exprs/vexpr_test.cpp index c0d7bcc25a9951..cd2335ade3a669 100644 --- a/be/test/vec/exprs/vexpr_test.cpp +++ b/be/test/vec/exprs/vexpr_test.cpp @@ -48,7 +48,7 @@ TEST(TEST_VEXPR, ABSTEST) { auto tuple_desc = const_cast(desc_tbl->get_tuple_descriptor(0)); doris::RowDescriptor row_desc(tuple_desc, false); - auto tracker_ptr = doris::MemTracker::CreateTracker(-1, "BlockTest", nullptr, false); + auto tracker_ptr = 
doris::MemTracker::create_tracker(-1, "BlockTest", nullptr); doris::RowBatch row_batch(row_desc, 1024, tracker_ptr.get()); std::string expr_json = R"|({"1":{"lst":["rec",2,{"1":{"i32":20},"2":{"rec":{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":6}}}}]}}},"4":{"i32":1},"20":{"i32":-1},"26":{"rec":{"1":{"rec":{"2":{"str":"abs"}}},"2":{"i32":0},"3":{"lst":["rec",1,{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":5}}}}]}}]},"4":{"rec":{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":6}}}}]}}},"5":{"tf":0},"7":{"str":"abs(INT)"},"9":{"rec":{"1":{"str":"_ZN5doris13MathFunctions3absEPN9doris_udf15FunctionContextERKNS1_6IntValE"}}},"11":{"i64":0}}}},{"1":{"i32":16},"2":{"rec":{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":5}}}}]}}},"4":{"i32":0},"15":{"rec":{"1":{"i32":0},"2":{"i32":0}}},"20":{"i32":-1},"23":{"i32":-1}}]}})|"; @@ -72,7 +72,7 @@ TEST(TEST_VEXPR, ABSTEST) { doris::TQueryGlobals(), nullptr); runtime_stat.init_instance_mem_tracker(); runtime_stat.set_desc_tbl(desc_tbl); - std::shared_ptr tracker = doris::MemTracker::CreateTracker(); + std::shared_ptr tracker = doris::MemTracker::create_tracker(); context->prepare(&runtime_stat, row_desc, tracker); context->open(&runtime_stat); @@ -96,7 +96,7 @@ TEST(TEST_VEXPR, ABSTEST2) { schema_scanner.init(¶m, &object_pool); auto tuple_desc = const_cast(schema_scanner.tuple_desc()); RowDescriptor row_desc(tuple_desc, false); - auto tracker_ptr = MemTracker::CreateTracker(-1, "BlockTest", nullptr, false); + auto tracker_ptr = MemTracker::create_tracker(-1, "BlockTest", nullptr); RowBatch row_batch(row_desc, 1024, tracker_ptr.get()); std::string expr_json = 
R"|({"1":{"lst":["rec",2,{"1":{"i32":20},"2":{"rec":{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":6}}}}]}}},"4":{"i32":1},"20":{"i32":-1},"26":{"rec":{"1":{"rec":{"2":{"str":"abs"}}},"2":{"i32":0},"3":{"lst":["rec",1,{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":5}}}}]}}]},"4":{"rec":{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":6}}}}]}}},"5":{"tf":0},"7":{"str":"abs(INT)"},"9":{"rec":{"1":{"str":"_ZN5doris13MathFunctions3absEPN9doris_udf15FunctionContextERKNS1_6IntValE"}}},"11":{"i64":0}}}},{"1":{"i32":16},"2":{"rec":{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":5}}}}]}}},"4":{"i32":0},"15":{"rec":{"1":{"i32":0},"2":{"i32":0}}},"20":{"i32":-1},"23":{"i32":-1}}]}})|"; @@ -123,7 +123,7 @@ TEST(TEST_VEXPR, ABSTEST2) { DescriptorTbl desc_tbl; desc_tbl._slot_desc_map[0] = tuple_desc->slots()[0]; runtime_stat.set_desc_tbl(&desc_tbl); - std::shared_ptr tracker = doris::MemTracker::CreateTracker(); + std::shared_ptr tracker = doris::MemTracker::create_tracker(); context->prepare(&runtime_stat, row_desc, tracker); context->open(&runtime_stat); diff --git a/be/test/vec/function/function_comparison_test.cpp b/be/test/vec/function/function_comparison_test.cpp index 8050f431b4bb0e..1668a253bf22ef 100644 --- a/be/test/vec/function/function_comparison_test.cpp +++ b/be/test/vec/function/function_comparison_test.cpp @@ -38,7 +38,7 @@ TEST(ComparisonTest, ComparisonFunctionTest) { auto tuple_desc = const_cast(schema_scanner.tuple_desc()); RowDescriptor row_desc(tuple_desc, false); - auto tracker_ptr = MemTracker::CreateTracker(-1, "BlockTest", nullptr, false); + auto tracker_ptr = MemTracker::create_tracker(-1, "BlockTest", nullptr); RowBatch row_batch(row_desc, 1024, tracker_ptr.get()); int16_t k1 = -100; diff --git a/docs/en/administrator-guide/config/be_config.md b/docs/en/administrator-guide/config/be_config.md index c917492620b328..b93c7c0a173ba4 100644 --- a/docs/en/administrator-guide/config/be_config.md +++ 
b/docs/en/administrator-guide/config/be_config.md @@ -1430,11 +1430,25 @@ The size of the buffer before flashing * Type: int16 * Description: The level at which MemTracker is displayed on the Web page equal or lower than this level will be displayed on the Web page ``` - RELEASE = 0 - DEBUG = 1 + OVERVIEW = 0 + TASK = 1 + INSTANCE = 2 + VERBOSE = 3 ``` * Default: 0 +### `mem_tracker_consume_min_size_bytes` + +* Type: int32 +* Description: The minimum length of TCMalloc Hook when consume/release MemTracker. Consume size smaller than this value will continue to accumulate to avoid frequent calls to consume/release of MemTracker. Decreasing this value will increase the frequency of consume/release. Increasing this value will cause MemTracker statistics to be inaccurate. Theoretically, the statistical value of a MemTracker differs from the true value = ( mem_tracker_consume_min_size_bytes * the number of BE threads where the MemTracker is located). +* Default: 2097152 + +### `memory_leak_detection` + +* Type: bool +* Description: Whether to start memory leak detection, when MemTracker is a negative value, it is considered that a memory leak has occurred, but the actual MemTracker records inaccurately will also cause a negative value, so this feature is in the experimental stage. 
+* Default: false + ### `max_segment_num_per_rowset` * Type: int32 diff --git a/docs/zh-CN/administrator-guide/config/be_config.md b/docs/zh-CN/administrator-guide/config/be_config.md index 65277f2600ee00..53fdb23a52cc04 100644 --- a/docs/zh-CN/administrator-guide/config/be_config.md +++ b/docs/zh-CN/administrator-guide/config/be_config.md @@ -1449,11 +1449,25 @@ webserver默认工作线程数 * 类型: int16 * 描述: MemTracker在Web页面上展示的级别,等于或低于这个级别的MemTracker会在Web页面上展示 ``` - RELEASE = 0 - DEBUG = 1 + OVERVIEW = 0 + TASK = 1 + INSTANCE = 2 + VERBOSE = 3 ``` * 默认值: 0 +### `mem_tracker_consume_min_size_bytes` + +* 类型: int32 +* 描述: TCMalloc Hook consume/release MemTracker时的最小长度,小于该值的consume size会持续累加,避免频繁调用MemTracker的consume/release,减小该值会增加consume/release的频率,增大该值会导致MemTracker统计不准,理论上一个MemTracker的统计值与真实值相差 = (mem_tracker_consume_min_size_bytes * 这个MemTracker所在的BE线程数)。 +* 默认值: 2097152 + +### `memory_leak_detection` + +* 类型: bool +* 描述: 是否启动内存泄漏检测,当 MemTracker 为负值时认为发生了内存泄漏,但实际 MemTracker 记录不准确时也会导致负值,所以这个功能处于实验阶段。 +* 默认值: false + ### `max_segment_num_per_rowset` * 类型: int32