From e8ea6c16e1c828a7f95d405b338c51ca82263432 Mon Sep 17 00:00:00 2001 From: huangwei Date: Thu, 11 Jun 2020 15:43:09 +0800 Subject: [PATCH 01/12] [] update MemTracker & make memtracker pointers shared (need test) --- be/src/common/atomic.h | 6 + be/src/common/config.h | 2 + be/src/common/logging.h | 6 + be/src/exec/aggregation_node.cpp | 8 +- be/src/exec/analytic_eval_node.cpp | 22 +- be/src/exec/base_scanner.cpp | 32 +- be/src/exec/base_scanner.h | 2 +- be/src/exec/blocking_join_node.cpp | 4 +- be/src/exec/broker_scan_node.cpp | 4 +- be/src/exec/cross_join_node.cpp | 2 +- be/src/exec/csv_scan_node.cpp | 2 +- be/src/exec/data_sink.cpp | 4 +- be/src/exec/data_sink.h | 4 +- be/src/exec/es_http_scan_node.cpp | 2 +- be/src/exec/es_http_scanner.cpp | 33 +- be/src/exec/es_http_scanner.h | 2 +- be/src/exec/except_node.cpp | 2 +- be/src/exec/exec_node.cpp | 27 +- be/src/exec/exec_node.h | 12 +- be/src/exec/hash_join_node.cpp | 6 +- be/src/exec/hash_table.cpp | 16 +- be/src/exec/hash_table.h | 4 +- be/src/exec/intersect_node.cpp | 2 +- be/src/exec/merge_join_node.h | 4 +- be/src/exec/merge_node.cpp | 2 +- be/src/exec/mysql_scan_node.cpp | 2 +- be/src/exec/olap_rewrite_node.cpp | 2 +- be/src/exec/olap_scan_node.cpp | 2 +- be/src/exec/partitioned_aggregation_node.cc | 14 +- be/src/exec/partitioned_hash_table.cc | 82 +- be/src/exec/partitioned_hash_table.h | 17 +- be/src/exec/repeat_node.cpp | 2 +- be/src/exec/schema_scan_node.cpp | 2 +- be/src/exec/select_node.cpp | 2 +- be/src/exec/set_operation_node.cpp | 4 +- be/src/exec/sort_exec_exprs.cpp | 2 +- be/src/exec/sort_exec_exprs.h | 2 +- be/src/exec/spill_sort_node.cc | 2 +- be/src/exec/tablet_info.cpp | 2 +- be/src/exec/tablet_info.h | 2 +- be/src/exec/tablet_sink.cpp | 15 +- be/src/exec/tablet_sink.h | 4 +- be/src/exec/topn_node.cpp | 6 +- be/src/exec/union_node.cpp | 6 +- be/src/exprs/agg_fn_evaluator.cpp | 8 +- be/src/exprs/agg_fn_evaluator.h | 4 +- be/src/exprs/expr.cpp | 10 +- be/src/exprs/expr.h | 10 +- 
be/src/exprs/expr_context.cpp | 8 +- be/src/exprs/expr_context.h | 2 +- be/src/exprs/new_agg_fn_evaluator.cc | 8 +- be/src/exprs/new_agg_fn_evaluator.h | 8 +- be/src/http/default_path_handlers.cpp | 6 +- be/src/http/default_path_handlers.h | 3 +- be/src/olap/aggregate_func.h | 4 +- be/src/olap/delta_writer.cpp | 23 +- be/src/olap/delta_writer.h | 6 +- be/src/olap/fs/file_block_manager.cpp | 2 +- be/src/olap/fs/file_block_manager.h | 2 +- be/src/olap/memtable.cpp | 30 +- be/src/olap/memtable.h | 4 +- be/src/olap/merger.cpp | 2 +- be/src/olap/olap_index.cpp | 15 +- be/src/olap/olap_index.h | 2 +- be/src/olap/reader.h | 2 +- be/src/olap/row_block.cpp | 11 +- be/src/olap/row_block.h | 2 +- be/src/olap/row_block2.cpp | 15 +- be/src/olap/row_block2.h | 2 +- be/src/olap/rowset/segment_reader.cpp | 71 +- be/src/olap/rowset/segment_reader.h | 2 +- .../rowset/segment_v2/binary_dict_page.cpp | 3 +- .../olap/rowset/segment_v2/binary_dict_page.h | 2 +- .../rowset/segment_v2/bitmap_index_reader.h | 14 +- .../rowset/segment_v2/bitmap_index_writer.cpp | 7 +- .../segment_v2/bloom_filter_index_reader.h | 5 +- .../segment_v2/bloom_filter_index_writer.cpp | 12 +- be/src/olap/rowset/segment_v2/column_reader.h | 20 +- .../segment_v2/indexed_column_writer.cpp | 7 +- .../rowset/segment_v2/indexed_column_writer.h | 2 +- .../olap/rowset/segment_v2/zone_map_index.cpp | 7 +- .../olap/rowset/segment_v2/zone_map_index.h | 2 +- be/src/olap/schema_change.cpp | 2 +- be/src/olap/task/engine_checksum_task.cpp | 2 +- be/src/runtime/buffered_block_mgr2.cc | 86 +- be/src/runtime/buffered_block_mgr2.h | 11 +- be/src/runtime/buffered_tuple_stream2.cc | 2 +- be/src/runtime/buffered_tuple_stream3.cc | 4 +- be/src/runtime/buffered_tuple_stream3.h | 2 +- be/src/runtime/bufferpool/buffer_pool.cc | 8 +- be/src/runtime/bufferpool/buffer_pool.h | 4 +- .../runtime/bufferpool/buffer_pool_internal.h | 2 +- .../runtime/bufferpool/reservation_tracker.cc | 14 +- be/src/runtime/data_spliter.cpp | 9 +- 
be/src/runtime/data_stream_recvr.cc | 7 +- be/src/runtime/data_stream_recvr.h | 6 +- be/src/runtime/data_stream_sender.cpp | 11 +- be/src/runtime/data_stream_sender.h | 2 +- be/src/runtime/disk_io_mgr.cc | 45 +- be/src/runtime/disk_io_mgr.h | 18 +- be/src/runtime/disk_io_mgr_internal.h | 6 +- be/src/runtime/disk_io_mgr_reader_context.cc | 5 +- be/src/runtime/dpp_sink.cpp | 4 +- be/src/runtime/dpp_sink_internal.cpp | 15 +- be/src/runtime/dpp_sink_internal.h | 4 +- be/src/runtime/exec_env.cpp | 9 +- be/src/runtime/exec_env.h | 51 +- be/src/runtime/exec_env_init.cpp | 4 +- be/src/runtime/export_sink.cpp | 2 +- be/src/runtime/export_sink.h | 2 +- be/src/runtime/initial_reservations.cc | 10 +- be/src/runtime/initial_reservations.h | 4 +- be/src/runtime/load_channel.cpp | 4 +- be/src/runtime/load_channel.h | 4 +- be/src/runtime/load_channel_mgr.cpp | 2 +- be/src/runtime/load_channel_mgr.h | 2 +- be/src/runtime/mem_pool.cpp | 18 +- be/src/runtime/mem_tracker.cpp | 665 +++++++------ be/src/runtime/mem_tracker.h | 908 ++++++++++-------- be/src/runtime/memory_scratch_sink.cpp | 3 +- be/src/runtime/mysql_table_sink.cpp | 2 +- be/src/runtime/plan_fragment_executor.cpp | 6 +- be/src/runtime/plan_fragment_executor.h | 3 +- be/src/runtime/qsorter.cpp | 2 +- be/src/runtime/result_sink.cpp | 3 +- be/src/runtime/row_batch.cpp | 14 +- be/src/runtime/runtime_state.cpp | 25 +- be/src/runtime/runtime_state.h | 30 +- be/src/runtime/spill_sorter.cc | 6 +- be/src/runtime/spill_sorter.h | 4 +- be/src/runtime/tablets_channel.cpp | 4 +- be/src/runtime/tablets_channel.h | 4 +- be/src/runtime/test_env.cc | 4 +- be/src/runtime/test_env.h | 8 +- be/src/testutil/function_utils.cpp | 9 +- be/src/testutil/function_utils.h | 4 +- be/src/util/arrow/row_batch.cpp | 8 +- be/src/util/arrow/row_batch.h | 2 +- be/test/exec/broker_scan_node_test.cpp | 4 +- be/test/exec/broker_scanner_test.cpp | 30 +- be/test/exec/csv_scan_node_test.cpp | 15 +- be/test/exec/es_scan_node_test.cpp | 2 +- 
be/test/exec/hash_table_test.cpp | 19 +- be/test/exec/orc_scanner_test.cpp | 15 +- be/test/exec/parquet_scanner_test.cpp | 4 +- be/test/exec/tablet_info_test.cpp | 8 +- be/test/exec/tablet_sink_test.cpp | 20 +- be/test/olap/aggregate_func_test.cpp | 10 +- be/test/olap/column_reader_test.cpp | 2 +- be/test/olap/comparison_predicate_test.cpp | 2 +- be/test/olap/delta_writer_test.cpp | 9 +- be/test/olap/in_list_predicate_test.cpp | 2 +- be/test/olap/key_coder_test.cpp | 4 +- be/test/olap/null_predicate_test.cpp | 2 +- be/test/olap/row_block_v2_test.cpp | 4 +- be/test/olap/row_cursor_test.cpp | 6 +- be/test/olap/rowset/alpha_rowset_test.cpp | 2 +- be/test/olap/rowset/beta_rowset_test.cpp | 4 +- be/test/olap/rowset/rowset_converter_test.cpp | 2 +- .../segment_v2/binary_dict_page_test.cpp | 8 +- .../segment_v2/binary_plain_page_test.cpp | 6 +- .../segment_v2/binary_prefix_page_test.cpp | 8 +- .../rowset/segment_v2/bitmap_index_test.cpp | 4 +- .../segment_v2/bitshuffle_page_test.cpp | 8 +- .../segment_v2/bloom_filter_page_test.cpp | 4 +- .../segment_v2/column_reader_writer_test.cpp | 22 +- .../frame_of_reference_page_test.cpp | 8 +- .../rowset/segment_v2/plain_page_test.cpp | 8 +- .../olap/rowset/segment_v2/rle_page_test.cpp | 8 +- .../olap/rowset/segment_v2/segment_test.cpp | 4 +- be/test/olap/schema_change_test.cpp | 2 +- be/test/olap/skiplist_test.cpp | 16 +- be/test/olap/storage_types_test.cpp | 8 +- be/test/runtime/CMakeLists.txt | 11 +- be/test/runtime/buffered_block_mgr2_test.cpp | 47 +- .../runtime/buffered_tuple_stream2_test.cpp | 16 +- be/test/runtime/data_stream_test.cpp | 18 +- be/test/runtime/disk_io_mgr_test.cpp | 140 ++- be/test/runtime/load_channel_mgr_test.cpp | 28 +- be/test/runtime/mem_limit_test.cpp | 206 ++-- be/test/runtime/memory_scratch_sink_test.cpp | 3 +- be/test/util/arrow/arrow_row_batch_test.cpp | 20 +- be/test/util/arrow/arrow_row_block_test.cpp | 24 +- be/test/util/arrow/arrow_work_flow_test.cpp | 25 +- 184 files changed, 1906 insertions(+), 
1730 deletions(-) diff --git a/be/src/common/atomic.h b/be/src/common/atomic.h index 766c496f404fd9..a675c2d604334b 100644 --- a/be/src/common/atomic.h +++ b/be/src/common/atomic.h @@ -201,6 +201,12 @@ class AtomicPtr { /// Atomic store with "release" memory-ordering semantic. inline void store(T* val) { _ptr.store(reinterpret_cast(val)); } + /// Store 'new_val' and return the previous value. Implies a Release memory barrier + /// (i.e. the same as Store()). + inline T* swap(T* val) { + return reinterpret_cast(_ptr.swap(reinterpret_cast(val))); + } + private: AtomicInt _ptr; }; diff --git a/be/src/common/config.h b/be/src/common/config.h index d0858aa279e95f..31a62b76a0c219 100644 --- a/be/src/common/config.h +++ b/be/src/common/config.h @@ -544,6 +544,8 @@ namespace config { // Whether to continue to start be when load tablet from header failed. CONF_Bool(ignore_rowset_stale_unconsistent_delete, "false"); + // Soft memory limit as a fraction of hard memory limit. + CONF_Double(soft_mem_limit_frac, "0.9"); } // namespace config } // namespace doris diff --git a/be/src/common/logging.h b/be/src/common/logging.h index cad58f39a0fd6e..14545bc78940b7 100644 --- a/be/src/common/logging.h +++ b/be/src/common/logging.h @@ -61,4 +61,10 @@ #define VLOG_ROW_IS_ON VLOG_IS_ON(3) #define VLOG_PROGRESS_IS_ON VLOG_IS_ON(2) +/// Define a wrapper around DCHECK for strongly typed enums that print a useful error +/// message on failure. 
+#define DCHECK_ENUM_EQ(a, b) \ + DCHECK(a == b) << "[ " #a " = " << static_cast(a) << " , " #b " = " \ + << static_cast(b) << " ]" + #endif diff --git a/be/src/exec/aggregation_node.cpp b/be/src/exec/aggregation_node.cpp index bea37b48e4e533..990fb1641ea10d 100644 --- a/be/src/exec/aggregation_node.cpp +++ b/be/src/exec/aggregation_node.cpp @@ -113,7 +113,7 @@ Status AggregationNode::prepare(RuntimeState* state) { RETURN_IF_ERROR(Expr::prepare( _build_expr_ctxs, state, build_row_desc, expr_mem_tracker())); - _tuple_pool.reset(new MemPool(mem_tracker())); + _tuple_pool.reset(new MemPool(mem_tracker().get())); _agg_fn_ctxs.resize(_aggregate_evaluators.size()); int j = _probe_expr_ctxs.size(); @@ -128,8 +128,8 @@ Status AggregationNode::prepare(RuntimeState* state) { SlotDescriptor* intermediate_slot_desc = _intermediate_tuple_desc->slots()[j]; SlotDescriptor* output_slot_desc = _output_tuple_desc->slots()[j]; RETURN_IF_ERROR(_aggregate_evaluators[i]->prepare( - state, child(0)->row_desc(), _tuple_pool.get(), - intermediate_slot_desc, output_slot_desc, mem_tracker(), &_agg_fn_ctxs[i])); + state, child(0)->row_desc(), _tuple_pool.get(), intermediate_slot_desc, + output_slot_desc, mem_tracker(), &_agg_fn_ctxs[i])); state->obj_pool()->add(_agg_fn_ctxs[i]); } @@ -160,7 +160,7 @@ Status AggregationNode::open(RuntimeState* state) { RETURN_IF_ERROR(_children[0]->open(state)); - RowBatch batch(_children[0]->row_desc(), state->batch_size(), mem_tracker()); + RowBatch batch(_children[0]->row_desc(), state->batch_size(), mem_tracker().get()); int64_t num_input_rows = 0; int64_t num_agg_rows = 0; diff --git a/be/src/exec/analytic_eval_node.cpp b/be/src/exec/analytic_eval_node.cpp index 400faa5a250ddb..b4e97e506e8ee3 100644 --- a/be/src/exec/analytic_eval_node.cpp +++ b/be/src/exec/analytic_eval_node.cpp @@ -146,18 +146,18 @@ Status AnalyticEvalNode::prepare(RuntimeState* state) { RETURN_IF_ERROR(ExecNode::prepare(state)); DCHECK(child(0)->row_desc().is_prefix_of(row_desc())); 
_child_tuple_desc = child(0)->row_desc().tuple_descriptors()[0]; - _curr_tuple_pool.reset(new MemPool(mem_tracker())); - _prev_tuple_pool.reset(new MemPool(mem_tracker())); - _mem_pool.reset(new MemPool(mem_tracker())); + _curr_tuple_pool.reset(new MemPool(mem_tracker().get())); + _prev_tuple_pool.reset(new MemPool(mem_tracker().get())); + _mem_pool.reset(new MemPool(mem_tracker().get())); _evaluation_timer = ADD_TIMER(runtime_profile(), "EvaluationTime"); DCHECK_EQ(_result_tuple_desc->slots().size(), _evaluators.size()); for (int i = 0; i < _evaluators.size(); ++i) { doris_udf::FunctionContext* ctx; - RETURN_IF_ERROR(_evaluators[i]->prepare(state, child(0)->row_desc(), _mem_pool.get(), - _intermediate_tuple_desc->slots()[i], _result_tuple_desc->slots()[i], - mem_tracker(), &ctx)); + RETURN_IF_ERROR(_evaluators[i]->prepare( + state, child(0)->row_desc(), _mem_pool.get(), _intermediate_tuple_desc->slots()[i], + _result_tuple_desc->slots()[i], mem_tracker(), &ctx)); _fn_ctxs.push_back(ctx); state->obj_pool()->add(ctx); } @@ -171,13 +171,13 @@ Status AnalyticEvalNode::prepare(RuntimeState* state) { if (_partition_by_eq_expr_ctx != NULL) { RETURN_IF_ERROR( - _partition_by_eq_expr_ctx->prepare(state, cmp_row_desc, expr_mem_tracker())); + _partition_by_eq_expr_ctx->prepare(state, cmp_row_desc, expr_mem_tracker())); //AddExprCtxToFree(_partition_by_eq_expr_ctx); } if (_order_by_eq_expr_ctx != NULL) { RETURN_IF_ERROR( - _order_by_eq_expr_ctx->prepare(state, cmp_row_desc, expr_mem_tracker())); + _order_by_eq_expr_ctx->prepare(state, cmp_row_desc, expr_mem_tracker())); //AddExprCtxToFree(_order_by_eq_expr_ctx); } } @@ -238,8 +238,8 @@ Status AnalyticEvalNode::open(RuntimeState* state) { // Fetch the first input batch so that some _prev_input_row can be set here to avoid // special casing in GetNext(). 
- _prev_child_batch.reset(new RowBatch(child(0)->row_desc(), state->batch_size(), mem_tracker())); - _curr_child_batch.reset(new RowBatch(child(0)->row_desc(), state->batch_size(), mem_tracker())); + _prev_child_batch.reset(new RowBatch(child(0)->row_desc(), state->batch_size(), mem_tracker().get())); + _curr_child_batch.reset(new RowBatch(child(0)->row_desc(), state->batch_size(), mem_tracker().get())); while (!_input_eos && _prev_input_row == NULL) { RETURN_IF_ERROR(child(0)->get_next(state, _curr_child_batch.get(), &_input_eos)); @@ -744,7 +744,7 @@ Status AnalyticEvalNode::get_next_output_batch(RuntimeState* state, RowBatch* ou ExprContext** ctxs = &_conjunct_ctxs[0]; int num_ctxs = _conjunct_ctxs.size(); - RowBatch input_batch(child(0)->row_desc(), output_batch->capacity(), mem_tracker()); + RowBatch input_batch(child(0)->row_desc(), output_batch->capacity(), mem_tracker().get()); int64_t stream_idx = _input_stream->rows_returned(); RETURN_IF_ERROR(_input_stream->get_next(&input_batch, eos)); diff --git a/be/src/exec/base_scanner.cpp b/be/src/exec/base_scanner.cpp index d72c301462922c..f0edbaae1a6891 100644 --- a/be/src/exec/base_scanner.cpp +++ b/be/src/exec/base_scanner.cpp @@ -27,23 +27,25 @@ namespace doris { -BaseScanner::BaseScanner(RuntimeState* state, RuntimeProfile* profile, const TBrokerScanRangeParams& params, ScannerCounter* counter) : - _state(state), _params(params), _counter(counter), - _src_tuple(nullptr), - _src_tuple_row(nullptr), +BaseScanner::BaseScanner(RuntimeState* state, RuntimeProfile* profile, + const TBrokerScanRangeParams& params, ScannerCounter* counter) + : _state(state), + _params(params), + _counter(counter), + _src_tuple(nullptr), + _src_tuple_row(nullptr), #if BE_TEST - _mem_tracker(new MemTracker()), - _mem_pool(_mem_tracker.get()), + _mem_tracker(new MemTracker()), #else - _mem_tracker(new MemTracker(-1, "Broker Scanner", state->instance_mem_tracker())), - _mem_pool(_state->instance_mem_tracker()), + _mem_tracker(new 
MemTracker(-1, "Broker Scanner", state->instance_mem_tracker())), #endif - _dest_tuple_desc(nullptr), - _strict_mode(false), - _profile(profile), - _rows_read_counter(nullptr), - _read_timer(nullptr), - _materialize_timer(nullptr) { + _mem_pool(_mem_tracker.get()), + _dest_tuple_desc(nullptr), + _strict_mode(false), + _profile(profile), + _rows_read_counter(nullptr), + _read_timer(nullptr), + _materialize_timer(nullptr) { } Status BaseScanner::open() { @@ -113,7 +115,7 @@ Status BaseScanner::init_expr_ctxes() { } ExprContext* ctx = nullptr; RETURN_IF_ERROR(Expr::create_expr_tree(_state->obj_pool(), it->second, &ctx)); - RETURN_IF_ERROR(ctx->prepare(_state, *_row_desc.get(), _mem_tracker.get())); + RETURN_IF_ERROR(ctx->prepare(_state, *_row_desc.get(), _mem_tracker)); RETURN_IF_ERROR(ctx->open(_state)); _dest_expr_ctx.emplace_back(ctx); if (has_slot_id_map) { diff --git a/be/src/exec/base_scanner.h b/be/src/exec/base_scanner.h index 43461c2e4f85b2..d2335020ed42e8 100644 --- a/be/src/exec/base_scanner.h +++ b/be/src/exec/base_scanner.h @@ -76,7 +76,7 @@ class BaseScanner { Tuple* _src_tuple; TupleRow* _src_tuple_row; - std::unique_ptr _mem_tracker; + std::shared_ptr _mem_tracker; // Mem pool used to allocate _src_tuple and _src_tuple_row MemPool _mem_pool; diff --git a/be/src/exec/blocking_join_node.cpp b/be/src/exec/blocking_join_node.cpp index e174973f1cfa32..7f60b81b095ef8 100644 --- a/be/src/exec/blocking_join_node.cpp +++ b/be/src/exec/blocking_join_node.cpp @@ -50,7 +50,7 @@ Status BlockingJoinNode::prepare(RuntimeState* state) { SCOPED_TIMER(_runtime_profile->total_time_counter()); RETURN_IF_ERROR(ExecNode::prepare(state)); - _build_pool.reset(new MemPool(mem_tracker())); + _build_pool.reset(new MemPool(mem_tracker().get())); _build_timer = ADD_TIMER(runtime_profile(), "BuildTime"); _left_child_timer = ADD_TIMER(runtime_profile(), "LeftChildTime"); _build_row_counter = ADD_COUNTER(runtime_profile(), "BuildRows", TUnit::UNIT); @@ -74,7 +74,7 @@ Status 
BlockingJoinNode::prepare(RuntimeState* state) { _probe_tuple_row_size = num_left_tuples * sizeof(Tuple*); _build_tuple_row_size = num_build_tuples * sizeof(Tuple*); - _left_batch.reset(new RowBatch(child(0)->row_desc(), state->batch_size(), mem_tracker())); + _left_batch.reset(new RowBatch(child(0)->row_desc(), state->batch_size(), mem_tracker().get())); return Status::OK(); } diff --git a/be/src/exec/broker_scan_node.cpp b/be/src/exec/broker_scan_node.cpp index 53212e1da98c5c..dcadbc8154e387 100644 --- a/be/src/exec/broker_scan_node.cpp +++ b/be/src/exec/broker_scan_node.cpp @@ -325,7 +325,7 @@ Status BrokerScanNode::scanner_scan( while (!scanner_eof) { // Fill one row batch std::shared_ptr row_batch( - new RowBatch(row_desc(), _runtime_state->batch_size(), mem_tracker())); + new RowBatch(row_desc(), _runtime_state->batch_size(), mem_tracker().get())); // create new tuple buffer for row_batch MemPool* tuple_pool = row_batch->tuple_data_pool(); @@ -382,7 +382,7 @@ Status BrokerScanNode::scanner_scan( // 1. too many batches in queue, or // 2. at least one batch in queue and memory exceed limit. 
(_batch_queue.size() >= _max_buffered_batches - || (mem_tracker()->any_limit_exceeded() && !_batch_queue.empty()))) { + || (mem_tracker()->AnyLimitExceeded(MemLimit::HARD) && !_batch_queue.empty()))) { _queue_writer_cond.wait_for(l, std::chrono::seconds(1)); } // Process already set failed, so we just return OK diff --git a/be/src/exec/cross_join_node.cpp b/be/src/exec/cross_join_node.cpp index 3d6736d12bf3be..c85877df15438a 100644 --- a/be/src/exec/cross_join_node.cpp +++ b/be/src/exec/cross_join_node.cpp @@ -57,7 +57,7 @@ Status CrossJoinNode::construct_build_side(RuntimeState* state) { while (true) { RowBatch* batch = _build_batch_pool->add( - new RowBatch(child(1)->row_desc(), state->batch_size(), mem_tracker())); + new RowBatch(child(1)->row_desc(), state->batch_size(), mem_tracker().get())); RETURN_IF_CANCELLED(state); // TODO(zhaochun): diff --git a/be/src/exec/csv_scan_node.cpp b/be/src/exec/csv_scan_node.cpp index f8ed70bf876a51..745f8a747652b1 100644 --- a/be/src/exec/csv_scan_node.cpp +++ b/be/src/exec/csv_scan_node.cpp @@ -209,7 +209,7 @@ Status CsvScanNode::prepare(RuntimeState* state) { return Status::InternalError("new a csv scanner failed."); } - _tuple_pool.reset(new(std::nothrow) MemPool(state->instance_mem_tracker())); + _tuple_pool.reset(new(std::nothrow) MemPool(state->instance_mem_tracker().get())); if (_tuple_pool.get() == nullptr) { return Status::InternalError("new a mem pool failed."); } diff --git a/be/src/exec/data_sink.cpp b/be/src/exec/data_sink.cpp index 7c75b2f5381371..9fd258e5eb593c 100644 --- a/be/src/exec/data_sink.cpp +++ b/be/src/exec/data_sink.cpp @@ -152,7 +152,9 @@ Status DataSink::init(const TDataSink& thrift_sink) { } Status DataSink::prepare(RuntimeState* state) { - _expr_mem_tracker.reset(new MemTracker(-1, "Data sink", state->instance_mem_tracker())); + _expr_mem_tracker.reset( + new MemTracker(-1, std::string("DataSink:") + std::to_string(state->load_job_id()), + state->instance_mem_tracker())); return Status::OK(); } 
diff --git a/be/src/exec/data_sink.h b/be/src/exec/data_sink.h index eeaf66e5445b43..1c26e2d4fa3470 100644 --- a/be/src/exec/data_sink.h +++ b/be/src/exec/data_sink.h @@ -63,7 +63,7 @@ class DataSink { // It must be okay to call this multiple times. Subsequent calls should // be ignored. virtual Status close(RuntimeState* state, Status exec_status) { - _expr_mem_tracker->close(); + _expr_mem_tracker.reset(); _closed = true; return Status::OK(); } @@ -86,7 +86,7 @@ class DataSink { // Set to true after close() has been called. subclasses should check and set this in // close(). bool _closed; - std::unique_ptr _expr_mem_tracker; + std::shared_ptr _expr_mem_tracker; // Maybe this will be transferred to BufferControlBlock. std::shared_ptr _query_statistics; diff --git a/be/src/exec/es_http_scan_node.cpp b/be/src/exec/es_http_scan_node.cpp index fa7b895805b38a..9cc9b25f32453a 100644 --- a/be/src/exec/es_http_scan_node.cpp +++ b/be/src/exec/es_http_scan_node.cpp @@ -317,7 +317,7 @@ Status EsHttpScanNode::scanner_scan( while (!scanner_eof) { // Fill one row batch std::shared_ptr row_batch( - new RowBatch(row_desc(), _runtime_state->batch_size(), mem_tracker())); + new RowBatch(row_desc(), _runtime_state->batch_size(), mem_tracker().get())); // create new tuple buffer for row_batch MemPool* tuple_pool = row_batch->tuple_data_pool(); diff --git a/be/src/exec/es_http_scanner.cpp b/be/src/exec/es_http_scanner.cpp index f04408088fd800..53bd2e96e11d7d 100644 --- a/be/src/exec/es_http_scanner.cpp +++ b/be/src/exec/es_http_scanner.cpp @@ -30,29 +30,24 @@ namespace doris { -EsHttpScanner::EsHttpScanner( - RuntimeState* state, - RuntimeProfile* profile, - TupleId tuple_id, - const std::map& properties, - const std::vector& conjunct_ctxs, - EsScanCounter* counter, - bool doc_value_mode) : - _state(state), - _profile(profile), - _tuple_id(tuple_id), - _properties(properties), - _conjunct_ctxs(conjunct_ctxs), - _next_range(0), - _line_eof(false), - _batch_eof(false), 
+EsHttpScanner::EsHttpScanner(RuntimeState* state, RuntimeProfile* profile, TupleId tuple_id, + const std::map& properties, + const std::vector& conjunct_ctxs, EsScanCounter* counter, + bool doc_value_mode) + : _state(state), + _profile(profile), + _tuple_id(tuple_id), + _properties(properties), + _conjunct_ctxs(conjunct_ctxs), + _next_range(0), + _line_eof(false), + _batch_eof(false), #if BE_TEST _mem_tracker(new MemTracker()), - _mem_pool(_mem_tracker.get()), -#else +#else _mem_tracker(new MemTracker(-1, "EsHttp Scanner", state->instance_mem_tracker())), - _mem_pool(_state->instance_mem_tracker()), #endif + _mem_pool(_mem_tracker.get()), _tuple_desc(nullptr), _counter(counter), _es_reader(nullptr), diff --git a/be/src/exec/es_http_scanner.h b/be/src/exec/es_http_scanner.h index f94eded5ed9742..3ffa1eae14a3c3 100644 --- a/be/src/exec/es_http_scanner.h +++ b/be/src/exec/es_http_scanner.h @@ -87,7 +87,7 @@ class EsHttpScanner { std::vector _slot_descs; std::unique_ptr _row_desc; - std::unique_ptr _mem_tracker; + std::shared_ptr _mem_tracker; MemPool _mem_pool; const TupleDescriptor* _tuple_desc; diff --git a/be/src/exec/except_node.cpp b/be/src/exec/except_node.cpp index e6de96189a748e..1411647b62dc53 100644 --- a/be/src/exec/except_node.cpp +++ b/be/src/exec/except_node.cpp @@ -73,7 +73,7 @@ Status ExceptNode::open(RuntimeState* state) { temp_tbl->close(); } // probe - _probe_batch.reset(new RowBatch(child(i)->row_desc(), state->batch_size(), mem_tracker())); + _probe_batch.reset(new RowBatch(child(i)->row_desc(), state->batch_size(), mem_tracker().get())); ScopedTimer probe_timer(_probe_timer); RETURN_IF_ERROR(child(i)->open(state)); eos = false; diff --git a/be/src/exec/exec_node.cpp b/be/src/exec/exec_node.cpp index 697dcc6a24d78e..1a8b71a37de586 100644 --- a/be/src/exec/exec_node.cpp +++ b/be/src/exec/exec_node.cpp @@ -131,8 +131,7 @@ ExecNode::ExecNode(ObjectPool* pool, const TPlanNode& tnode, const DescriptorTbl 
init_runtime_profile(print_plan_node_type(tnode.node_type)); } -ExecNode::~ExecNode() { -} +ExecNode::~ExecNode() {} void ExecNode::push_down_predicate( RuntimeState* state, std::list* expr_ctxs) { @@ -150,7 +149,7 @@ void ExecNode::push_down_predicate( if ((*iter)->root()->is_bound(&_tuple_ids)) { // LOG(INFO) << "push down success expr is " << (*iter)->debug_string() // << " and node is " << debug_string(); - (*iter)->prepare(state, row_desc(), _expr_mem_tracker.get()); + (*iter)->prepare(state, row_desc(), _expr_mem_tracker); (*iter)->open(state); _conjunct_ctxs.push_back(*iter); iter = expr_ctxs->erase(iter); @@ -177,8 +176,8 @@ Status ExecNode::prepare(RuntimeState* state) { _rows_returned_counter, runtime_profile()->total_time_counter()), ""); - _mem_tracker.reset(new MemTracker(_runtime_profile.get(), -1, _runtime_profile->name(), state->instance_mem_tracker())); - _expr_mem_tracker.reset(new MemTracker(-1, "Exprs", _mem_tracker.get())); + _mem_tracker.reset(new MemTracker(_runtime_profile.get(), -1, "ExecNode "+ _runtime_profile->name(), state->instance_mem_tracker())); + _expr_mem_tracker.reset(new MemTracker(-1, "ExecNode Exprs", _mem_tracker)); _expr_mem_pool.reset(new MemPool(_expr_mem_tracker.get())); // TODO chenhao RETURN_IF_ERROR(Expr::prepare(_conjunct_ctxs, state, row_desc(), expr_mem_tracker())); @@ -246,14 +245,6 @@ Status ExecNode::close(RuntimeState* state) { state->exec_env()->buffer_pool()->DeregisterClient(&_buffer_pool_client); } - if (_expr_mem_tracker != nullptr) { - _expr_mem_tracker->close(); - } - - if (_mem_tracker != nullptr) { - _mem_tracker->close(); - } - return result; } @@ -591,12 +582,10 @@ Status ExecNode::claim_buffer_reservation(RuntimeState* state) { } ss << print_plan_node_type(_type) << " id=" << _id << " ptr=" << this; - RETURN_IF_ERROR(buffer_pool->RegisterClient(ss.str(), - state->instance_buffer_reservation(), - mem_tracker(), buffer_pool->GetSystemBytesLimit(), - runtime_profile(), - &_buffer_pool_client)); - + 
RETURN_IF_ERROR(buffer_pool->RegisterClient(ss.str(), state->instance_buffer_reservation(), + mem_tracker(), buffer_pool->GetSystemBytesLimit(), + runtime_profile(), &_buffer_pool_client)); + state->initial_reservations()->Claim(&_buffer_pool_client, _resource_profile.min_reservation); /* if (debug_action_ == TDebugAction::SET_DENY_RESERVATION_PROBABILITY && diff --git a/be/src/exec/exec_node.h b/be/src/exec/exec_node.h index ab51166084a76a..03c3eca4d75ee7 100644 --- a/be/src/exec/exec_node.h +++ b/be/src/exec/exec_node.h @@ -205,12 +205,12 @@ class ExecNode { return _memory_used_counter; } - MemTracker* mem_tracker() const { - return _mem_tracker.get(); + std::shared_ptr mem_tracker() const { + return _mem_tracker; } - MemTracker* expr_mem_tracker() const { - return _expr_mem_tracker.get(); + std::shared_ptr expr_mem_tracker() const { + return _expr_mem_tracker; } MemPool* expr_mem_pool() { @@ -313,10 +313,10 @@ class ExecNode { boost::scoped_ptr _runtime_profile; /// Account for peak memory used by this node - boost::scoped_ptr _mem_tracker; + std::shared_ptr _mem_tracker; /// MemTracker used by 'expr_mem_pool_'. - boost::scoped_ptr _expr_mem_tracker; + std::shared_ptr _expr_mem_tracker; /// MemPool for allocating data structures used by expression evaluators in this node. /// Created in Prepare(). 
diff --git a/be/src/exec/hash_join_node.cpp b/be/src/exec/hash_join_node.cpp index 3065cd8d6e4792..dea793ecfdb067 100644 --- a/be/src/exec/hash_join_node.cpp +++ b/be/src/exec/hash_join_node.cpp @@ -81,7 +81,7 @@ Status HashJoinNode::init(const TPlanNode& tnode, RuntimeState* state) { Status HashJoinNode::prepare(RuntimeState* state) { RETURN_IF_ERROR(ExecNode::prepare(state)); - _build_pool.reset(new MemPool(mem_tracker())); + _build_pool.reset(new MemPool(mem_tracker().get())); _build_timer = ADD_TIMER(runtime_profile(), "BuildTime"); _push_down_timer = @@ -139,7 +139,7 @@ Status HashJoinNode::prepare(RuntimeState* state) { _build_expr_ctxs, _probe_expr_ctxs, _build_tuple_size, stores_nulls, _is_null_safe_eq_join, id(), mem_tracker(), 1024)); - _probe_batch.reset(new RowBatch(child(0)->row_desc(), state->batch_size(), mem_tracker())); + _probe_batch.reset(new RowBatch(child(0)->row_desc(), state->batch_size(), mem_tracker().get())); return Status::OK(); } @@ -185,7 +185,7 @@ Status HashJoinNode::construct_hash_table(RuntimeState* state) { // The hash join node needs to keep in memory all build tuples, including the tuple // row ptrs. The row ptrs are copied into the hash table's internal structure so they // don't need to be stored in the _build_pool. 
- RowBatch build_batch(child(1)->row_desc(), state->batch_size(), mem_tracker()); + RowBatch build_batch(child(1)->row_desc(), state->batch_size(), mem_tracker().get()); RETURN_IF_ERROR(child(1)->open(state)); while (true) { diff --git a/be/src/exec/hash_table.cpp b/be/src/exec/hash_table.cpp index eab95d4209e6d0..c4136eda8931ca 100644 --- a/be/src/exec/hash_table.cpp +++ b/be/src/exec/hash_table.cpp @@ -33,7 +33,7 @@ HashTable::HashTable(const vector& build_expr_ctxs, int num_build_tuples, bool stores_nulls, const std::vector& finds_nulls, int32_t initial_seed, - MemTracker* mem_tracker, int64_t num_buckets) : + std::shared_ptr mem_tracker, int64_t num_buckets) : _build_expr_ctxs(build_expr_ctxs), _probe_expr_ctxs(probe_expr_ctxs), _num_build_tuples(num_build_tuples), @@ -47,14 +47,14 @@ HashTable::HashTable(const vector& build_expr_ctxs, _exceeded_limit(false), _mem_tracker(mem_tracker), _mem_limit_exceeded(false) { - DCHECK(mem_tracker != NULL); + DCHECK(_mem_tracker); DCHECK_EQ(_build_expr_ctxs.size(), _probe_expr_ctxs.size()); DCHECK_EQ((num_buckets & (num_buckets - 1)), 0) << "num_buckets must be a power of 2"; _buckets.resize(num_buckets); _num_buckets = num_buckets; _num_buckets_till_resize = MAX_BUCKET_OCCUPANCY_FRACTION * _num_buckets; - _mem_tracker->consume(_buckets.capacity() * sizeof(Bucket)); + _mem_tracker->Consume(_buckets.capacity() * sizeof(Bucket)); // Compute the layout and buffer size to store the evaluated expr results _results_buffer_size = Expr::compute_results_layout(_build_expr_ctxs, @@ -67,7 +67,7 @@ HashTable::HashTable(const vector& build_expr_ctxs, _nodes = reinterpret_cast(malloc(_nodes_capacity * _node_byte_size)); memset(_nodes, 0, _nodes_capacity * _node_byte_size); - _mem_tracker->consume(_nodes_capacity * _node_byte_size); + _mem_tracker->Consume(_nodes_capacity * _node_byte_size); if (_mem_tracker->limit_exceeded()) { mem_limit_exceeded(_nodes_capacity * _node_byte_size); } @@ -81,8 +81,8 @@ void HashTable::close() { delete[] 
_expr_values_buffer; delete[] _expr_value_null_bits; free(_nodes); - _mem_tracker->release(_nodes_capacity * _node_byte_size); - _mem_tracker->release(_buckets.size() * sizeof(Bucket)); + _mem_tracker->Release(_nodes_capacity * _node_byte_size); + _mem_tracker->Release(_buckets.size() * sizeof(Bucket)); } bool HashTable::eval_row(TupleRow* row, const vector& ctxs) { @@ -187,7 +187,7 @@ void HashTable::resize_buckets(int64_t num_buckets) { int64_t old_num_buckets = _num_buckets; int64_t delta_bytes = (num_buckets - old_num_buckets) * sizeof(Bucket); - if (!_mem_tracker->try_consume(delta_bytes)) { + if (!_mem_tracker->TryConsume(delta_bytes)) { mem_limit_exceeded(delta_bytes); return; } @@ -248,7 +248,7 @@ void HashTable::grow_node_array() { free(_nodes); _nodes = new_nodes; - _mem_tracker->consume(new_size - old_size); + _mem_tracker->Consume(new_size - old_size); if (_mem_tracker->limit_exceeded()) { mem_limit_exceeded(new_size - old_size); } diff --git a/be/src/exec/hash_table.h b/be/src/exec/hash_table.h index 06544b34d0d249..f06ff07972a0b4 100644 --- a/be/src/exec/hash_table.h +++ b/be/src/exec/hash_table.h @@ -94,7 +94,7 @@ class HashTable { int num_build_tuples, bool stores_nulls, const std::vector& finds_nulls, int32_t initial_seed, - MemTracker* mem_tracker, + std::shared_ptr mem_tracker, int64_t num_buckets); ~HashTable(); @@ -401,7 +401,7 @@ class HashTable { bool _exceeded_limit; // true if any of _mem_trackers[].limit_exceeded() - MemTracker* _mem_tracker; + std::shared_ptr _mem_tracker; // Set to true if the hash table exceeds the memory limit. If this is set, // subsequent calls to Insert() will be ignored. 
bool _mem_limit_exceeded; diff --git a/be/src/exec/intersect_node.cpp b/be/src/exec/intersect_node.cpp index 39bd10cfca0869..59f6c2b178669b 100755 --- a/be/src/exec/intersect_node.cpp +++ b/be/src/exec/intersect_node.cpp @@ -77,7 +77,7 @@ Status IntersectNode::open(RuntimeState* state) { } } // probe - _probe_batch.reset(new RowBatch(child(i)->row_desc(), state->batch_size(), mem_tracker())); + _probe_batch.reset(new RowBatch(child(i)->row_desc(), state->batch_size(), mem_tracker().get())); ScopedTimer probe_timer(_probe_timer); RETURN_IF_ERROR(child(i)->open(state)); eos = false; diff --git a/be/src/exec/merge_join_node.h b/be/src/exec/merge_join_node.h index 758c9fa3fb2c8f..7dfe65d9c974c5 100644 --- a/be/src/exec/merge_join_node.h +++ b/be/src/exec/merge_join_node.h @@ -66,8 +66,8 @@ class MergeJoinNode : public ExecNode { int row_idx; bool is_eos; TupleRow* current_row; - ChildReaderContext(const RowDescriptor& desc, int batch_size, MemTracker* mem_tracker) : - batch(desc, batch_size, mem_tracker), row_idx(0), is_eos(false), current_row(NULL) { + ChildReaderContext(const RowDescriptor& desc, int batch_size, std::shared_ptr mem_tracker) : + batch(desc, batch_size, mem_tracker.get()), row_idx(0), is_eos(false), current_row(NULL) { } }; // _left_batch must be cleared before calling get_next(). used cache child(0)'s data diff --git a/be/src/exec/merge_node.cpp b/be/src/exec/merge_node.cpp index 9d393b4947aaf5..dabde5902afb9d 100644 --- a/be/src/exec/merge_node.cpp +++ b/be/src/exec/merge_node.cpp @@ -138,7 +138,7 @@ Status MergeNode::get_next(RuntimeState* state, RowBatch* row_batch, bool* eos) if (_child_row_batch.get() == NULL) { RETURN_IF_CANCELLED(state); _child_row_batch.reset( - new RowBatch(child(_child_idx)->row_desc(), state->batch_size(), mem_tracker())); + new RowBatch(child(_child_idx)->row_desc(), state->batch_size(), mem_tracker().get())); // Open child and fetch the first row batch. 
RETURN_IF_ERROR(child(_child_idx)->open(state)); RETURN_IF_ERROR(child(_child_idx)->get_next(state, _child_row_batch.get(), diff --git a/be/src/exec/mysql_scan_node.cpp b/be/src/exec/mysql_scan_node.cpp index f211b0770725cc..2da3507177512e 100644 --- a/be/src/exec/mysql_scan_node.cpp +++ b/be/src/exec/mysql_scan_node.cpp @@ -83,7 +83,7 @@ Status MysqlScanNode::prepare(RuntimeState* state) { return Status::InternalError("new a mysql scanner failed."); } - _tuple_pool.reset(new(std::nothrow) MemPool(mem_tracker())); + _tuple_pool.reset(new(std::nothrow) MemPool(mem_tracker().get())); if (_tuple_pool.get() == NULL) { return Status::InternalError("new a mem pool failed."); diff --git a/be/src/exec/olap_rewrite_node.cpp b/be/src/exec/olap_rewrite_node.cpp index fcfeee6b0c54aa..d113fe7265cf70 100644 --- a/be/src/exec/olap_rewrite_node.cpp +++ b/be/src/exec/olap_rewrite_node.cpp @@ -55,7 +55,7 @@ Status OlapRewriteNode::prepare(RuntimeState* state) { _output_tuple_desc = state->desc_tbl().get_tuple_descriptor(_output_tuple_id); // _child_row_batch.reset(new RowBatch(child(0)->row_desc(), state->batch_size(), mem_tracker())); _child_row_batch.reset( - new RowBatch(child(0)->row_desc(), state->batch_size(), state->fragment_mem_tracker())); + new RowBatch(child(0)->row_desc(), state->batch_size(), state->fragment_mem_tracker().get())); _max_decimal_val.resize(_column_types.size()); _max_decimalv2_val.resize(_column_types.size()); diff --git a/be/src/exec/olap_scan_node.cpp b/be/src/exec/olap_scan_node.cpp index 86df57cc8a8dc7..caec9af46791c2 100644 --- a/be/src/exec/olap_scan_node.cpp +++ b/be/src/exec/olap_scan_node.cpp @@ -1258,7 +1258,7 @@ void OlapScanNode::scanner_thread(OlapScanner* scanner) { break; } RowBatch *row_batch = new RowBatch( - this->row_desc(), state->batch_size(), _runtime_state->fragment_mem_tracker()); + this->row_desc(), state->batch_size(), _runtime_state->fragment_mem_tracker().get()); row_batch->set_scanner_id(scanner->id()); status = 
scanner->get_batch(_runtime_state, row_batch, &eos); if (!status.ok()) { diff --git a/be/src/exec/partitioned_aggregation_node.cc b/be/src/exec/partitioned_aggregation_node.cc index cec1e0b6fde065..2c767e2afee668 100644 --- a/be/src/exec/partitioned_aggregation_node.cc +++ b/be/src/exec/partitioned_aggregation_node.cc @@ -190,8 +190,8 @@ Status PartitionedAggregationNode::prepare(RuntimeState* state) { RETURN_IF_ERROR(ExecNode::prepare(state)); state_ = state; - mem_pool_.reset(new MemPool(mem_tracker())); - agg_fn_pool_.reset(new MemPool(expr_mem_tracker())); + mem_pool_.reset(new MemPool(mem_tracker().get())); + agg_fn_pool_.reset(new MemPool(expr_mem_tracker().get())); ht_resize_timer_ = ADD_TIMER(runtime_profile(), "HTResizeTime"); get_results_timer_ = ADD_TIMER(runtime_profile(), "GetResultsTime"); @@ -247,7 +247,7 @@ Status PartitionedAggregationNode::prepare(RuntimeState* state) { RETURN_IF_ERROR(NewAggFnEvaluator::Create(agg_fns_, state, _pool, agg_fn_pool_.get(), &agg_fn_evals_, expr_mem_tracker(), row_desc)); - expr_results_pool_.reset(new MemPool(_expr_mem_tracker.get())); + expr_results_pool_.reset(new MemPool(expr_mem_tracker().get())); if (!grouping_exprs_.empty()) { RowDescriptor build_row_desc(intermediate_tuple_desc_, false); RETURN_IF_ERROR(PartitionedHashTableCtx::Create(_pool, state, build_exprs_, @@ -308,7 +308,7 @@ Status PartitionedAggregationNode::open(RuntimeState* state) { // Streaming preaggregations do all processing in GetNext(). if (is_streaming_preagg_) return Status::OK(); - RowBatch batch(child(0)->row_desc(), state->batch_size(), mem_tracker()); + RowBatch batch(child(0)->row_desc(), state->batch_size(), mem_tracker().get()); // Read all the rows from the child and process them. 
bool eos = false; do { @@ -532,7 +532,7 @@ Status PartitionedAggregationNode::GetRowsStreaming(RuntimeState* state, if (child_batch_ == NULL) { child_batch_.reset(new RowBatch(child(0)->row_desc(), state->batch_size(), - mem_tracker())); + mem_tracker().get())); } do { @@ -722,7 +722,7 @@ PartitionedAggregationNode::Partition::~Partition() { } Status PartitionedAggregationNode::Partition::InitStreams() { - agg_fn_pool.reset(new MemPool(parent->expr_mem_tracker())); + agg_fn_pool.reset(new MemPool(parent->expr_mem_tracker().get())); DCHECK_EQ(agg_fn_evals.size(), 0); NewAggFnEvaluator::ShallowClone(parent->partition_pool_.get(), agg_fn_pool.get(), parent->agg_fn_evals_, &agg_fn_evals); @@ -1363,7 +1363,7 @@ Status PartitionedAggregationNode::ProcessStream(BufferedTupleStream3* input_str bool eos = false; const RowDescriptor* desc = AGGREGATED_ROWS ? &intermediate_row_desc_ : &(_children[0]->row_desc()); - RowBatch batch(*desc, state_->batch_size(), const_cast(mem_tracker())); + RowBatch batch(*desc, state_->batch_size(), mem_tracker().get()); do { RETURN_IF_ERROR(input_stream->GetNext(&batch, &eos)); RETURN_IF_ERROR( diff --git a/be/src/exec/partitioned_hash_table.cc b/be/src/exec/partitioned_hash_table.cc index ea62abd4a5e750..e92ff486a2a65f 100644 --- a/be/src/exec/partitioned_hash_table.cc +++ b/be/src/exec/partitioned_hash_table.cc @@ -80,8 +80,9 @@ static int64_t NULL_VALUE[] = { PartitionedHashTableCtx::PartitionedHashTableCtx(const std::vector& build_exprs, const std::vector& probe_exprs, bool stores_nulls, const std::vector& finds_nulls, int32_t initial_seed, - int max_levels, MemPool* mem_pool, MemPool* expr_results_pool) - : build_exprs_(build_exprs), + int max_levels, MemPool* mem_pool, MemPool* expr_results_pool, std::shared_ptr tracker) + : tracker_(tracker), + build_exprs_(build_exprs), probe_exprs_(probe_exprs), stores_nulls_(stores_nulls), finds_nulls_(finds_nulls), @@ -91,6 +92,7 @@ PartitionedHashTableCtx::PartitionedHashTableCtx(const std::vector& 
build scratch_row_(NULL), mem_pool_(mem_pool), expr_results_pool_(expr_results_pool) { + DCHECK(tracker_ != nullptr); DCHECK(!finds_some_nulls_ || stores_nulls_); // Compute the layout and buffer size to store the evaluated expr results DCHECK_EQ(build_exprs_.size(), probe_exprs_.size()); @@ -109,36 +111,38 @@ PartitionedHashTableCtx::PartitionedHashTableCtx(const std::vector& build } Status PartitionedHashTableCtx::Init(ObjectPool* pool, RuntimeState* state, int num_build_tuples, - MemTracker* tracker, const RowDescriptor& row_desc, const RowDescriptor& row_desc_probe) { - - int scratch_row_size = sizeof(Tuple*) * num_build_tuples; - scratch_row_ = reinterpret_cast(malloc(scratch_row_size)); - if (UNLIKELY(scratch_row_ == NULL)) { - return Status::InternalError(Substitute("Failed to allocate $0 bytes for scratch row of " - "PartitionedHashTableCtx.", scratch_row_size)); - } + const RowDescriptor& row_desc, + const RowDescriptor& row_desc_probe) { + int scratch_row_size = sizeof(Tuple*) * num_build_tuples; + scratch_row_ = reinterpret_cast(malloc(scratch_row_size)); + if (UNLIKELY(scratch_row_ == NULL)) { + return Status::InternalError( + Substitute("Failed to allocate $0 bytes for scratch row of " + "PartitionedHashTableCtx.", + scratch_row_size)); + } - // TODO chenhao replace ExprContext with ScalarFnEvaluator - for (int i = 0; i < build_exprs_.size(); i++) { - ExprContext* context = pool->add(new ExprContext(build_exprs_[i])); - context->prepare(state, row_desc, tracker); - if (context == nullptr) { - return Status::InternalError("Hashtable init error."); - } - build_expr_evals_.push_back(context); - } - DCHECK_EQ(build_exprs_.size(), build_expr_evals_.size()); - - for (int i = 0; i < probe_exprs_.size(); i++) { - ExprContext* context = pool->add(new ExprContext(probe_exprs_[i])); - context->prepare(state, row_desc_probe, tracker); - if (context == nullptr) { - return Status::InternalError("Hashtable init error."); - } - probe_expr_evals_.push_back(context); - 
} - DCHECK_EQ(probe_exprs_.size(), probe_expr_evals_.size()); - return expr_values_cache_.Init(state, mem_pool_->mem_tracker(), build_exprs_); + // TODO chenhao replace ExprContext with ScalarFnEvaluator + for (int i = 0; i < build_exprs_.size(); i++) { + ExprContext* context = pool->add(new ExprContext(build_exprs_[i])); + context->prepare(state, row_desc, tracker_); + if (context == nullptr) { + return Status::InternalError("Hashtable init error."); + } + build_expr_evals_.push_back(context); + } + DCHECK_EQ(build_exprs_.size(), build_expr_evals_.size()); + + for (int i = 0; i < probe_exprs_.size(); i++) { + ExprContext* context = pool->add(new ExprContext(probe_exprs_[i])); + context->prepare(state, row_desc_probe, tracker_); + if (context == nullptr) { + return Status::InternalError("Hashtable init error."); + } + probe_expr_evals_.push_back(context); + } + DCHECK_EQ(probe_exprs_.size(), probe_expr_evals_.size()); + return expr_values_cache_.Init(state, tracker_, build_exprs_); } Status PartitionedHashTableCtx::Create(ObjectPool* pool, RuntimeState* state, @@ -146,12 +150,12 @@ Status PartitionedHashTableCtx::Create(ObjectPool* pool, RuntimeState* state, const std::vector& probe_exprs, bool stores_nulls, const std::vector& finds_nulls, int32_t initial_seed, int max_levels, int num_build_tuples, MemPool* mem_pool, MemPool* expr_results_pool, - MemTracker* tracker, const RowDescriptor& row_desc, + std::shared_ptr tracker, const RowDescriptor& row_desc, const RowDescriptor& row_desc_probe, scoped_ptr* ht_ctx) { ht_ctx->reset(new PartitionedHashTableCtx(build_exprs, probe_exprs, stores_nulls, - finds_nulls, initial_seed, max_levels, mem_pool, expr_results_pool)); - return (*ht_ctx)->Init(pool, state, num_build_tuples, tracker, row_desc, row_desc_probe); + finds_nulls, initial_seed, max_levels, mem_pool, expr_results_pool, tracker)); + return (*ht_ctx)->Init(pool, state, num_build_tuples, row_desc, row_desc_probe); } Status 
PartitionedHashTableCtx::Open(RuntimeState* state) { @@ -168,7 +172,7 @@ Status PartitionedHashTableCtx::Open(RuntimeState* state) { void PartitionedHashTableCtx::Close(RuntimeState* state) { free(scratch_row_); scratch_row_ = NULL; - expr_values_cache_.Close(mem_pool_->mem_tracker()); + expr_values_cache_.Close(tracker_); for (int i = 0; i < build_expr_evals_.size(); i++) { build_expr_evals_[i]->close(state); } @@ -310,7 +314,7 @@ PartitionedHashTableCtx::ExprValuesCache::ExprValuesCache() null_bitmap_(0) {} Status PartitionedHashTableCtx::ExprValuesCache::Init(RuntimeState* state, - MemTracker* tracker, const std::vector& build_exprs) { + std::shared_ptr tracker, const std::vector& build_exprs) { // Initialize the number of expressions. num_exprs_ = build_exprs.size(); // Compute the layout of evaluated values of a row. @@ -328,7 +332,7 @@ Status PartitionedHashTableCtx::ExprValuesCache::Init(RuntimeState* state, MAX_EXPR_VALUES_ARRAY_SIZE / expr_values_bytes_per_row_)); int mem_usage = MemUsage(capacity_, expr_values_bytes_per_row_, num_exprs_); - if (UNLIKELY(!tracker->try_consume(mem_usage))) { + if (UNLIKELY(!tracker->TryConsume(mem_usage))) { capacity_ = 0; string details = Substitute("PartitionedHashTableCtx::ExprValuesCache failed to allocate $0 bytes.", mem_usage); @@ -354,7 +358,7 @@ Status PartitionedHashTableCtx::ExprValuesCache::Init(RuntimeState* state, return Status::OK(); } -void PartitionedHashTableCtx::ExprValuesCache::Close(MemTracker* tracker) { +void PartitionedHashTableCtx::ExprValuesCache::Close(std::shared_ptr tracker) { if (capacity_ == 0) return; cur_expr_values_ = NULL; cur_expr_values_null_ = NULL; @@ -365,7 +369,7 @@ void PartitionedHashTableCtx::ExprValuesCache::Close(MemTracker* tracker) { expr_values_hash_array_.reset(); null_bitmap_.Reset(0); int mem_usage = MemUsage(capacity_, expr_values_bytes_per_row_, num_exprs_); - tracker->release(mem_usage); + tracker->Release(mem_usage); } int 
PartitionedHashTableCtx::ExprValuesCache::MemUsage(int capacity, diff --git a/be/src/exec/partitioned_hash_table.h b/be/src/exec/partitioned_hash_table.h index ab78b2efe9d425..12dc00c2c55f0e 100644 --- a/be/src/exec/partitioned_hash_table.h +++ b/be/src/exec/partitioned_hash_table.h @@ -115,7 +115,7 @@ class PartitionedHashTableCtx { const std::vector& probe_exprs, bool stores_nulls, const std::vector& finds_nulls, int32_t initial_seed, int max_levels, int num_build_tuples, MemPool* mem_pool, MemPool* expr_results_pool, - MemTracker* tracker, const RowDescriptor& row_desc, + std::shared_ptr tracker, const RowDescriptor& row_desc, const RowDescriptor& row_desc_probe, boost::scoped_ptr* ht_ctx); @@ -211,12 +211,12 @@ class PartitionedHashTableCtx { /// Allocates memory and initializes various data structures. Return error status /// if memory allocation leads to the memory limits of the exec node to be exceeded. /// 'tracker' is the memory tracker of the exec node which owns this PartitionedHashTableCtx. - Status Init(RuntimeState* state, MemTracker* tracker, + Status Init(RuntimeState* state, std::shared_ptr tracker, const std::vector& build_exprs); /// Frees up various resources and updates memory tracker with proper accounting. /// 'tracker' should be the same memory tracker which was passed in for Init(). - void Close(MemTracker* tracker); + void Close(std::shared_ptr tracker); /// Resets the cache states (iterators, end pointers etc) before writing. void Reset() noexcept; @@ -382,9 +382,10 @@ class PartitionedHashTableCtx { /// in which nulls are stored and columns in which they are not, which could save /// space by not storing some rows we know will never match. 
PartitionedHashTableCtx(const std::vector& build_exprs, - const std::vector& probe_exprs, bool stores_nulls, - const std::vector& finds_nulls, int32_t initial_seed, - int max_levels, MemPool* mem_pool, MemPool* expr_results_pool); + const std::vector& probe_exprs, bool stores_nulls, + const std::vector& finds_nulls, int32_t initial_seed, + int max_levels, MemPool* mem_pool, MemPool* expr_results_pool, + std::shared_ptr tracker); /// Allocate various buffers for storing expression evaluation results, hash values, /// null bits etc. Also allocate evaluators for the build and probe expressions and @@ -392,7 +393,7 @@ class PartitionedHashTableCtx { /// be exceeded or the evaluators fail to initialize. 'num_build_tuples' is the number /// of tuples of a row in the build side, used for computing the size of a scratch row. Status Init(ObjectPool* pool, RuntimeState* state, int num_build_tuples, - MemTracker* tracker, const RowDescriptor& row_desc, const RowDescriptor& row_desc_probe); + const RowDescriptor& row_desc, const RowDescriptor& row_desc_probe); /// Compute the hash of the values in 'expr_values' with nullness 'expr_values_null'. /// This will be replaced by codegen. 
We don't want this inlined for replacing @@ -454,6 +455,8 @@ class PartitionedHashTableCtx { bool IR_NO_INLINE stores_nulls() const { return stores_nulls_; } bool IR_NO_INLINE finds_some_nulls() const { return finds_some_nulls_; } + std::shared_ptr tracker_; + const std::vector& build_exprs_; std::vector build_expr_evals_; diff --git a/be/src/exec/repeat_node.cpp b/be/src/exec/repeat_node.cpp index 274f07b0fe5e22..bfc22f57931021 100644 --- a/be/src/exec/repeat_node.cpp +++ b/be/src/exec/repeat_node.cpp @@ -175,7 +175,7 @@ Status RepeatNode::get_next(RuntimeState* state, RowBatch* row_batch, bool* eos) } _child_row_batch.reset( - new RowBatch(child(0)->row_desc(), state->batch_size(), mem_tracker())); + new RowBatch(child(0)->row_desc(), state->batch_size(), mem_tracker().get())); RETURN_IF_ERROR(child(0)->get_next(state, _child_row_batch.get(), &_child_eos)); if (_child_row_batch->num_rows() <= 0) { diff --git a/be/src/exec/schema_scan_node.cpp b/be/src/exec/schema_scan_node.cpp index a0521af94b8629..6abee999382f7d 100644 --- a/be/src/exec/schema_scan_node.cpp +++ b/be/src/exec/schema_scan_node.cpp @@ -103,7 +103,7 @@ Status SchemaScanNode::prepare(RuntimeState* state) { RETURN_IF_ERROR(ScanNode::prepare(state)); // new one mem pool - _tuple_pool.reset(new(std::nothrow) MemPool(mem_tracker())); + _tuple_pool.reset(new(std::nothrow) MemPool(mem_tracker().get())); if (NULL == _tuple_pool.get()) { return Status::InternalError("Allocate MemPool failed."); diff --git a/be/src/exec/select_node.cpp b/be/src/exec/select_node.cpp index 933f97064fbbfa..015713f17e5e43 100644 --- a/be/src/exec/select_node.cpp +++ b/be/src/exec/select_node.cpp @@ -35,7 +35,7 @@ SelectNode::SelectNode( Status SelectNode::prepare(RuntimeState* state) { RETURN_IF_ERROR(ExecNode::prepare(state)); _child_row_batch.reset( - new RowBatch(child(0)->row_desc(), state->batch_size(), mem_tracker())); + new RowBatch(child(0)->row_desc(), state->batch_size(), mem_tracker().get())); return Status::OK(); } 
diff --git a/be/src/exec/set_operation_node.cpp b/be/src/exec/set_operation_node.cpp index 627e139f3812d4..f5552f12c1963a 100644 --- a/be/src/exec/set_operation_node.cpp +++ b/be/src/exec/set_operation_node.cpp @@ -39,7 +39,7 @@ Status SetOperationNode::prepare(RuntimeState* state) { RETURN_IF_ERROR(ExecNode::prepare(state)); _tuple_desc = state->desc_tbl().get_tuple_descriptor(_tuple_id); DCHECK(_tuple_desc != nullptr); - _build_pool.reset(new MemPool(mem_tracker())); + _build_pool.reset(new MemPool(mem_tracker().get())); _build_timer = ADD_TIMER(runtime_profile(), "BuildTime"); _probe_timer = ADD_TIMER(runtime_profile(), "ProbeTime"); SCOPED_TIMER(_runtime_profile->total_time_counter()); @@ -142,7 +142,7 @@ Status SetOperationNode::open(RuntimeState* state) { // initial build hash table used for remove duplicted _hash_tbl.reset(new HashTable(_child_expr_lists[0], _child_expr_lists[1], _build_tuple_size, true, _find_nulls, id(), mem_tracker(), 1024)); - RowBatch build_batch(child(0)->row_desc(), state->batch_size(), mem_tracker()); + RowBatch build_batch(child(0)->row_desc(), state->batch_size(), mem_tracker().get()); RETURN_IF_ERROR(child(0)->open(state)); bool eos = false; diff --git a/be/src/exec/sort_exec_exprs.cpp b/be/src/exec/sort_exec_exprs.cpp index 5a3d62c06476e2..3780ce387e9e82 100644 --- a/be/src/exec/sort_exec_exprs.cpp +++ b/be/src/exec/sort_exec_exprs.cpp @@ -50,7 +50,7 @@ Status SortExecExprs::init(const std::vector& lhs_ordering_expr_ct Status SortExecExprs::prepare(RuntimeState* state, const RowDescriptor& child_row_desc, const RowDescriptor& output_row_desc, - MemTracker* expr_mem_tracker) { + std::shared_ptr expr_mem_tracker) { if (_materialize_tuple) { RETURN_IF_ERROR(Expr::prepare( _sort_tuple_slot_expr_ctxs, state, child_row_desc, expr_mem_tracker)); diff --git a/be/src/exec/sort_exec_exprs.h b/be/src/exec/sort_exec_exprs.h index d78f7f7f955c97..070703036b31d3 100644 --- a/be/src/exec/sort_exec_exprs.h +++ b/be/src/exec/sort_exec_exprs.h @@ 
-44,7 +44,7 @@ class SortExecExprs { // prepare all expressions used for sorting and tuple materialization. Status prepare(RuntimeState* state, const RowDescriptor& child_row_desc, - const RowDescriptor& output_row_desc, MemTracker* mem_tracker); + const RowDescriptor& output_row_desc, std::shared_ptr mem_tracker); // open all expressions used for sorting and tuple materialization. Status open(RuntimeState* state); diff --git a/be/src/exec/spill_sort_node.cc b/be/src/exec/spill_sort_node.cc index 2d4936495827f4..4463fa8d3df76c 100644 --- a/be/src/exec/spill_sort_node.cc +++ b/be/src/exec/spill_sort_node.cc @@ -157,7 +157,7 @@ void SpillSortNode::debug_string(int indentation_level, stringstream* out) const } Status SpillSortNode::sort_input(RuntimeState* state) { - RowBatch batch(child(0)->row_desc(), state->batch_size(), mem_tracker()); + RowBatch batch(child(0)->row_desc(), state->batch_size(), mem_tracker().get()); bool eos = false; do { batch.reset(); diff --git a/be/src/exec/tablet_info.cpp b/be/src/exec/tablet_info.cpp index 8188b20fdc3a83..98026d3d021936 100644 --- a/be/src/exec/tablet_info.cpp +++ b/be/src/exec/tablet_info.cpp @@ -150,7 +150,7 @@ OlapTablePartitionParam::OlapTablePartitionParam( std::shared_ptr schema, const TOlapTablePartitionParam& t_param) : _schema(schema), _t_param(t_param), - _mem_tracker(new MemTracker()), + _mem_tracker(new MemTracker(-1, "OlapTablePartitionParam")), _mem_pool(new MemPool(_mem_tracker.get())) { } diff --git a/be/src/exec/tablet_info.h b/be/src/exec/tablet_info.h index f80719ffae0e39..dece0bf830504a 100644 --- a/be/src/exec/tablet_info.h +++ b/be/src/exec/tablet_info.h @@ -182,7 +182,7 @@ class OlapTablePartitionParam { std::vector _distributed_slot_descs; ObjectPool _obj_pool; - std::unique_ptr _mem_tracker; + std::shared_ptr _mem_tracker; std::unique_ptr _mem_pool; std::vector _partitions; std::unique_ptr< diff --git a/be/src/exec/tablet_sink.cpp b/be/src/exec/tablet_sink.cpp index e6260810474813..5d8ee35622a4da 
100644 --- a/be/src/exec/tablet_sink.cpp +++ b/be/src/exec/tablet_sink.cpp @@ -65,7 +65,7 @@ Status NodeChannel::init(RuntimeState* state) { _row_desc.reset(new RowDescriptor(_tuple_desc, false)); _batch_size = state->batch_size(); - _cur_batch.reset(new RowBatch(*_row_desc, _batch_size, _parent->_mem_tracker)); + _cur_batch.reset(new RowBatch(*_row_desc, _batch_size, _parent->_mem_tracker.get())); _stub = state->exec_env()->brpc_stub_cache()->get_stub(_node_info->host, _node_info->brpc_port); if (_stub == nullptr) { @@ -187,7 +187,8 @@ Status NodeChannel::add_row(Tuple* input_tuple, int64_t tablet_id) { // But there is still some unfinished things, we do mem limit here temporarily. // _cancelled may be set by rpc callback, and it's possible that _cancelled might be set in any of the steps below. // It's fine to do a fake add_row() and return OK, because we will check _cancelled in next add_row() or mark_close(). - while (!_cancelled && _parent->_mem_tracker->any_limit_exceeded() && _pending_batches_num > 0) { + while (!_cancelled && _parent->_mem_tracker->AnyLimitExceeded(MemLimit::HARD) && + _pending_batches_num > 0) { SCOPED_RAW_TIMER(&_mem_exceeded_block_ns); SleepFor(MonoDelta::FromMilliseconds(10)); } @@ -202,7 +203,7 @@ Status NodeChannel::add_row(Tuple* input_tuple, int64_t tablet_id) { _pending_batches_num++; } - _cur_batch.reset(new RowBatch(*_row_desc, _batch_size, _parent->_mem_tracker)); + _cur_batch.reset(new RowBatch(*_row_desc, _batch_size, _parent->_mem_tracker.get())); _cur_add_batch_request.clear_tablet_ids(); row_no = _cur_batch->add_row(); @@ -420,7 +421,6 @@ OlapTableSink::~OlapTableSink() { // We clear NodeChannels' batches here, cuz NodeChannels' batches destruction will use // OlapTableSink::_mem_tracker and its parents. // But their destructions are after OlapTableSink's. - // TODO: can be remove after all MemTrackers become shared. 
for (auto index_channel : _channels) { index_channel->for_each_node_channel([](NodeChannel* ch) { ch->clear_all_batches(); }); } @@ -463,13 +463,12 @@ Status OlapTableSink::prepare(RuntimeState* state) { // profile must add to state's object pool _profile = state->obj_pool()->add(new RuntimeProfile("OlapTableSink")); - _mem_tracker = _pool->add(new MemTracker(-1, "OlapTableSink", state->instance_mem_tracker())); + _mem_tracker.reset(new MemTracker(-1, "OlapTableSink", state->instance_mem_tracker())); SCOPED_TIMER(_profile->total_time_counter()); // Prepare the exprs to run. - RETURN_IF_ERROR( - Expr::prepare(_output_expr_ctxs, state, _input_row_desc, _expr_mem_tracker.get())); + RETURN_IF_ERROR(Expr::prepare(_output_expr_ctxs, state, _input_row_desc, _expr_mem_tracker)); // get table's tuple descriptor _output_tuple_desc = state->desc_tbl().get_tuple_descriptor(_tuple_desc_id); @@ -497,7 +496,7 @@ Status OlapTableSink::prepare(RuntimeState* state) { } _output_row_desc = _pool->add(new RowDescriptor(_output_tuple_desc, false)); - _output_batch.reset(new RowBatch(*_output_row_desc, state->batch_size(), _mem_tracker)); + _output_batch.reset(new RowBatch(*_output_row_desc, state->batch_size(), _mem_tracker.get())); _max_decimal_val.resize(_output_tuple_desc->slots().size()); _min_decimal_val.resize(_output_tuple_desc->slots().size()); diff --git a/be/src/exec/tablet_sink.h b/be/src/exec/tablet_sink.h index 4f61ed15cc0f1c..3c993d1eef931f 100644 --- a/be/src/exec/tablet_sink.h +++ b/be/src/exec/tablet_sink.h @@ -186,6 +186,7 @@ class NodeChannel { Status none_of(std::initializer_list vars); + // TODO(HW): remove after mem tracker shared void clear_all_batches(); private: @@ -315,6 +316,8 @@ class OlapTableSink : public DataSink { friend class NodeChannel; friend class IndexChannel; + std::shared_ptr _mem_tracker; + ObjectPool* _pool; const RowDescriptor& _input_row_desc; @@ -350,7 +353,6 @@ class OlapTableSink : public DataSink { DorisNodesInfo* _nodes_info = nullptr; 
RuntimeProfile* _profile = nullptr; - MemTracker* _mem_tracker = nullptr; std::set _partition_ids; diff --git a/be/src/exec/topn_node.cpp b/be/src/exec/topn_node.cpp index 304820f969b390..29fd60e1c43e83 100644 --- a/be/src/exec/topn_node.cpp +++ b/be/src/exec/topn_node.cpp @@ -62,7 +62,7 @@ Status TopNNode::init(const TPlanNode& tnode, RuntimeState* state) { Status TopNNode::prepare(RuntimeState* state) { SCOPED_TIMER(_runtime_profile->total_time_counter()); RETURN_IF_ERROR(ExecNode::prepare(state)); - _tuple_pool.reset(new MemPool(mem_tracker())); + _tuple_pool.reset(new MemPool(mem_tracker().get())); RETURN_IF_ERROR(_sort_exec_exprs.prepare( state, child(0)->row_desc(), _row_descriptor, expr_mem_tracker())); // AddExprCtxsToFree(_sort_exec_exprs); @@ -99,7 +99,7 @@ Status TopNNode::open(RuntimeState* state) { // Limit of 0, no need to fetch anything from children. if (_limit != 0) { - RowBatch batch(child(0)->row_desc(), state->batch_size(), mem_tracker()); + RowBatch batch(child(0)->row_desc(), state->batch_size(), mem_tracker().get()); bool eos = false; do { @@ -248,7 +248,7 @@ void TopNNode::push_down_predicate( if ((*iter)->root()->is_bound(&_tuple_ids)) { // LOG(INFO) << "push down success expr is " << (*iter)->debug_string(); // (*iter)->get_child(0)->prepare(state, row_desc()); - (*iter)->prepare(state, row_desc(), _expr_mem_tracker.get()); + (*iter)->prepare(state, row_desc(), _expr_mem_tracker); (*iter)->open(state); _conjunct_ctxs.push_back(*iter); iter = expr_ctxs->erase(iter); diff --git a/be/src/exec/union_node.cpp b/be/src/exec/union_node.cpp index 86ff768be3d4b5..a6ff9f75ae0316 100644 --- a/be/src/exec/union_node.cpp +++ b/be/src/exec/union_node.cpp @@ -84,8 +84,8 @@ Status UnionNode::prepare(RuntimeState* state) { // Prepare result expr lists. 
for (int i = 0; i < _child_expr_lists.size(); ++i) { - RETURN_IF_ERROR(Expr::prepare( - _child_expr_lists[i], state, child(i)->row_desc(), expr_mem_tracker())); + RETURN_IF_ERROR(Expr::prepare(_child_expr_lists[i], state, child(i)->row_desc(), + expr_mem_tracker())); // TODO(zc) // AddExprCtxsToFree(_child_expr_lists[i]); DCHECK_EQ(_child_expr_lists[i].size(), _tuple_desc->slots().size()); @@ -152,7 +152,7 @@ Status UnionNode::get_next_materialized(RuntimeState* state, RowBatch* row_batch if (_child_batch.get() == nullptr) { DCHECK_LT(_child_idx, _children.size()); _child_batch.reset(new RowBatch( - child(_child_idx)->row_desc(), state->batch_size(), mem_tracker())); + child(_child_idx)->row_desc(), state->batch_size(), mem_tracker().get())); _child_row_idx = 0; // open the current child unless it's the first child, which was already opened in // UnionNode::open(). diff --git a/be/src/exprs/agg_fn_evaluator.cpp b/be/src/exprs/agg_fn_evaluator.cpp index 02906ac028ae8f..03b76d0d367cfa 100755 --- a/be/src/exprs/agg_fn_evaluator.cpp +++ b/be/src/exprs/agg_fn_evaluator.cpp @@ -148,7 +148,7 @@ Status AggFnEvaluator::prepare( MemPool* pool, const SlotDescriptor* intermediate_slot_desc, const SlotDescriptor* output_slot_desc, - MemTracker* mem_tracker, + std::shared_ptr mem_tracker, FunctionContext** agg_fn_ctx) { DCHECK(pool != NULL); DCHECK(intermediate_slot_desc != NULL); @@ -160,7 +160,7 @@ Status AggFnEvaluator::prepare( _string_buffer_len = 0; _mem_tracker = mem_tracker; - Status status = Expr::prepare(_input_exprs_ctxs, state, desc, pool->mem_tracker()); + Status status = Expr::prepare(_input_exprs_ctxs, state, desc, _mem_tracker); RETURN_IF_ERROR(status); ObjectPool* obj_pool = state->obj_pool(); @@ -276,7 +276,7 @@ Status AggFnEvaluator::open(RuntimeState* state, FunctionContext* agg_fn_ctx) { void AggFnEvaluator::close(RuntimeState* state) { Expr::close(_input_exprs_ctxs, state); if (UNLIKELY(_total_mem_consumption > 0)) { - 
_mem_tracker->release(_total_mem_consumption); + _mem_tracker->Release(_total_mem_consumption); } } @@ -459,7 +459,7 @@ void AggFnEvaluator::update_mem_limlits(int len) { _accumulated_mem_consumption += len; // per 16M , update mem_tracker one time if (UNLIKELY(_accumulated_mem_consumption > 16777216)) { - _mem_tracker->consume(_accumulated_mem_consumption); + _mem_tracker->Consume(_accumulated_mem_consumption); _total_mem_consumption += _accumulated_mem_consumption; _accumulated_mem_consumption = 0; } diff --git a/be/src/exprs/agg_fn_evaluator.h b/be/src/exprs/agg_fn_evaluator.h index eac15ab4b12a5f..c96c52e79df975 100755 --- a/be/src/exprs/agg_fn_evaluator.h +++ b/be/src/exprs/agg_fn_evaluator.h @@ -85,7 +85,7 @@ class AggFnEvaluator { MemPool* pool, const SlotDescriptor* intermediate_slot_desc, const SlotDescriptor* output_slot_desc, - MemTracker* mem_tracker, + std::shared_ptr mem_tracker, FunctionContext** agg_fn_ctx); Status open(RuntimeState* state, FunctionContext* agg_fn_ctx); @@ -213,7 +213,7 @@ class AggFnEvaluator { std::vector _input_exprs_ctxs; boost::scoped_array _string_buffer; //for count distinct int _string_buffer_len; //for count distinct - MemTracker* _mem_tracker; // saved c'tor param + std::shared_ptr _mem_tracker; // saved c'tor param const TypeDescriptor _return_type; const TypeDescriptor _intermediate_type; diff --git a/be/src/exprs/expr.cpp b/be/src/exprs/expr.cpp index b5ade12204ab8f..f71c5b3e9fb9c3 100644 --- a/be/src/exprs/expr.cpp +++ b/be/src/exprs/expr.cpp @@ -539,7 +539,7 @@ Status Expr::prepare( const std::vector& ctxs, RuntimeState* state, const RowDescriptor& row_desc, - MemTracker* tracker) { + std::shared_ptr tracker) { for (int i = 0; i < ctxs.size(); ++i) { RETURN_IF_ERROR(ctxs[i]->prepare(state, row_desc, tracker)); } @@ -871,7 +871,7 @@ void Expr::assign_fn_ctx_idx(int* next_fn_ctx_idx) { Status Expr::create(const TExpr& texpr, const RowDescriptor& row_desc, RuntimeState* state, ObjectPool* pool, Expr** scalar_expr, - 
MemTracker* tracker) { + std::shared_ptr tracker) { *scalar_expr = nullptr; Expr* root; RETURN_IF_ERROR(create_expr(pool, texpr.nodes[0], &root)); @@ -894,7 +894,7 @@ Status Expr::create(const TExpr& texpr, const RowDescriptor& row_desc, } Status Expr::create(const vector& texprs, const RowDescriptor& row_desc, - RuntimeState* state, ObjectPool* pool, vector* exprs, MemTracker* tracker) { + RuntimeState* state, ObjectPool* pool, vector* exprs, std::shared_ptr tracker) { exprs->clear(); for (const TExpr& texpr: texprs) { Expr* expr; @@ -906,12 +906,12 @@ Status Expr::create(const vector& texprs, const RowDescriptor& row_desc, } Status Expr::create(const TExpr& texpr, const RowDescriptor& row_desc, - RuntimeState* state, Expr** scalar_expr, MemTracker* tracker) { + RuntimeState* state, Expr** scalar_expr, std::shared_ptr tracker) { return Expr::create(texpr, row_desc, state, state->obj_pool(), scalar_expr, tracker); } Status Expr::create(const vector& texprs, const RowDescriptor& row_desc, - RuntimeState* state, vector* exprs, MemTracker* tracker) { + RuntimeState* state, vector* exprs, std::shared_ptr tracker) { return Expr::create(texprs, row_desc, state, state->obj_pool(), exprs, tracker); } diff --git a/be/src/exprs/expr.h b/be/src/exprs/expr.h index fa72194ad39776..38ea87b5b1424a 100644 --- a/be/src/exprs/expr.h +++ b/be/src/exprs/expr.h @@ -207,26 +207,26 @@ class Expr { /// tuple row descriptor of the input tuple row. On failure, 'expr' is set to NULL and /// the expr tree (if created) will be closed. Error status will be returned too. static Status create(const TExpr& texpr, const RowDescriptor& row_desc, - RuntimeState* state, ObjectPool* pool, Expr** expr, MemTracker* tracker); + RuntimeState* state, ObjectPool* pool, Expr** expr, std::shared_ptr tracker); /// Create a new ScalarExpr based on thrift Expr 'texpr'. The newly created ScalarExpr /// is stored in ObjectPool 'state->obj_pool()' and returned in 'expr'. 
'row_desc' is /// the tuple row descriptor of the input tuple row. Returns error status on failure. static Status create(const TExpr& texpr, const RowDescriptor& row_desc, - RuntimeState* state, Expr** expr, MemTracker* tracker); + RuntimeState* state, Expr** expr, std::shared_ptr tracker); /// Convenience functions creating multiple ScalarExpr. static Status create(const std::vector& texprs, const RowDescriptor& row_desc, - RuntimeState* state, ObjectPool* pool, std::vector* exprs, MemTracker* tracker); + RuntimeState* state, ObjectPool* pool, std::vector* exprs, std::shared_ptr tracker); /// Convenience functions creating multiple ScalarExpr. static Status create(const std::vector& texprs, const RowDescriptor& row_desc, - RuntimeState* state, std::vector* exprs, MemTracker* tracker); + RuntimeState* state, std::vector* exprs, std::shared_ptr tracker); /// Convenience function for preparing multiple expr trees. /// Allocations from 'ctxs' will be counted against 'tracker'. static Status prepare(const std::vector& ctxs, RuntimeState* state, - const RowDescriptor& row_desc, MemTracker* tracker); + const RowDescriptor& row_desc, std::shared_ptr tracker); /// Convenience function for opening multiple expr trees. 
static Status open(const std::vector& ctxs, RuntimeState* state); diff --git a/be/src/exprs/expr_context.cpp b/be/src/exprs/expr_context.cpp index 99506f2df7227a..ba7a572bd36969 100644 --- a/be/src/exprs/expr_context.cpp +++ b/be/src/exprs/expr_context.cpp @@ -51,13 +51,13 @@ ExprContext::~ExprContext() { // TODO(zc): memory tracker Status ExprContext::prepare(RuntimeState* state, const RowDescriptor& row_desc, - MemTracker* tracker) { - DCHECK(tracker != NULL) << std::endl << get_stack_trace(); + std::shared_ptr tracker) { + DCHECK(tracker != nullptr) << std::endl << get_stack_trace(); DCHECK(_pool.get() == NULL); _prepared = true; - // TODO: use param tracker to replace instance_mem_tracker + // TODO: use param tracker to replace instance_mem_tracker, be careful about tracker's life cycle // _pool.reset(new MemPool(new MemTracker(-1))); - _pool.reset(new MemPool(state->instance_mem_tracker())); + _pool.reset(new MemPool(state->instance_mem_tracker().get())); return _root->prepare(state, row_desc, this); } diff --git a/be/src/exprs/expr_context.h b/be/src/exprs/expr_context.h index ebc0822e1040fa..8e62b35651362f 100644 --- a/be/src/exprs/expr_context.h +++ b/be/src/exprs/expr_context.h @@ -52,7 +52,7 @@ class ExprContext { /// Prepare expr tree for evaluation. /// Allocations from this context will be counted against 'tracker'. Status prepare(RuntimeState* state, const RowDescriptor& row_desc, - MemTracker* tracker); + std::shared_ptr tracker); /// Must be called after calling Prepare(). Does not need to be called on clones. 
/// Idempotent (this allows exprs to be opened multiple times in subplans without diff --git a/be/src/exprs/new_agg_fn_evaluator.cc b/be/src/exprs/new_agg_fn_evaluator.cc index 56409d79abc195..a969a0b0e602d6 100644 --- a/be/src/exprs/new_agg_fn_evaluator.cc +++ b/be/src/exprs/new_agg_fn_evaluator.cc @@ -89,7 +89,7 @@ typedef AnyVal (*FinalizeFn)(FunctionContext*, const AnyVal&); const int DEFAULT_MULTI_DISTINCT_COUNT_STRING_BUFFER_SIZE = 1024; -NewAggFnEvaluator::NewAggFnEvaluator(const AggFn& agg_fn, MemPool* mem_pool, MemTracker* tracker, bool is_clone) +NewAggFnEvaluator::NewAggFnEvaluator(const AggFn& agg_fn, MemPool* mem_pool, std::shared_ptr tracker, bool is_clone) : _total_mem_consumption(0), _accumulated_mem_consumption(0), is_clone_(is_clone), @@ -100,7 +100,7 @@ NewAggFnEvaluator::NewAggFnEvaluator(const AggFn& agg_fn, MemPool* mem_pool, Mem NewAggFnEvaluator::~NewAggFnEvaluator() { if (UNLIKELY(_total_mem_consumption > 0)) { - _mem_tracker->release(_total_mem_consumption); + _mem_tracker->Release(_total_mem_consumption); } DCHECK(closed_); } @@ -115,7 +115,7 @@ const TypeDescriptor& NewAggFnEvaluator::intermediate_type() const { Status NewAggFnEvaluator::Create(const AggFn& agg_fn, RuntimeState* state, ObjectPool* pool, MemPool* mem_pool, NewAggFnEvaluator** result, - MemTracker* tracker, const RowDescriptor& row_desc) { + std::shared_ptr tracker, const RowDescriptor& row_desc) { *result = nullptr; // Create a new AggFn evaluator. 
@@ -169,7 +169,7 @@ Status NewAggFnEvaluator::Create(const AggFn& agg_fn, RuntimeState* state, Objec Status NewAggFnEvaluator::Create(const vector& agg_fns, RuntimeState* state, ObjectPool* pool, MemPool* mem_pool, vector* evals, - MemTracker* tracker, const RowDescriptor& row_desc) { + std::shared_ptr tracker, const RowDescriptor& row_desc) { for (const AggFn* agg_fn : agg_fns) { NewAggFnEvaluator* agg_fn_eval; RETURN_IF_ERROR(NewAggFnEvaluator::Create(*agg_fn, state, pool, mem_pool, diff --git a/be/src/exprs/new_agg_fn_evaluator.h b/be/src/exprs/new_agg_fn_evaluator.h index 5d482e92cd027c..d3f11de166d90f 100644 --- a/be/src/exprs/new_agg_fn_evaluator.h +++ b/be/src/exprs/new_agg_fn_evaluator.h @@ -69,13 +69,13 @@ class NewAggFnEvaluator { /// from 'mem_pool'. Note that it's the responsibility to call Close() all evaluators /// even if this function returns error status on initialization failure. static Status Create(const AggFn& agg_fn, RuntimeState* state, ObjectPool* pool, - MemPool* mem_pool, NewAggFnEvaluator** eval, MemTracker* tracker, + MemPool* mem_pool, NewAggFnEvaluator** eval, std::shared_ptr tracker, const RowDescriptor& row_desc) WARN_UNUSED_RESULT; /// Convenience functions for creating evaluators for multiple aggregate functions. static Status Create(const std::vector& agg_fns, RuntimeState* state, ObjectPool* pool, MemPool* mem_pool, std::vector* evals, - MemTracker* tracker, const RowDescriptor& row_desc) WARN_UNUSED_RESULT; + std::shared_ptr tracker, const RowDescriptor& row_desc) WARN_UNUSED_RESULT; ~NewAggFnEvaluator(); @@ -223,7 +223,7 @@ class NewAggFnEvaluator { /// Owned by the exec node which owns this evaluator. MemPool* mem_pool_ = nullptr; - MemTracker* _mem_tracker; // saved c'tor param + std::shared_ptr _mem_tracker; // saved c'tor param /// This contains runtime state such as constant input arguments to the aggregate /// functions and a FreePool from which the intermediate values are allocated. 
@@ -245,7 +245,7 @@ class NewAggFnEvaluator { doris_udf::AnyVal* staging_merge_input_val_ = nullptr; /// Use Create() instead. - NewAggFnEvaluator(const AggFn& agg_fn, MemPool* mem_pool, MemTracker* tracker, bool is_clone); + NewAggFnEvaluator(const AggFn& agg_fn, MemPool* mem_pool, std::shared_ptr tracker, bool is_clone); /// Return the intermediate type of the aggregate function. inline const SlotDescriptor& intermediate_slot_desc() const; diff --git a/be/src/http/default_path_handlers.cpp b/be/src/http/default_path_handlers.cpp index 0ebbbc0010645a..d87b33b687cbb2 100644 --- a/be/src/http/default_path_handlers.cpp +++ b/be/src/http/default_path_handlers.cpp @@ -75,9 +75,9 @@ void config_handler(const WebPageHandler::ArgumentMap& args, std::stringstream* } // Registered to handle "/memz", and prints out memory allocation statistics. -void mem_usage_handler(MemTracker* mem_tracker, const WebPageHandler::ArgumentMap& args, +void mem_usage_handler(std::shared_ptr mem_tracker, const WebPageHandler::ArgumentMap& args, std::stringstream* output) { - if (mem_tracker != NULL) { + if (mem_tracker != nullptr) { (*output) << "
"
                   << "Mem Limit: " << PrettyPrinter::print(mem_tracker->limit(), TUnit::BYTES)
                   << std::endl
@@ -103,7 +103,7 @@ void mem_usage_handler(MemTracker* mem_tracker, const WebPageHandler::ArgumentMa
 #endif
 }
 
-void add_default_path_handlers(WebPageHandler* web_page_handler, MemTracker* process_mem_tracker) {
+void add_default_path_handlers(WebPageHandler* web_page_handler, std::shared_ptr process_mem_tracker) {
     // TODO(yingchun): logs_handler is not implemented yet, so not show it on navigate bar
     web_page_handler->register_page("/logs", "Logs", logs_handler, false /* is_on_nav_bar */);
     web_page_handler->register_page("/varz", "Configs", config_handler, true /* is_on_nav_bar */);
diff --git a/be/src/http/default_path_handlers.h b/be/src/http/default_path_handlers.h
index 06f5c3f9bbfd03..af13d3e5c9666e 100644
--- a/be/src/http/default_path_handlers.h
+++ b/be/src/http/default_path_handlers.h
@@ -19,6 +19,7 @@
 #define DORIS_BE_SRC_COMMON_UTIL_DEFAULT_PATH_HANDLERS_H
 
 #include 
+#include 
 
 namespace doris {
 
@@ -27,7 +28,7 @@ class WebPageHandler;
 
 // Adds a set of default path handlers to the webserver to display
 // logs and configuration flags
-void add_default_path_handlers(WebPageHandler* web_page_handler, MemTracker* process_mem_tracker);
+void add_default_path_handlers(WebPageHandler* web_page_handler, std::shared_ptr process_mem_tracker);
 }
 
 #endif // IMPALA_UTIL_DEFAULT_PATH_HANDLERS_H
diff --git a/be/src/olap/aggregate_func.h b/be/src/olap/aggregate_func.h
index 7e7df6917530c8..ecd46e0ce06993 100644
--- a/be/src/olap/aggregate_func.h
+++ b/be/src/olap/aggregate_func.h
@@ -460,7 +460,7 @@ struct AggregateFuncTraitsdata = reinterpret_cast(hll);
 
-        mem_pool->mem_tracker()->consume(sizeof(HyperLogLog));
+        mem_pool->mem_tracker()->Consume(sizeof(HyperLogLog));
 
         agg_pool->add(hll);
     }
@@ -507,7 +507,7 @@ struct AggregateFuncTraitsdata = (char*) bitmap;
 
-        mem_pool->mem_tracker()->consume(sizeof(BitmapValue));
+        mem_pool->mem_tracker()->Consume(sizeof(BitmapValue));
 
         agg_pool->add(bitmap);
     }
diff --git a/be/src/olap/delta_writer.cpp b/be/src/olap/delta_writer.cpp
index 0a89bc41a0ed5b..a549321de9434f 100644
--- a/be/src/olap/delta_writer.cpp
+++ b/be/src/olap/delta_writer.cpp
@@ -27,17 +27,22 @@
 
 namespace doris {
 
-OLAPStatus DeltaWriter::open(WriteRequest* req, MemTracker* mem_tracker, DeltaWriter** writer) {
-    *writer = new DeltaWriter(req, mem_tracker, StorageEngine::instance());
+OLAPStatus DeltaWriter::open(WriteRequest* req, std::shared_ptr parent, DeltaWriter** writer) {
+    *writer = new DeltaWriter(req, parent, StorageEngine::instance());
     return OLAP_SUCCESS;
 }
 
-DeltaWriter::DeltaWriter(WriteRequest* req, MemTracker* parent, StorageEngine* storage_engine) :
-        _req(*req), _tablet(nullptr), _cur_rowset(nullptr), _new_rowset(nullptr),
-        _new_tablet(nullptr), _rowset_writer(nullptr), _tablet_schema(nullptr),
-        _delta_written_success(false), _storage_engine(storage_engine) {
-    _mem_tracker.reset(new MemTracker(-1, "delta writer", parent));
-}
+DeltaWriter::DeltaWriter(WriteRequest* req, std::shared_ptr parent, StorageEngine* storage_engine)
+        : _req(*req),
+          _tablet(nullptr),
+          _cur_rowset(nullptr),
+          _new_rowset(nullptr),
+          _new_tablet(nullptr),
+          _rowset_writer(nullptr),
+          _tablet_schema(nullptr),
+          _delta_written_success(false),
+          _storage_engine(storage_engine),
+          _mem_tracker(new MemTracker(-1, "DeltaWriter", parent)) {}
 
 DeltaWriter::~DeltaWriter() {
     if (_is_init && !_delta_written_success) {
@@ -195,7 +200,7 @@ OLAPStatus DeltaWriter::flush_memtable_and_wait() {
 void DeltaWriter::_reset_mem_table() {
     _mem_table.reset(new MemTable(_tablet->tablet_id(), _schema.get(), _tablet_schema, _req.slots,
                                   _req.tuple_desc, _tablet->keys_type(), _rowset_writer.get(),
-                                  _mem_tracker.get()));
+                                  _mem_tracker));
 }
 
 OLAPStatus DeltaWriter::close() {
diff --git a/be/src/olap/delta_writer.h b/be/src/olap/delta_writer.h
index 64828d59c975f3..034ecc7f01a4b6 100644
--- a/be/src/olap/delta_writer.h
+++ b/be/src/olap/delta_writer.h
@@ -56,7 +56,7 @@ struct WriteRequest {
 // This class is NOT thread-safe, external synchronization is required.
 class DeltaWriter {
 public:
-    static OLAPStatus open(WriteRequest* req, MemTracker* mem_tracker, DeltaWriter** writer);
+    static OLAPStatus open(WriteRequest* req, std::shared_ptr parent, DeltaWriter** writer);
 
     ~DeltaWriter();
 
@@ -83,7 +83,7 @@ class DeltaWriter {
     int64_t mem_consumption() const;
 
 private:
-    DeltaWriter(WriteRequest* req, MemTracker* parent, StorageEngine* storage_engine);
+    DeltaWriter(WriteRequest* req, std::shared_ptr parent, StorageEngine* storage_engine);
 
     // push a full memtable to flush executor
     OLAPStatus _flush_memtable_async();
@@ -107,7 +107,7 @@ class DeltaWriter {
 
     StorageEngine* _storage_engine;
     std::unique_ptr _flush_token;
-    std::unique_ptr _mem_tracker;
+    std::shared_ptr _mem_tracker;
 };
 
 }  // namespace doris
diff --git a/be/src/olap/fs/file_block_manager.cpp b/be/src/olap/fs/file_block_manager.cpp
index 255d22e0b2f581..0d0cbc9702421f 100644
--- a/be/src/olap/fs/file_block_manager.cpp
+++ b/be/src/olap/fs/file_block_manager.cpp
@@ -381,7 +381,7 @@ Status FileReadableBlock::readv(uint64_t offset, const Slice* results, size_t re
 FileBlockManager::FileBlockManager(Env* env, BlockManagerOptions opts) :
         _env(DCHECK_NOTNULL(env)),
         _opts(std::move(opts)),
-        _mem_tracker(new MemTracker(-1, "file_block_manager", _opts.parent_mem_tracker.get())) {
+        _mem_tracker(new MemTracker(-1, "file_block_manager", _opts.parent_mem_tracker)) {
     if (_opts.enable_metric) {
         _metrics.reset(new internal::BlockManagerMetrics());
     }
diff --git a/be/src/olap/fs/file_block_manager.h b/be/src/olap/fs/file_block_manager.h
index 5f12aa4f2fccaa..4a4782b80e3a13 100644
--- a/be/src/olap/fs/file_block_manager.h
+++ b/be/src/olap/fs/file_block_manager.h
@@ -110,7 +110,7 @@ class FileBlockManager : public BlockManager {
 
     // Tracks memory consumption of any allocations numerous enough to be
     // interesting.
-    std::unique_ptr _mem_tracker;
+    std::shared_ptr _mem_tracker;
 
     // DISALLOW_COPY_AND_ASSIGN(FileBlockManager);
 
diff --git a/be/src/olap/memtable.cpp b/be/src/olap/memtable.cpp
index 2dd059bd9294fc..88eaf162c73be0 100644
--- a/be/src/olap/memtable.cpp
+++ b/be/src/olap/memtable.cpp
@@ -31,22 +31,20 @@ namespace doris {
 
 MemTable::MemTable(int64_t tablet_id, Schema* schema, const TabletSchema* tablet_schema,
                    const std::vector* slot_descs, TupleDescriptor* tuple_desc,
-                   KeysType keys_type, RowsetWriter* rowset_writer, MemTracker* mem_tracker)
-    : _tablet_id(tablet_id),
-      _schema(schema),
-      _tablet_schema(tablet_schema),
-      _tuple_desc(tuple_desc),
-      _slot_descs(slot_descs),
-      _keys_type(keys_type),
-      _row_comparator(_schema),
-      _rowset_writer(rowset_writer) {
-
-    _schema_size = _schema->schema_size();
-    _mem_tracker.reset(new MemTracker(-1, "memtable", mem_tracker));
-    _buffer_mem_pool.reset(new MemPool(_mem_tracker.get()));
-    _table_mem_pool.reset(new MemPool(_mem_tracker.get()));
-    _skip_list = new Table(_row_comparator, _table_mem_pool.get(), _keys_type == KeysType::DUP_KEYS);
-}
+                   KeysType keys_type, RowsetWriter* rowset_writer, std::shared_ptr parent_tracker)
+        : _tablet_id(tablet_id),
+          _schema(schema),
+          _tablet_schema(tablet_schema),
+          _tuple_desc(tuple_desc),
+          _slot_descs(slot_descs),
+          _keys_type(keys_type),
+          _row_comparator(_schema),
+          _mem_tracker(new MemTracker(-1, "MemTable", parent_tracker)),
+          _buffer_mem_pool(new MemPool(_mem_tracker.get())),
+          _table_mem_pool(new MemPool(_mem_tracker.get())),
+          _schema_size(_schema->schema_size()),
+          _skip_list(new Table(_row_comparator, _table_mem_pool.get(), _keys_type == KeysType::DUP_KEYS)),
+          _rowset_writer(rowset_writer) {}
 
 MemTable::~MemTable() {
     delete _skip_list;
diff --git a/be/src/olap/memtable.h b/be/src/olap/memtable.h
index a7142ac5127347..00b7009f65c395 100644
--- a/be/src/olap/memtable.h
+++ b/be/src/olap/memtable.h
@@ -39,7 +39,7 @@ class MemTable {
 public:
     MemTable(int64_t tablet_id, Schema* schema, const TabletSchema* tablet_schema,
              const std::vector* slot_descs, TupleDescriptor* tuple_desc,
-             KeysType keys_type, RowsetWriter* rowset_writer, MemTracker* mem_tracker);
+             KeysType keys_type, RowsetWriter* rowset_writer, std::shared_ptr parent_tracker);
     ~MemTable();
 
     int64_t tablet_id() const { return _tablet_id; }
@@ -72,7 +72,7 @@ class MemTable {
     KeysType _keys_type;
 
     RowCursorComparator _row_comparator;
-    std::unique_ptr _mem_tracker;
+    std::shared_ptr _mem_tracker;
     // This is a buffer, to hold the memory referenced by the rows that have not
     // been inserted into the SkipList
     std::unique_ptr _buffer_mem_pool;
diff --git a/be/src/olap/merger.cpp b/be/src/olap/merger.cpp
index e6e6e1b4f801e5..c8b262c515881c 100644
--- a/be/src/olap/merger.cpp
+++ b/be/src/olap/merger.cpp
@@ -48,7 +48,7 @@ OLAPStatus Merger::merge_rowsets(TabletSharedPtr tablet,
                  "failed to init row cursor when merging rowsets of tablet " + tablet->full_name());
     row_cursor.allocate_memory_for_string_type(tablet->tablet_schema());
 
-    std::unique_ptr tracker(new MemTracker(-1));
+    std::shared_ptr tracker(new MemTracker(-1));
     std::unique_ptr mem_pool(new MemPool(tracker.get()));
 
     // The following procedure would last for long time, half of one day, etc.
diff --git a/be/src/olap/olap_index.cpp b/be/src/olap/olap_index.cpp
index 3976bc894064ef..4b549030de1a1d 100644
--- a/be/src/olap/olap_index.cpp
+++ b/be/src/olap/olap_index.cpp
@@ -35,14 +35,13 @@ using std::vector;
 namespace doris {
 
 MemIndex::MemIndex()
-    : _key_length(0),
-      _num_entries(0),
-      _index_size(0),
-      _data_size(0),
-      _num_rows(0) {
-    _tracker.reset(new MemTracker(-1));
-    _mem_pool.reset(new MemPool(_tracker.get()));
-}
+        : _key_length(0),
+          _num_entries(0),
+          _index_size(0),
+          _data_size(0),
+          _num_rows(0),
+          _tracker(new MemTracker(-1)),
+          _mem_pool(new MemPool(_tracker.get())) {}
 
 MemIndex::~MemIndex() {
     _num_entries = 0;
diff --git a/be/src/olap/olap_index.h b/be/src/olap/olap_index.h
index 0db0e97ec53059..c4d3714d9ddf52 100644
--- a/be/src/olap/olap_index.h
+++ b/be/src/olap/olap_index.h
@@ -330,7 +330,7 @@ class MemIndex {
     size_t _num_rows;
     std::vector* _short_key_columns;
 
-    std::unique_ptr _tracker;
+    std::shared_ptr _tracker;
     std::unique_ptr _mem_pool;
     DISALLOW_COPY_AND_ASSIGN(MemIndex);
 };
diff --git a/be/src/olap/reader.h b/be/src/olap/reader.h
index f4ffa59ae9d49b..fe3c8ff02de1d7 100644
--- a/be/src/olap/reader.h
+++ b/be/src/olap/reader.h
@@ -201,7 +201,7 @@ class Reader {
     TabletSharedPtr tablet() { return _tablet; }
 
 private:
-    std::unique_ptr _tracker;
+    std::shared_ptr _tracker;
     std::unique_ptr _predicate_mem_pool;
     std::set _load_bf_columns;
     std::vector _return_columns;
diff --git a/be/src/olap/row_block.cpp b/be/src/olap/row_block.cpp
index aa00e599c6027e..cf00d159748841 100644
--- a/be/src/olap/row_block.cpp
+++ b/be/src/olap/row_block.cpp
@@ -37,12 +37,11 @@ using std::vector;
 
 namespace doris {
 
-RowBlock::RowBlock(const TabletSchema* schema) :
-        _capacity(0),
-        _schema(schema) {
-    _tracker.reset(new MemTracker(-1));
-    _mem_pool.reset(new MemPool(_tracker.get()));
-}
+RowBlock::RowBlock(const TabletSchema* schema)
+        : _capacity(0),
+          _schema(schema),
+          _tracker(new MemTracker(-1)),
+          _mem_pool(new MemPool(_tracker.get())) {}
 
 RowBlock::~RowBlock() {
     delete[] _mem_buf;
diff --git a/be/src/olap/row_block.h b/be/src/olap/row_block.h
index cb991284360ec6..5548f354dcd9bf 100644
--- a/be/src/olap/row_block.h
+++ b/be/src/olap/row_block.h
@@ -137,7 +137,7 @@ class RowBlock {
     size_t _limit = 0;
     uint8_t _block_status = DEL_PARTIAL_SATISFIED;
 
-    std::unique_ptr _tracker;
+    std::shared_ptr _tracker;
     std::unique_ptr _mem_pool;
     // 由于内部持有内存资源,所以这里禁止拷贝和赋值
     DISALLOW_COPY_AND_ASSIGN(RowBlock);
diff --git a/be/src/olap/row_block2.cpp b/be/src/olap/row_block2.cpp
index 1ab87828829e3f..c70477dcbdab20 100644
--- a/be/src/olap/row_block2.cpp
+++ b/be/src/olap/row_block2.cpp
@@ -27,19 +27,20 @@ using strings::Substitute;
 namespace doris {
 
 RowBlockV2::RowBlockV2(const Schema& schema, uint16_t capacity)
-    : _schema(schema),
-      _capacity(capacity),
-      _column_datas(_schema.num_columns(), nullptr),
-      _column_null_bitmaps(_schema.num_columns(), nullptr),
-      _pool(new MemPool(&_tracker)),
-      _selection_vector(nullptr) {
+        : _schema(schema),
+          _capacity(capacity),
+          _column_datas(_schema.num_columns(), nullptr),
+          _column_null_bitmaps(_schema.num_columns(), nullptr),
+          _tracker(new MemTracker(-1, "RowBlockV2")),
+          _pool(new MemPool(_tracker.get())),
+          _selection_vector(nullptr) {
     auto bitmap_size = BitmapSize(capacity);
     for (auto cid : _schema.column_ids()) {
         size_t data_size = _schema.column(cid)->type_info()->size() * _capacity;
         _column_datas[cid] = new uint8_t[data_size];
 
         if (_schema.column(cid)->is_nullable()) {
-            _column_null_bitmaps[cid] = new uint8_t[bitmap_size];;
+            _column_null_bitmaps[cid] = new uint8_t[bitmap_size];
         }
     }
     _selection_vector = new uint16_t[_capacity];
diff --git a/be/src/olap/row_block2.h b/be/src/olap/row_block2.h
index 671659232be9e2..374c00f4b09c8e 100644
--- a/be/src/olap/row_block2.h
+++ b/be/src/olap/row_block2.h
@@ -123,7 +123,7 @@ class RowBlockV2 {
     std::vector _column_null_bitmaps;
     size_t _num_rows;
     // manages the memory for slice's data
-    MemTracker _tracker;
+    std::shared_ptr _tracker;
     std::unique_ptr _pool;
 
     // index of selected rows for rows passed the predicate
diff --git a/be/src/olap/rowset/segment_reader.cpp b/be/src/olap/rowset/segment_reader.cpp
index b6dddbfb2b41cb..9140b58fa9b10c 100644
--- a/be/src/olap/rowset/segment_reader.cpp
+++ b/be/src/olap/rowset/segment_reader.cpp
@@ -32,44 +32,37 @@ namespace doris {
 
 static const uint32_t MIN_FILTER_BLOCK_NUM = 10;
 
-SegmentReader::SegmentReader(
-        const std::string file,
-        SegmentGroup* segment_group,
-        uint32_t segment_id,
-        const std::vector& used_columns,
-        const std::set& load_bf_columns,
-        const Conditions* conditions,
-        const DeleteHandler* delete_handler,
-        const DelCondSatisfied delete_status,
-        Cache* lru_cache,
-        RuntimeState* runtime_state,
-        OlapReaderStatistics* stats) :
-        _file_name(file),
-        _segment_group(segment_group),
-        _segment_id(segment_id),
-        _used_columns(used_columns),
-        _load_bf_columns(load_bf_columns),
-        _conditions(conditions),
-        _delete_handler(delete_handler),
-        _delete_status(delete_status),
-        _eof(false),
-        _end_block(-1),
-        // 确保第一次调用_move_to_next_row,会执行seek_to_block
-        _block_count(0),
-        _num_rows_in_block(0),
-        _null_supported(false),
-        _mmap_buffer(NULL),
-        _include_blocks(NULL),
-        _is_using_mmap(false),
-        _is_data_loaded(false),
-        _buffer_size(0),
-        _shared_buffer(NULL),
-        _lru_cache(lru_cache),
-        _runtime_state(runtime_state),
-        _stats(stats) {
-    _tracker.reset(new MemTracker(-1));
-    _mem_pool.reset(new MemPool(_tracker.get()));
-}
+SegmentReader::SegmentReader(const std::string file, SegmentGroup* segment_group,
+                             uint32_t segment_id, const std::vector& used_columns,
+                             const std::set& load_bf_columns,
+                             const Conditions* conditions, const DeleteHandler* delete_handler,
+                             const DelCondSatisfied delete_status, Cache* lru_cache,
+                             RuntimeState* runtime_state, OlapReaderStatistics* stats)
+        : _file_name(file),
+          _segment_group(segment_group),
+          _segment_id(segment_id),
+          _used_columns(used_columns),
+          _load_bf_columns(load_bf_columns),
+          _conditions(conditions),
+          _delete_handler(delete_handler),
+          _delete_status(delete_status),
+          _eof(false),
+          _end_block(-1),
+          // 确保第一次调用_move_to_next_row,会执行seek_to_block
+          _block_count(0),
+          _num_rows_in_block(0),
+          _null_supported(false),
+          _mmap_buffer(NULL),
+          _include_blocks(NULL),
+          _is_using_mmap(false),
+          _is_data_loaded(false),
+          _buffer_size(0),
+          _tracker(new MemTracker(-1)),
+          _mem_pool(new MemPool(_tracker.get())),
+          _shared_buffer(NULL),
+          _lru_cache(lru_cache),
+          _runtime_state(runtime_state),
+          _stats(stats) {}
 
 SegmentReader::~SegmentReader() {
     SAFE_DELETE(_shared_buffer);
@@ -258,7 +251,7 @@ OLAPStatus SegmentReader::seek_to_block(
 
         if (_runtime_state != NULL) {
             MemTracker::update_limits(_buffer_size, _runtime_state->mem_trackers());
-            if (MemTracker::limit_exceeded(*_runtime_state->mem_trackers())) {
+            if (MemTracker::limit_exceeded(_runtime_state->mem_trackers())) {
                 return OLAP_ERR_FETCH_MEMORY_EXCEEDED;
             }
         }
diff --git a/be/src/olap/rowset/segment_reader.h b/be/src/olap/rowset/segment_reader.h
index 542fbe783ba8db..69018de228bb7e 100644
--- a/be/src/olap/rowset/segment_reader.h
+++ b/be/src/olap/rowset/segment_reader.h
@@ -343,7 +343,7 @@ class SegmentReader {
     std::vector _cache_handle;
     const FileHeader* _file_header;
 
-    std::unique_ptr _tracker;
+    std::shared_ptr _tracker;
     std::unique_ptr _mem_pool;
 
     StorageByteBuffer* _shared_buffer;
diff --git a/be/src/olap/rowset/segment_v2/binary_dict_page.cpp b/be/src/olap/rowset/segment_v2/binary_dict_page.cpp
index f94c528e4b0285..961adfc76e6cbd 100644
--- a/be/src/olap/rowset/segment_v2/binary_dict_page.cpp
+++ b/be/src/olap/rowset/segment_v2/binary_dict_page.cpp
@@ -33,7 +33,8 @@ BinaryDictPageBuilder::BinaryDictPageBuilder(const PageBuilderOptions& options)
     _data_page_builder(nullptr),
     _dict_builder(nullptr),
     _encoding_type(DICT_ENCODING),
-    _pool(&_tracker) {
+    _tracker(new MemTracker()),
+    _pool(_tracker.get()) {
     // initially use DICT_ENCODING
     // TODO: the data page builder type can be created by Factory according to user config
     _data_page_builder.reset(new BitshufflePageBuilder(options));
diff --git a/be/src/olap/rowset/segment_v2/binary_dict_page.h b/be/src/olap/rowset/segment_v2/binary_dict_page.h
index 4fbe946c936f9c..635fe590c02c86 100644
--- a/be/src/olap/rowset/segment_v2/binary_dict_page.h
+++ b/be/src/olap/rowset/segment_v2/binary_dict_page.h
@@ -91,7 +91,7 @@ class BinaryDictPageBuilder : public PageBuilder {
     // used to remember the insertion order of dict keys
     std::vector _dict_items;
     // TODO(zc): rethink about this mem pool
-    MemTracker _tracker;
+    std::shared_ptr _tracker;
     MemPool _pool;
     faststring _buffer;
     faststring _first_value;
diff --git a/be/src/olap/rowset/segment_v2/bitmap_index_reader.h b/be/src/olap/rowset/segment_v2/bitmap_index_reader.h
index 8ac3e5e24548a0..e8ff04305495bd 100644
--- a/be/src/olap/rowset/segment_v2/bitmap_index_reader.h
+++ b/be/src/olap/rowset/segment_v2/bitmap_index_reader.h
@@ -73,12 +73,12 @@ class BitmapIndexReader {
 class BitmapIndexIterator {
 public:
     explicit BitmapIndexIterator(BitmapIndexReader* reader)
-        : _reader(reader),
-          _dict_column_iter(reader->_dict_column_reader.get()),
-          _bitmap_column_iter(reader->_bitmap_column_reader.get()),
-          _current_rowid(0),
-          _pool(new MemPool(&_tracker)) {
-    }
+            : _reader(reader),
+              _dict_column_iter(reader->_dict_column_reader.get()),
+              _bitmap_column_iter(reader->_bitmap_column_reader.get()),
+              _current_rowid(0),
+              _tracker(new MemTracker()),
+              _pool(new MemPool(_tracker.get())) {}
 
     bool has_null_bitmap() const { return _reader->_has_null; }
 
@@ -119,7 +119,7 @@ class BitmapIndexIterator {
     IndexedColumnIterator _dict_column_iter;
     IndexedColumnIterator _bitmap_column_iter;
     rowid_t _current_rowid;
-    MemTracker _tracker;
+    std::shared_ptr _tracker;
     std::unique_ptr _pool;
 };
 
diff --git a/be/src/olap/rowset/segment_v2/bitmap_index_writer.cpp b/be/src/olap/rowset/segment_v2/bitmap_index_writer.cpp
index eb98767f72f1d0..62c5328f975cf1 100644
--- a/be/src/olap/rowset/segment_v2/bitmap_index_writer.cpp
+++ b/be/src/olap/rowset/segment_v2/bitmap_index_writer.cpp
@@ -65,7 +65,10 @@ class BitmapIndexWriterImpl : public BitmapIndexWriter {
     using MemoryIndexType = typename BitmapIndexTraits::MemoryIndexType;
 
     explicit BitmapIndexWriterImpl(const TypeInfo* typeinfo)
-        : _typeinfo(typeinfo), _reverted_index_size(0), _tracker(), _pool(&_tracker) {}
+            : _typeinfo(typeinfo),
+              _reverted_index_size(0),
+              _tracker(new MemTracker()),
+              _pool(_tracker.get()) {}
 
     ~BitmapIndexWriterImpl() = default;
 
@@ -183,7 +186,7 @@ class BitmapIndexWriterImpl : public BitmapIndexWriter {
     Roaring _null_bitmap;
     // unique value to its row id list
     MemoryIndexType _mem_index;
-    MemTracker _tracker;
+    std::shared_ptr _tracker;
     MemPool _pool;
 };
 
diff --git a/be/src/olap/rowset/segment_v2/bloom_filter_index_reader.h b/be/src/olap/rowset/segment_v2/bloom_filter_index_reader.h
index d50b101802056d..cf3e3c23c91b39 100644
--- a/be/src/olap/rowset/segment_v2/bloom_filter_index_reader.h
+++ b/be/src/olap/rowset/segment_v2/bloom_filter_index_reader.h
@@ -72,7 +72,8 @@ class BloomFilterIndexIterator {
     explicit BloomFilterIndexIterator(BloomFilterIndexReader* reader)
         : _reader(reader),
           _bloom_filter_iter(reader->_bloom_filter_reader.get()),
-          _pool(new MemPool(&_tracker)) {
+          _tracker(new MemTracker()),
+          _pool(new MemPool(_tracker.get())) {
     }
 
     // Read bloom filter at the given ordinal into `bf`.
@@ -85,7 +86,7 @@ class BloomFilterIndexIterator {
 private:
     BloomFilterIndexReader* _reader;
     IndexedColumnIterator _bloom_filter_iter;
-    MemTracker _tracker;
+    std::shared_ptr _tracker;
     std::unique_ptr _pool;
 };
 
diff --git a/be/src/olap/rowset/segment_v2/bloom_filter_index_writer.cpp b/be/src/olap/rowset/segment_v2/bloom_filter_index_writer.cpp
index 9b0ae1303873ce..0044622567a45a 100644
--- a/be/src/olap/rowset/segment_v2/bloom_filter_index_writer.cpp
+++ b/be/src/olap/rowset/segment_v2/bloom_filter_index_writer.cpp
@@ -71,9 +71,13 @@ class BloomFilterIndexWriterImpl : public BloomFilterIndexWriter {
     using ValueDict = typename BloomFilterTraits::ValueDict;
 
     explicit BloomFilterIndexWriterImpl(const BloomFilterOptions& bf_options,
-            const TypeInfo* typeinfo)
-        : _bf_options(bf_options), _typeinfo(typeinfo),
-          _tracker(), _pool(&_tracker), _has_null(false), _bf_buffer_size(0) { }
+                                        const TypeInfo* typeinfo)
+            : _bf_options(bf_options),
+              _typeinfo(typeinfo),
+              _tracker(new MemTracker(-1, "BloomFilterIndexWriterImpl")),
+              _pool(_tracker.get()),
+              _has_null(false),
+              _bf_buffer_size(0) {}
 
     ~BloomFilterIndexWriterImpl() = default;
 
@@ -164,7 +168,7 @@ class BloomFilterIndexWriterImpl : public BloomFilterIndexWriter {
 private:
     BloomFilterOptions _bf_options;
     const TypeInfo* _typeinfo;
-    MemTracker _tracker;
+    std::shared_ptr _tracker;
     MemPool _pool;
     bool _has_null;
     uint64_t _bf_buffer_size;
diff --git a/be/src/olap/rowset/segment_v2/column_reader.h b/be/src/olap/rowset/segment_v2/column_reader.h
index 1d2ac88058bdb9..65d4d4945a4d53 100644
--- a/be/src/olap/rowset/segment_v2/column_reader.h
+++ b/be/src/olap/rowset/segment_v2/column_reader.h
@@ -309,14 +309,16 @@ class FileColumnIterator : public ColumnIterator {
 class DefaultValueColumnIterator : public ColumnIterator {
 public:
     DefaultValueColumnIterator(bool has_default_value, const std::string& default_value,
-            bool is_nullable, FieldType type, size_t schema_length) : _has_default_value(has_default_value),
-                                                _default_value(default_value),
-                                                _is_nullable(is_nullable),
-                                                _type(type),
-                                                _schema_length(schema_length),
-                                                _is_default_value_null(false),
-                                                _type_size(0),
-                                                _pool(new MemPool(&_tracker)){ }
+                               bool is_nullable, FieldType type, size_t schema_length)
+            : _has_default_value(has_default_value),
+              _default_value(default_value),
+              _is_nullable(is_nullable),
+              _type(type),
+              _schema_length(schema_length),
+              _is_default_value_null(false),
+              _type_size(0),
+              _tracker(new MemTracker()),
+              _pool(new MemPool(_tracker.get())) {}
 
     Status init(const ColumnIteratorOptions& opts) override;
 
@@ -343,7 +345,7 @@ class DefaultValueColumnIterator : public ColumnIterator {
     bool _is_default_value_null;
     size_t _type_size;
     void* _mem_value = nullptr;
-    MemTracker _tracker;
+    std::shared_ptr<MemTracker> _tracker;
     std::unique_ptr<MemPool> _pool;
 
     // current rowid
diff --git a/be/src/olap/rowset/segment_v2/indexed_column_writer.cpp b/be/src/olap/rowset/segment_v2/indexed_column_writer.cpp
index 7082204ab448e9..1a4bf2dc369a50 100644
--- a/be/src/olap/rowset/segment_v2/indexed_column_writer.cpp
+++ b/be/src/olap/rowset/segment_v2/indexed_column_writer.cpp
@@ -37,13 +37,12 @@ namespace doris {
 namespace segment_v2 {
 
 IndexedColumnWriter::IndexedColumnWriter(const IndexedColumnWriterOptions& options,
-                                         const TypeInfo* typeinfo,
-                                         fs::WritableBlock* wblock)
+                                         const TypeInfo* typeinfo, fs::WritableBlock* wblock)
         : _options(options),
           _typeinfo(typeinfo),
           _wblock(wblock),
-          _mem_tracker(-1),
-          _mem_pool(&_mem_tracker),
+          _mem_tracker(new MemTracker()),
+          _mem_pool(_mem_tracker.get()),
           _num_values(0),
           _num_data_pages(0),
           _validx_key_coder(nullptr),
diff --git a/be/src/olap/rowset/segment_v2/indexed_column_writer.h b/be/src/olap/rowset/segment_v2/indexed_column_writer.h
index d84347a59c4a2e..d2704d1069041f 100644
--- a/be/src/olap/rowset/segment_v2/indexed_column_writer.h
+++ b/be/src/olap/rowset/segment_v2/indexed_column_writer.h
@@ -92,7 +92,7 @@ class IndexedColumnWriter {
     const TypeInfo* _typeinfo;
     fs::WritableBlock* _wblock;
     // only used for `_first_value`
-    MemTracker _mem_tracker;
+    std::shared_ptr<MemTracker> _mem_tracker;
     MemPool _mem_pool;
 
     ordinal_t _num_values;
diff --git a/be/src/olap/rowset/segment_v2/zone_map_index.cpp b/be/src/olap/rowset/segment_v2/zone_map_index.cpp
index 52066767821512..83cd124423e8a3 100644
--- a/be/src/olap/rowset/segment_v2/zone_map_index.cpp
+++ b/be/src/olap/rowset/segment_v2/zone_map_index.cpp
@@ -31,7 +31,8 @@ namespace doris {
 
 namespace segment_v2 {
 
-ZoneMapIndexWriter::ZoneMapIndexWriter(Field* field) : _field(field), _pool(&_tracker) {
+ZoneMapIndexWriter::ZoneMapIndexWriter(Field* field)
+        : _field(field), _tracker(new MemTracker(-1, "ZoneMapIndexWriter")), _pool(_tracker.get()) {
     _page_zone_map.min_value = _field->allocate_value(&_pool);
     _page_zone_map.max_value = _field->allocate_value(&_pool);
     _reset_zone_map(&_page_zone_map);
@@ -114,8 +115,8 @@ Status ZoneMapIndexReader::load(bool use_page_cache, bool kept_in_memory) {
     RETURN_IF_ERROR(reader.load(use_page_cache, kept_in_memory));
     IndexedColumnIterator iter(&reader);
 
-    MemTracker tracker;
-    MemPool pool(&tracker);
+    auto tracker = std::make_shared<MemTracker>(-1, "temp in ZoneMapIndexReader");
+    MemPool pool(tracker.get());
     _page_zone_maps.resize(reader.num_values());
 
     // read and cache all page zone maps
diff --git a/be/src/olap/rowset/segment_v2/zone_map_index.h b/be/src/olap/rowset/segment_v2/zone_map_index.h
index 3070cccd6ba668..0d47229372e571 100644
--- a/be/src/olap/rowset/segment_v2/zone_map_index.h
+++ b/be/src/olap/rowset/segment_v2/zone_map_index.h
@@ -96,7 +96,7 @@ class ZoneMapIndexWriter {
     ZoneMap _segment_zone_map;
     // TODO(zc): we should replace this memory pool later, we only allocate min/max
     // for field. But MemPool allocate 4KB least, it will a waste for most cases.
-    MemTracker _tracker;
+    std::shared_ptr<MemTracker> _tracker;
     MemPool _pool;
 
     // serialized ZoneMapPB for each data page
diff --git a/be/src/olap/schema_change.cpp b/be/src/olap/schema_change.cpp
index 8dcd8d7c8f226c..37bcca8c048a68 100644
--- a/be/src/olap/schema_change.cpp
+++ b/be/src/olap/schema_change.cpp
@@ -806,7 +806,7 @@ bool RowBlockMerger::merge(const vector<RowBlock*>& row_block_arr, RowsetWriter*
                            uint64_t* merged_rows) {
     uint64_t tmp_merged_rows = 0;
     RowCursor row_cursor;
-    std::unique_ptr<MemTracker> tracker(new MemTracker(-1));
+    std::shared_ptr<MemTracker> tracker(new MemTracker(-1));
     std::unique_ptr<MemPool> mem_pool(new MemPool(tracker.get()));
     std::unique_ptr<ObjectPool> agg_object_pool(new ObjectPool());
     if (row_cursor.init(_tablet->tablet_schema()) != OLAP_SUCCESS) {
diff --git a/be/src/olap/task/engine_checksum_task.cpp b/be/src/olap/task/engine_checksum_task.cpp
index d4624ad2008361..c201e5aae289ee 100644
--- a/be/src/olap/task/engine_checksum_task.cpp
+++ b/be/src/olap/task/engine_checksum_task.cpp
@@ -91,7 +91,7 @@ OLAPStatus EngineChecksumTask::_compute_checksum() {
     }
 
     RowCursor row;
-    std::unique_ptr<MemTracker> tracker(new MemTracker(-1));
+    std::shared_ptr<MemTracker> tracker(new MemTracker(-1));
     std::unique_ptr<MemPool> mem_pool(new MemPool(tracker.get()));
     std::unique_ptr<ObjectPool> agg_object_pool(new ObjectPool());
     res = row.init(tablet->tablet_schema(), reader_params.return_columns);
diff --git a/be/src/runtime/buffered_block_mgr2.cc b/be/src/runtime/buffered_block_mgr2.cc
index 18f5b6383d07bf..3d634a23817f6c 100644
--- a/be/src/runtime/buffered_block_mgr2.cc
+++ b/be/src/runtime/buffered_block_mgr2.cc
@@ -53,16 +53,16 @@ SpinLock BufferedBlockMgr2::_s_block_mgrs_lock;
 
 class BufferedBlockMgr2::Client {
 public:
-    Client(BufferedBlockMgr2* mgr, int num_reserved_buffers, MemTracker* tracker,
-            RuntimeState* state) :
-            _mgr(mgr),
-            _state(state),
-            _tracker(tracker),
-            _query_tracker(_mgr->_mem_tracker->parent()),
-            _num_reserved_buffers(num_reserved_buffers),
-            _num_tmp_reserved_buffers(0),
-            _num_pinned_buffers(0) {
-        DCHECK(tracker != NULL);
+    Client(BufferedBlockMgr2* mgr, int num_reserved_buffers,
+           const std::shared_ptr<MemTracker>& tracker, RuntimeState* state)
+            : _mgr(mgr),
+              _state(state),
+              _tracker(tracker),
+              _query_tracker(new MemTracker(-1, "BufferedBlockMgr2", _mgr->_mem_tracker->parent())),
+              _num_reserved_buffers(num_reserved_buffers),
+              _num_tmp_reserved_buffers(0),
+              _num_pinned_buffers(0) {
+        DCHECK(tracker != nullptr);
     }
 
     // A null dtor to pass codestyle check
@@ -81,11 +81,11 @@ class BufferedBlockMgr2::Client {
     // enforced. Even when we give a buffer to a client, the buffer is still owned and
     // counts against the block mgr tracker (i.e. there is a fixed pool of buffers
     // regardless of if they are in the block mgr or the clients).
-    MemTracker* _tracker;
+    std::shared_ptr<MemTracker> _tracker;
 
     // This is the common ancestor between the block mgr tracker and the client tracker.
     // When memory is transferred to the client, we want it to stop at this tracker.
-    MemTracker* _query_tracker;
+    std::shared_ptr<MemTracker> _query_tracker;
 
     // Number of buffers reserved by this client.
     int _num_reserved_buffers;
@@ -100,8 +100,8 @@ class BufferedBlockMgr2::Client {
         DCHECK(buffer != NULL);
         if (buffer->len == _mgr->max_block_size()) {
             ++_num_pinned_buffers;
-            _tracker->consume_local(buffer->len, _query_tracker);
-            // _tracker->consume(buffer->len);
+            _tracker->ConsumeLocal(buffer->len, _query_tracker.get());
+            // _tracker->Consume(buffer->len);
         }
     }
 
@@ -110,8 +110,8 @@ class BufferedBlockMgr2::Client {
         if (buffer->len == _mgr->max_block_size()) {
             DCHECK_GT(_num_pinned_buffers, 0);
             --_num_pinned_buffers;
-            _tracker->release_local(buffer->len, _query_tracker);
-            // _tracker->release(buffer->len);
+            _tracker->ReleaseLocal(buffer->len, _query_tracker.get());
+            // _tracker->Release(buffer->len);
         }
     }
 
@@ -223,11 +223,11 @@ BufferedBlockMgr2::BufferedBlockMgr2(RuntimeState* state, TmpFileMgr* tmp_file_m
 }
 
 Status BufferedBlockMgr2::create(
-        RuntimeState* state, MemTracker* parent,
+        RuntimeState* state, const std::shared_ptr<MemTracker>& parent,
         RuntimeProfile* profile, TmpFileMgr* tmp_file_mgr,
         int64_t mem_limit, int64_t block_size,
-        shared_ptr<BufferedBlockMgr2>* block_mgr) {
-    DCHECK(parent != NULL);
+        boost::shared_ptr<BufferedBlockMgr2>* block_mgr) {
+    DCHECK(parent != nullptr);
     block_mgr->reset();
     {
         // we do not use global BlockMgrsMap for now, to avoid mem-exceeded different fragments
@@ -263,13 +263,13 @@ int64_t BufferedBlockMgr2::available_buffers(Client* client) const {
 int64_t BufferedBlockMgr2::remaining_unreserved_buffers() const {
     int64_t num_buffers = _free_io_buffers.size() +
         _unpinned_blocks.size() + _non_local_outstanding_writes;
-    num_buffers += _mem_tracker->spare_capacity() / max_block_size();
+    num_buffers += _mem_tracker->SpareCapacity(MemLimit::HARD) / max_block_size();
     num_buffers -= _unfullfilled_reserved_buffers;
     return num_buffers;
 }
 
 Status BufferedBlockMgr2::register_client(
-        int num_reserved_buffers, MemTracker* tracker,
+        int num_reserved_buffers, const std::shared_ptr<MemTracker>& tracker,
         RuntimeState* state, Client** client) {
     DCHECK_GE(num_reserved_buffers, 0);
     Client* a_client = new Client(this, num_reserved_buffers, tracker, state);
@@ -324,10 +324,10 @@ bool BufferedBlockMgr2::consume_memory(Client* client, int64_t size) {
     DCHECK_GT(buffers_needed, 0) << "Trying to consume 0 memory";
     unique_lock<mutex> lock(_lock);
 
-    if (size < max_block_size() && _mem_tracker->try_consume(size)) {
+    if (size < max_block_size() && _mem_tracker->TryConsume(size)) {
         // For small allocations (less than a block size), just let the allocation through.
-        client->_tracker->consume_local(size, client->_query_tracker);
-        // client->_tracker->consume(size);
+        client->_tracker->ConsumeLocal(size, client->_query_tracker.get());
+        // client->_tracker->Consume(size);
         return true;
     }
 
@@ -336,10 +336,10 @@ bool BufferedBlockMgr2::consume_memory(Client* client, int64_t size) {
         return false;
     }
 
-    if (_mem_tracker->try_consume(size)) {
+    if (_mem_tracker->TryConsume(size)) {
         // There was still unallocated memory, don't need to recycle allocated blocks.
-        client->_tracker->consume_local(size, client->_query_tracker);
-        // client->_tracker->consume(size);
+        client->_tracker->ConsumeLocal(size, client->_query_tracker.get());
+        // client->_tracker->Consume(size);
         return true;
     }
 
@@ -384,7 +384,7 @@ bool BufferedBlockMgr2::consume_memory(Client* client, int64_t size) {
         }
         client->_num_tmp_reserved_buffers -= additional_tmp_reservations;
         _unfullfilled_reserved_buffers -= additional_tmp_reservations;
-        _mem_tracker->release(buffers_acquired * max_block_size());
+        _mem_tracker->Release(buffers_acquired * max_block_size());
         return false;
     }
 
@@ -392,19 +392,19 @@ bool BufferedBlockMgr2::consume_memory(Client* client, int64_t size) {
     _unfullfilled_reserved_buffers -= buffers_acquired;
 
     DCHECK_GE(buffers_acquired * max_block_size(), size);
-    _mem_tracker->release(buffers_acquired * max_block_size());
-    if (!_mem_tracker->try_consume(size)) {
+    _mem_tracker->Release(buffers_acquired * max_block_size());
+    if (!_mem_tracker->TryConsume(size)) {
         return false;
     }
-    client->_tracker->consume_local(size, client->_query_tracker);
-    // client->_tracker->consume(size);
+    client->_tracker->ConsumeLocal(size, client->_query_tracker.get());
+    // client->_tracker->Consume(size);
     DCHECK(validate()) << endl << debug_internal();
     return true;
 }
 
 void BufferedBlockMgr2::release_memory(Client* client, int64_t size) {
-    _mem_tracker->release(size);
-    client->_tracker->release_local(size, client->_query_tracker);
+    _mem_tracker->Release(size);
+    client->_tracker->ReleaseLocal(size, client->_query_tracker.get());
 }
 
 void BufferedBlockMgr2::cancel() {
@@ -457,7 +457,7 @@ Status BufferedBlockMgr2::get_new_block(
 
         if (len > 0 && len < _max_block_size) {
             DCHECK(unpin_block == NULL);
-            if (client->_tracker->try_consume(len)) {
+            if (client->_tracker->TryConsume(len)) {
                 // TODO: Have a cache of unused blocks of size 'len' (0, _max_block_size)
                 uint8_t* buffer = new uint8_t[len];
                 // Descriptors for non-I/O sized buffers are deleted when the block is deleted.
@@ -586,7 +586,7 @@ BufferedBlockMgr2::~BufferedBlockMgr2() {
 
     // Free memory resources.
     BOOST_FOREACH(BufferDescriptor* buffer, _all_io_buffers) {
-        _mem_tracker->release(buffer->len);
+        _mem_tracker->Release(buffer->len);
         delete[] buffer->buffer;
     }
     DCHECK_EQ(_mem_tracker->consumption(), 0);
@@ -606,7 +606,7 @@ int BufferedBlockMgr2::num_reserved_buffers_remaining(Client* client) const {
     return std::max(client->_num_reserved_buffers - client->_num_pinned_buffers, 0);
 }
 
-MemTracker* BufferedBlockMgr2::get_tracker(Client* client) const {
+std::shared_ptr<MemTracker> BufferedBlockMgr2::get_tracker(Client* client) const {
     return client->_tracker;
 }
 
@@ -936,7 +936,7 @@ void BufferedBlockMgr2::delete_block(Block* block) {
         if (block->_buffer_desc->len != _max_block_size) {
             // Just delete the block for now.
             delete[] block->_buffer_desc->buffer;
-            block->_client->_tracker->release(block->_buffer_desc->len);
+            block->_client->_tracker->Release(block->_buffer_desc->len);
             delete block->_buffer_desc;
             block->_buffer_desc = NULL;
         } else {
@@ -1076,7 +1076,7 @@ Status BufferedBlockMgr2::find_buffer(
 
     // First, try to allocate a new buffer.
     if (_free_io_buffers.size() < _block_write_threshold &&
-            _mem_tracker->try_consume(_max_block_size)) {
+            _mem_tracker->TryConsume(_max_block_size)) {
         uint8_t* new_buffer = new uint8_t[_max_block_size];
         *buffer_desc = _obj_pool.add(new BufferDescriptor(new_buffer, _max_block_size));
         (*buffer_desc)->all_buffers_it = _all_io_buffers.insert(
@@ -1243,15 +1243,15 @@ string BufferedBlockMgr2::debug_internal() const {
         << "  Num available buffers: " << remaining_unreserved_buffers() << endl
         << "  Total pinned buffers: " << _total_pinned_buffers << endl
         << "  Unfullfilled reserved buffers: " << _unfullfilled_reserved_buffers << endl
-        << "  Remaining memory: " << _mem_tracker->spare_capacity()
-        << " (#blocks=" << (_mem_tracker->spare_capacity() / _max_block_size) << ")" << endl
+        << "  Remaining memory: " << _mem_tracker->SpareCapacity(MemLimit::HARD)
+        << " (#blocks=" << (_mem_tracker->SpareCapacity(MemLimit::HARD) / _max_block_size) << ")" << endl
         << "  Block write threshold: " << _block_write_threshold;
     return ss.str();
 }
 
 void BufferedBlockMgr2::init(
         DiskIoMgr* io_mgr, RuntimeProfile* parent_profile,
-        MemTracker* parent_tracker, int64_t mem_limit) {
+        const std::shared_ptr<MemTracker>& parent_tracker, int64_t mem_limit) {
     unique_lock<mutex> l(_lock);
     if (_initialized) {
         return;
@@ -1278,7 +1278,7 @@ void BufferedBlockMgr2::init(
     // Create a new mem_tracker and allocate buffers.
     // _mem_tracker.reset(new MemTracker(
     //             profile(), mem_limit, -1, "Block Manager", parent_tracker));
-    _mem_tracker.reset(new MemTracker(mem_limit, "Block Manager", parent_tracker));
+    _mem_tracker.reset(new MemTracker(mem_limit, "Block Manager2", parent_tracker));
 
     _initialized = true;
 }
diff --git a/be/src/runtime/buffered_block_mgr2.h b/be/src/runtime/buffered_block_mgr2.h
index 982fa07e7366f5..d8e5201240a45d 100644
--- a/be/src/runtime/buffered_block_mgr2.h
+++ b/be/src/runtime/buffered_block_mgr2.h
@@ -294,8 +294,7 @@ class BufferedBlockMgr2 {
     // - mem_limit: maximum memory that will be used by the block mgr.
     // - buffer_size: maximum size of each buffer.
     static Status create(
-            // RuntimeState* state, MemTracker* parent,
-            RuntimeState* state, MemTracker* parent,
+            RuntimeState* state, const std::shared_ptr<MemTracker>& parent,
             RuntimeProfile* profile, TmpFileMgr* tmp_file_mgr,
             int64_t mem_limit, int64_t buffer_size,
             boost::shared_ptr<BufferedBlockMgr2>* block_mgr);
@@ -312,7 +311,7 @@ class BufferedBlockMgr2 {
     // Buffers used by this client are reflected in tracker.
     // TODO: The fact that we allow oversubscription is problematic.
     // as the code expects the reservations to always be granted (currently not the case).
-    Status register_client(int num_reserved_buffers, MemTracker* tracker,
+    Status register_client(int num_reserved_buffers, const std::shared_ptr<MemTracker>& tracker,
             RuntimeState* state, Client** client);
 
     // Clears all reservations for this client.
@@ -388,7 +387,7 @@ class BufferedBlockMgr2 {
 
     int num_pinned_buffers(Client* client) const;
     int num_reserved_buffers_remaining(Client* client) const;
-    MemTracker* get_tracker(Client* client) const;
+    std::shared_ptr get_tracker(Client* client) const;
     int64_t max_block_size() const { {
         return _max_block_size; }
     }
@@ -425,7 +424,7 @@ class BufferedBlockMgr2 {
 
     // Initializes the block mgr. Idempotent and thread-safe.
     void init(DiskIoMgr* io_mgr, RuntimeProfile* profile,
-            MemTracker* parent_tracker, int64_t mem_limit);
+              const std::shared_ptr<MemTracker>& parent_tracker, int64_t mem_limit);
 
     // Initializes _tmp_files. This is initialized the first time we need to write to disk.
     // Must be called with _lock taken.
@@ -524,7 +523,7 @@ class BufferedBlockMgr2 {
     ObjectPool _obj_pool;
 
     // Track buffers allocated by the block manager.
-    boost::scoped_ptr<MemTracker> _mem_tracker;
+    std::shared_ptr<MemTracker> _mem_tracker;
 
     // The temporary file manager used to allocate temporary file space.
     TmpFileMgr* _tmp_file_mgr;
diff --git a/be/src/runtime/buffered_tuple_stream2.cc b/be/src/runtime/buffered_tuple_stream2.cc
index 135eb1440a3c0e..24444b78e1a4c2 100644
--- a/be/src/runtime/buffered_tuple_stream2.cc
+++ b/be/src/runtime/buffered_tuple_stream2.cc
@@ -488,7 +488,7 @@ Status BufferedTupleStream2::get_rows(scoped_ptr<RowBatch>* batch, bool* got_row
     }
     RETURN_IF_ERROR(prepare_for_read(false));
     batch->reset(
-            new RowBatch(_desc, num_rows(), _block_mgr->get_tracker(_block_mgr_client)));
+            new RowBatch(_desc, num_rows(), _block_mgr->get_tracker(_block_mgr_client).get()));
     bool eos = false;
     // Loop until get_next fills the entire batch. Each call can stop at block
     // boundaries. We generally want it to stop, so that blocks can be freed
diff --git a/be/src/runtime/buffered_tuple_stream3.cc b/be/src/runtime/buffered_tuple_stream3.cc
index add66234fcb9c4..bddbbc48950dcd 100644
--- a/be/src/runtime/buffered_tuple_stream3.cc
+++ b/be/src/runtime/buffered_tuple_stream3.cc
@@ -695,7 +695,7 @@ void BufferedTupleStream3::UnpinStream(UnpinMode mode) {
 }
 */
 Status BufferedTupleStream3::GetRows(
-    MemTracker* tracker, scoped_ptr<RowBatch>* batch, bool* got_rows) {
+    const std::shared_ptr<MemTracker>& tracker, scoped_ptr<RowBatch>* batch, bool* got_rows) {
   if (num_rows() > numeric_limits<int>::max()) {
     // RowBatch::num_rows_ is a 32-bit int, avoid an overflow.
     return Status::InternalError(Substitute("Trying to read $0 rows into in-memory batch failed. Limit "
@@ -710,7 +710,7 @@ Status BufferedTupleStream3::GetRows(
   // TODO chenhao 
   // capacity in RowBatch use int, but _num_rows is int64_t
   // it may be precision loss
-  batch->reset(new RowBatch(*desc_, num_rows(), tracker));
+  batch->reset(new RowBatch(*desc_, num_rows(), tracker.get()));
   bool eos = false;
   // Loop until GetNext fills the entire batch. Each call can stop at page
   // boundaries. We generally want it to stop, so that pages can be freed
diff --git a/be/src/runtime/buffered_tuple_stream3.h b/be/src/runtime/buffered_tuple_stream3.h
index ebc26d3a8c81d8..d93c8004a4f957 100644
--- a/be/src/runtime/buffered_tuple_stream3.h
+++ b/be/src/runtime/buffered_tuple_stream3.h
@@ -336,7 +336,7 @@ class BufferedTupleStream3 {
   /// process. If the current unused reservation is not sufficient to pin the stream in
   /// memory, this will try to increase the reservation. If that fails, 'got_rows' is set
   /// to false.
-  Status GetRows(MemTracker* tracker, boost::scoped_ptr<RowBatch>* batch,
+  Status GetRows(const std::shared_ptr<MemTracker>& tracker, boost::scoped_ptr<RowBatch>* batch,
       bool* got_rows) WARN_UNUSED_RESULT;
 
   /// Must be called once at the end to cleanup all resources. If 'batch' is non-NULL,
diff --git a/be/src/runtime/bufferpool/buffer_pool.cc b/be/src/runtime/bufferpool/buffer_pool.cc
index e66bcc232f70c7..da640f6ed6e377 100644
--- a/be/src/runtime/bufferpool/buffer_pool.cc
+++ b/be/src/runtime/bufferpool/buffer_pool.cc
@@ -118,8 +118,8 @@ BufferPool::BufferPool(int64_t min_buffer_len, int64_t buffer_bytes_limit,
 
 BufferPool::~BufferPool() {}
 
-Status BufferPool::RegisterClient(const string& name, //TmpFileMgr::FileGroup* file_group,
-    ReservationTracker* parent_reservation, MemTracker* mem_tracker,
+Status BufferPool::RegisterClient(const string& name,
+    ReservationTracker* parent_reservation, const std::shared_ptr<MemTracker>& mem_tracker,
     int64_t reservation_limit, RuntimeProfile* profile, ClientHandle* client) {
   DCHECK(!client->is_registered());
   DCHECK(parent_reservation != NULL);
@@ -375,7 +375,7 @@ void BufferPool::SubReservation::Close() {
 }
 
 BufferPool::Client::Client(BufferPool* pool, //TmpFileMgr::FileGroup* file_group,
-    const string& name, ReservationTracker* parent_reservation, MemTracker* mem_tracker,
+    const string& name, ReservationTracker* parent_reservation, const std::shared_ptr<MemTracker>& mem_tracker,
     int64_t reservation_limit, RuntimeProfile* profile)
   : pool_(pool),
     //file_group_(file_group),
@@ -386,7 +386,7 @@ BufferPool::Client::Client(BufferPool* pool, //TmpFileMgr::FileGroup* file_group
   // Set up a child profile with buffer pool info.
   RuntimeProfile* child_profile = profile->create_child("Buffer pool", true, true);
   reservation_.InitChildTracker(
-      child_profile, parent_reservation, mem_tracker, reservation_limit);
+      child_profile, parent_reservation, mem_tracker.get(), reservation_limit);
   counters_.alloc_time = ADD_TIMER(child_profile, "AllocTime");
   counters_.cumulative_allocations =
       ADD_COUNTER(child_profile, "CumulativeAllocations", TUnit::UNIT);
diff --git a/be/src/runtime/bufferpool/buffer_pool.h b/be/src/runtime/bufferpool/buffer_pool.h
index 4309c94869f374..56892c273eeadf 100644
--- a/be/src/runtime/bufferpool/buffer_pool.h
+++ b/be/src/runtime/bufferpool/buffer_pool.h
@@ -176,8 +176,8 @@ class BufferPool : public CacheLineAligned {
   /// The client's reservation is created as a child of 'parent_reservation' with limit
   /// 'reservation_limit' and associated with MemTracker 'mem_tracker'. The initial
   /// reservation is 0 bytes.
-  Status RegisterClient(const std::string& name, //TmpFileMgr::FileGroup* file_group,
-      ReservationTracker* parent_reservation, MemTracker* mem_tracker,
+  Status RegisterClient(const std::string& name,
+      ReservationTracker* parent_reservation, const std::shared_ptr<MemTracker>& mem_tracker,
       int64_t reservation_limit, RuntimeProfile* profile,
       ClientHandle* client) WARN_UNUSED_RESULT;
 
diff --git a/be/src/runtime/bufferpool/buffer_pool_internal.h b/be/src/runtime/bufferpool/buffer_pool_internal.h
index be764c2af8a066..1ad3b02e01f5d4 100644
--- a/be/src/runtime/bufferpool/buffer_pool_internal.h
+++ b/be/src/runtime/bufferpool/buffer_pool_internal.h
@@ -138,7 +138,7 @@ class BufferPool::Client {
  public:
   Client(BufferPool* pool, //TmpFileMgr::FileGroup* file_group, 
      const std::string& name,
-      ReservationTracker* parent_reservation, MemTracker* mem_tracker,
+      ReservationTracker* parent_reservation, const std::shared_ptr<MemTracker>& mem_tracker,
       int64_t reservation_limit, RuntimeProfile* profile);
 
   ~Client() {
diff --git a/be/src/runtime/bufferpool/reservation_tracker.cc b/be/src/runtime/bufferpool/reservation_tracker.cc
index 7fcc2bdfd06e34..41620f31574a46 100644
--- a/be/src/runtime/bufferpool/reservation_tracker.cc
+++ b/be/src/runtime/bufferpool/reservation_tracker.cc
@@ -75,10 +75,10 @@ void ReservationTracker::InitChildTracker(RuntimeProfile* profile,
     MemTracker* parent_mem_tracker = GetParentMemTracker();
     if (parent_mem_tracker != nullptr) {
       // Make sure the parent links of the MemTrackers correspond to our parent links.
-      DCHECK_EQ(parent_mem_tracker, mem_tracker_->parent());
+      DCHECK_EQ(parent_mem_tracker, mem_tracker_->parent().get());
       // Make sure we don't have a lower limit than the ancestor, since we don't enforce
       // limits at lower links.
-      DCHECK_EQ(mem_tracker_->lowest_limit(), parent_mem_tracker->lowest_limit());
+      DCHECK_EQ(mem_tracker_->GetLowestLimit(MemLimit::HARD), parent_mem_tracker->GetLowestLimit(MemLimit::HARD));
     } else {
       // Make sure we didn't leave a gap in the links. E.g. this tracker's grandparent
       // shouldn't have a MemTracker.
@@ -114,7 +114,7 @@ void ReservationTracker::InitCounters(
     counters_.reservation_limit = ADD_COUNTER(profile, "ReservationLimit", TUnit::BYTES);
     COUNTER_SET(counters_.reservation_limit, reservation_limit);
   }
-  if (mem_tracker_ != nullptr) mem_tracker_->enable_reservation_reporting(counters_);
+  if (mem_tracker_ != nullptr) mem_tracker_->EnableReservationReporting(counters_);
 }
 
 void ReservationTracker::Close() {
@@ -191,12 +191,12 @@ bool ReservationTracker::TryConsumeFromMemTracker(int64_t reservation_increase)
   if (GetParentMemTracker() == nullptr) {
     // At the topmost link, which may be a MemTracker with a limit, we need to use
     // TryConsume() to check the limit.
-    return mem_tracker_->try_consume(reservation_increase);
+    return mem_tracker_->TryConsume(reservation_increase);
   } else {
     // For lower links, there shouldn't be a limit to enforce, so we just need to
     // update the consumption of the linked MemTracker since the reservation is
     // already reflected in its parent.
-    mem_tracker_->consume_local(reservation_increase, GetParentMemTracker());
+    mem_tracker_->ConsumeLocal(reservation_increase, GetParentMemTracker());
     return true;
   }
 }
@@ -205,9 +205,9 @@ void ReservationTracker::ReleaseToMemTracker(int64_t reservation_decrease) {
   DCHECK_GE(reservation_decrease, 0);
   if (mem_tracker_ == nullptr) return;
   if (GetParentMemTracker() == nullptr) {
-    mem_tracker_->release(reservation_decrease);
+    mem_tracker_->Release(reservation_decrease);
   } else {
-    mem_tracker_->release_local(reservation_decrease, GetParentMemTracker());
+    mem_tracker_->ReleaseLocal(reservation_decrease, GetParentMemTracker());
   }
 }
 
diff --git a/be/src/runtime/data_spliter.cpp b/be/src/runtime/data_spliter.cpp
index d06e7cbbdf3073..778e49e05c4f8a 100644
--- a/be/src/runtime/data_spliter.cpp
+++ b/be/src/runtime/data_spliter.cpp
@@ -89,14 +89,13 @@ Status DataSpliter::prepare(RuntimeState* state) {
     std::stringstream title;
     title << "DataSplitSink (dst_fragment_instance_id=" << print_id(state->fragment_instance_id()) << ")";
     RETURN_IF_ERROR(DataSink::prepare(state));
-    RETURN_IF_ERROR(Expr::prepare(
-            _partition_expr_ctxs, state, _row_desc, _expr_mem_tracker.get()));
+    RETURN_IF_ERROR(Expr::prepare(_partition_expr_ctxs, state, _row_desc, _expr_mem_tracker));
     for (auto& iter : _rollup_map) {
-        RETURN_IF_ERROR(iter.second->prepare(state, _row_desc, _expr_mem_tracker.get()));
+        RETURN_IF_ERROR(iter.second->prepare(state, _row_desc, _expr_mem_tracker));
     }
     _profile = state->obj_pool()->add(new RuntimeProfile(title.str()));
     for (auto iter : _partition_infos) {
-        RETURN_IF_ERROR(iter->prepare(state, _row_desc, _expr_mem_tracker.get()));
+        RETURN_IF_ERROR(iter->prepare(state, _row_desc, _expr_mem_tracker));
     }
     return Status::OK();
 }
@@ -327,7 +326,7 @@ Status DataSpliter::close(RuntimeState* state, Status close_status) {
         }
     }
   
-    _expr_mem_tracker->close();
+    _expr_mem_tracker.reset();
     _closed = true;
     if (is_ok) {
         return Status::OK();
diff --git a/be/src/runtime/data_stream_recvr.cc b/be/src/runtime/data_stream_recvr.cc
index 32618901ea2b80..f5df75242e4895 100644
--- a/be/src/runtime/data_stream_recvr.cc
+++ b/be/src/runtime/data_stream_recvr.cc
@@ -242,7 +242,7 @@ void DataStreamRecvr::SenderQueue::add_batch(
         // Note: if this function makes a row batch, the batch *must* be added
         // to _batch_queue. It is not valid to create the row batch and destroy
         // it in this thread.
-        batch = new RowBatch(_recvr->row_desc(), pb_batch, _recvr->mem_tracker());
+        batch = new RowBatch(_recvr->row_desc(), pb_batch, _recvr->mem_tracker().get());
     }
    
     VLOG_ROW << "added #rows=" << batch->num_rows()
@@ -352,7 +352,7 @@ void DataStreamRecvr::transfer_all_resources(RowBatch* transfer_batch) {
 }
 
 DataStreamRecvr::DataStreamRecvr(
-        DataStreamMgr* stream_mgr, MemTracker* parent_tracker,
+        DataStreamMgr* stream_mgr, const std::shared_ptr<MemTracker>& parent_tracker,
         const RowDescriptor& row_desc, const TUniqueId& fragment_instance_id,
         PlanNodeId dest_node_id, int num_senders, bool is_merging, 
         int total_buffer_limit, RuntimeProfile* profile, 
@@ -427,8 +427,7 @@ void DataStreamRecvr::close() {
     _mgr->deregister_recvr(fragment_instance_id(), dest_node_id());
     _mgr = NULL;
     _merger.reset();
-    _mem_tracker->close();
-//    _mem_tracker->unregister_from_parent();
+    // TODO: Maybe shared tracker doesn't need to be reset manually
     _mem_tracker.reset();
 }
 
diff --git a/be/src/runtime/data_stream_recvr.h b/be/src/runtime/data_stream_recvr.h
index 104ee769c3bc17..5fcbde7704469f 100644
--- a/be/src/runtime/data_stream_recvr.h
+++ b/be/src/runtime/data_stream_recvr.h
@@ -98,7 +98,7 @@ class DataStreamRecvr {
     const TUniqueId& fragment_instance_id() const { return _fragment_instance_id; }
     PlanNodeId dest_node_id() const { return _dest_node_id; }
     const RowDescriptor& row_desc() const { return _row_desc; }
-    MemTracker* mem_tracker() const { return _mem_tracker.get(); }
+    std::shared_ptr<MemTracker> mem_tracker() const { return _mem_tracker; }
 
     void add_sub_plan_statistics(const PQueryStatistics& statistics, int sender_id) {
         _sub_plan_query_statistics_recvr->insert(statistics, sender_id);
@@ -108,7 +108,7 @@ class DataStreamRecvr {
     friend class DataStreamMgr;
     class SenderQueue;
 
-    DataStreamRecvr(DataStreamMgr* stream_mgr, MemTracker* parent_tracker,
+    DataStreamRecvr(DataStreamMgr* stream_mgr, const std::shared_ptr<MemTracker>& parent_tracker,
             const RowDescriptor& row_desc, const TUniqueId& fragment_instance_id,
             PlanNodeId dest_node_id, int num_senders, bool is_merging, 
             int total_buffer_limit, RuntimeProfile* profile, 
@@ -155,7 +155,7 @@ class DataStreamRecvr {
     AtomicInt<int> _num_buffered_bytes;
 
     // Memtracker for batches in the sender queue(s).
-    boost::scoped_ptr<MemTracker> _mem_tracker;
+    std::shared_ptr<MemTracker> _mem_tracker;
 
     // One or more queues of row batches received from senders. If _is_merging is true,
     // there is one SenderQueue for each sender. Otherwise, row batches from all senders
diff --git a/be/src/runtime/data_stream_sender.cpp b/be/src/runtime/data_stream_sender.cpp
index a642e00948b78e..5d61dc534c01df 100644
--- a/be/src/runtime/data_stream_sender.cpp
+++ b/be/src/runtime/data_stream_sender.cpp
@@ -386,19 +386,16 @@ Status DataStreamSender::prepare(RuntimeState* state) {
     _mem_tracker.reset(
             new MemTracker(_profile, -1, "DataStreamSender", state->instance_mem_tracker()));
 
-    if (_part_type == TPartitionType::UNPARTITIONED 
-            || _part_type == TPartitionType::RANDOM) {
+    if (_part_type == TPartitionType::UNPARTITIONED || _part_type == TPartitionType::RANDOM) {
         // Randomize the order we open/transmit to channels to avoid thundering herd problems.
         srand(reinterpret_cast<uint64_t>(this));
         random_shuffle(_channels.begin(), _channels.end());
     } else if (_part_type == TPartitionType::HASH_PARTITIONED) {
-        RETURN_IF_ERROR(Expr::prepare(
-                _partition_expr_ctxs, state, _row_desc, _expr_mem_tracker.get()));
+        RETURN_IF_ERROR(Expr::prepare(_partition_expr_ctxs, state, _row_desc, _expr_mem_tracker));
     } else {
-        RETURN_IF_ERROR(Expr::prepare(
-                _partition_expr_ctxs, state, _row_desc, _expr_mem_tracker.get()));
+        RETURN_IF_ERROR(Expr::prepare(_partition_expr_ctxs, state, _row_desc, _expr_mem_tracker));
         for (auto iter : _partition_infos) {
-            RETURN_IF_ERROR(iter->prepare(state, _row_desc, _expr_mem_tracker.get()));
+            RETURN_IF_ERROR(iter->prepare(state, _row_desc, _expr_mem_tracker));
         }
     }
 
diff --git a/be/src/runtime/data_stream_sender.h b/be/src/runtime/data_stream_sender.h
index 38ce7b815aa035..c39074bb60d9a0 100644
--- a/be/src/runtime/data_stream_sender.h
+++ b/be/src/runtime/data_stream_sender.h
@@ -151,7 +151,7 @@ class DataStreamSender : public DataSink {
     RuntimeProfile::Counter* _uncompressed_bytes_counter;
     RuntimeProfile::Counter* _ignore_rows;
 
-    std::unique_ptr<MemTracker> _mem_tracker;
+    std::shared_ptr<MemTracker> _mem_tracker;
 
     // Throughput per total time spent in sender
     RuntimeProfile::Counter* _overall_throughput;
diff --git a/be/src/runtime/disk_io_mgr.cc b/be/src/runtime/disk_io_mgr.cc
index 529d5a042ce871..2372048cc1aae1 100644
--- a/be/src/runtime/disk_io_mgr.cc
+++ b/be/src/runtime/disk_io_mgr.cc
@@ -215,7 +215,6 @@ void DiskIoMgr::BufferDescriptor::reset(RequestContext* reader,
     _len = 0;
     _eosr = false;
     _status = Status::OK();
-    _mem_tracker = NULL;
 }
 
 void DiskIoMgr::BufferDescriptor::return_buffer() {
@@ -223,21 +222,21 @@ void DiskIoMgr::BufferDescriptor::return_buffer() {
     _io_mgr->return_buffer(this);
 }
 
-// void DiskIoMgr::BufferDescriptor::SetMemTracker(MemTracker* tracker) {
-void DiskIoMgr::BufferDescriptor::set_mem_tracker(MemTracker* tracker) {
+void DiskIoMgr::BufferDescriptor::set_mem_tracker(std::shared_ptr<MemTracker> tracker) {
     // Cached buffers don't count towards mem usage.
     if (_scan_range->_cached_buffer != NULL) {
         return;
     }
-    if (_mem_tracker == tracker) {
+    if (_mem_tracker.get() == tracker.get()) {
         return;
     }
-    if (_mem_tracker != NULL) {
-        _mem_tracker->release(_buffer_len);
+    // TODO(yingchun): use TransferTo?
+    if (_mem_tracker != nullptr) {
+        _mem_tracker->Release(_buffer_len);
     }
-    _mem_tracker = tracker;
-    if (_mem_tracker != NULL) {
-        _mem_tracker->consume(_buffer_len);
+    _mem_tracker = std::move(tracker);
+    if (_mem_tracker != nullptr) {
+        _mem_tracker->Consume(_buffer_len);
     }
 }
 
@@ -360,9 +359,8 @@ DiskIoMgr::~DiskIoMgr() {
      */
 }
 
-// Status DiskIoMgr::init(MemTracker* process_mem_tracker) {
-Status DiskIoMgr::init(MemTracker* process_mem_tracker) {
-    DCHECK(process_mem_tracker != NULL);
+Status DiskIoMgr::init(const std::shared_ptr<MemTracker>& process_mem_tracker) {
+    DCHECK(process_mem_tracker != nullptr);
     _process_mem_tracker = process_mem_tracker;
     // If we hit the process limit, see if we can reclaim some memory by removing
     // previously allocated (but unused) io buffers.
@@ -406,11 +404,10 @@ Status DiskIoMgr::init(MemTracker* process_mem_tracker) {
     return Status::OK();
 }
 
-// Status DiskIoMgr::register_context(RequestContext** request_context, MemTracker* mem_tracker) {
-Status DiskIoMgr::register_context(RequestContext** request_context, MemTracker* mem_tracker) {
+Status DiskIoMgr::register_context(RequestContext** request_context, std::shared_ptr<MemTracker> mem_tracker) {
     DCHECK(_request_context_cache.get() != NULL) << "Must call init() first.";
     *request_context = _request_context_cache->get_new_context();
-    (*request_context)->reset(mem_tracker);
+    (*request_context)->reset(std::move(mem_tracker));
     return Status::OK();
 }
 
@@ -720,7 +717,7 @@ char* DiskIoMgr::get_free_buffer(int64_t* buffer_size) {
         ++_num_allocated_buffers;
         // Update the process mem usage.  This is checked the next time we start
         // a read for the next reader (DiskIoMgr::GetNextScanRange)
-        _process_mem_tracker->consume(*buffer_size);
+        _process_mem_tracker->Consume(*buffer_size);
         buffer = new char[*buffer_size];
     } else {
         buffer = _free_buffers[idx].front();
@@ -738,7 +735,7 @@ void DiskIoMgr::gc_io_buffers() {
        for (list<char*>::iterator iter = _free_buffers[idx].begin();
                 iter != _free_buffers[idx].end(); ++iter) {
             int64_t buffer_size = (1 << idx) * _min_buffer_size;
-            _process_mem_tracker->release(buffer_size);
+            _process_mem_tracker->Release(buffer_size);
             --_num_allocated_buffers;
             delete[] *iter;
 
@@ -751,7 +748,7 @@ void DiskIoMgr::gc_io_buffers() {
 
 void DiskIoMgr::return_free_buffer(BufferDescriptor* desc) {
     return_free_buffer(desc->_buffer, desc->_buffer_len);
-    desc->set_mem_tracker(NULL);
+    desc->set_mem_tracker(nullptr);
     desc->_buffer = NULL;
 }
 
@@ -765,7 +762,7 @@ void DiskIoMgr::return_free_buffer(char* buffer, int64_t buffer_size) {
     if (!config::disable_mem_pools && _free_buffers[idx].size() < config::max_free_io_buffers) {
         _free_buffers[idx].push_back(buffer);
     } else {
-        _process_mem_tracker->release(buffer_size);
+        _process_mem_tracker->Release(buffer_size);
         --_num_allocated_buffers;
         delete[] buffer;
     }
@@ -823,8 +820,8 @@ bool DiskIoMgr::get_next_request_range(DiskQueue* disk_queue, RequestRange** ran
         // TODO: we can do a lot better here.  The reader can likely make progress
         // with fewer io buffers.
         bool process_limit_exceeded = _process_mem_tracker->limit_exceeded();
-        bool reader_limit_exceeded = (*request_context)->_mem_tracker != NULL
-                ? (*request_context)->_mem_tracker->any_limit_exceeded() : false;
+        bool reader_limit_exceeded = (*request_context)->_mem_tracker != nullptr
+                ? (*request_context)->_mem_tracker->AnyLimitExceeded(MemLimit::HARD) : false;
         // bool reader_limit_exceeded = (*request_context)->_mem_tracker != NULL
         //     ? (*request_context)->_mem_tracker->limit_exceeded() : false;
 
@@ -1020,12 +1017,12 @@ void DiskIoMgr::read_range(DiskQueue* disk_queue, RequestContext* reader, ScanRa
     DCHECK_GT(bytes_remaining, 0);
     int64_t buffer_size = std::min(bytes_remaining, static_cast<int64_t>(_max_buffer_size));
     bool enough_memory = true;
-    if (reader->_mem_tracker != NULL) {
-        enough_memory = reader->_mem_tracker->spare_capacity() > LOW_MEMORY;
+    if (reader->_mem_tracker != nullptr) {
+        enough_memory = reader->_mem_tracker->SpareCapacity(MemLimit::HARD) > LOW_MEMORY;
         if (!enough_memory) {
             // Low memory, GC and try again.
             gc_io_buffers();
-            enough_memory = reader->_mem_tracker->spare_capacity() > LOW_MEMORY;
+            enough_memory = reader->_mem_tracker->SpareCapacity(MemLimit::HARD) > LOW_MEMORY;
         }
     }
 
diff --git a/be/src/runtime/disk_io_mgr.h b/be/src/runtime/disk_io_mgr.h
index 03229dfd2c8ad8..9b66e1875154e3 100644
--- a/be/src/runtime/disk_io_mgr.h
+++ b/be/src/runtime/disk_io_mgr.h
@@ -244,10 +244,9 @@ class DiskIoMgr {
         // Returns the offset within the scan range that this buffer starts at
         int64_t scan_range_offset() const { return _scan_range_offset; }
 
-        // Updates this buffer buffer to be owned by the new tracker. Consumption is
+        // Updates this buffer to be owned by the new tracker. Consumption is
         // release from the current tracker and added to the new one.
-        // void SetMemTracker(MemTracker* tracker);
-        void set_mem_tracker(MemTracker* mem_tracker);
+        void set_mem_tracker(std::shared_ptr<MemTracker> tracker);
 
         // Returns the buffer to the IoMgr. This must be called for every buffer
         // returned by get_next()/read() that did not return an error. This is non-blocking.
@@ -268,8 +267,7 @@ class DiskIoMgr {
         RequestContext* _reader;
 
         // The current tracker this buffer is associated with.
-        // MemTracker* _mem_tracker;
-        MemTracker* _mem_tracker;
+        std::shared_ptr<MemTracker> _mem_tracker;
 
         // Scan range that this buffer is for.
         ScanRange* _scan_range;
@@ -548,8 +546,7 @@ class DiskIoMgr {
     ~DiskIoMgr();
 
     // Initialize the IoMgr. Must be called once before any of the other APIs.
-    // Status init(MemTracker* process_mem_tracker);
-    Status init(MemTracker* process_mem_tracker);
+    Status init(const std::shared_ptr<MemTracker>& process_mem_tracker);
 
     // Allocates tracking structure for a request context.
     // Register a new request context which is returned in *request_context.
@@ -559,10 +556,8 @@ class DiskIoMgr {
     //    used for this reader will be tracked by this. If the limit is exceeded
     //    the reader will be cancelled and MEM_LIMIT_EXCEEDED will be returned via
     //    get_next().
-    // Status register_context(RequestContext** request_context,
-    //         MemTracker* reader_mem_tracker = NULL);
     Status register_context(RequestContext** request_context,
-            MemTracker* reader_mem_tracker = NULL);
+                            std::shared_ptr<MemTracker> reader_mem_tracker = std::shared_ptr<MemTracker>());
 
     // Unregisters context from the disk IoMgr. This must be called for every
     // register_context() regardless of cancellation and must be called in the
@@ -704,8 +699,7 @@ class DiskIoMgr {
     ObjectPool _pool;
 
     // Process memory tracker; needed to account for io buffers.
-    // MemTracker* _process_mem_tracker;
-    MemTracker* _process_mem_tracker;
+    std::shared_ptr<MemTracker> _process_mem_tracker;
 
     // Number of worker(read) threads per disk. Also the max depth of queued
     // work to the disk.
diff --git a/be/src/runtime/disk_io_mgr_internal.h b/be/src/runtime/disk_io_mgr_internal.h
index ad212e7fe1563b..a3b229deadfefb 100644
--- a/be/src/runtime/disk_io_mgr_internal.h
+++ b/be/src/runtime/disk_io_mgr_internal.h
@@ -138,8 +138,7 @@ class DiskIoMgr::RequestContext {
     RequestContext(DiskIoMgr* parent, int num_disks);
 
     // Resets this object.
-    // void reset(MemTracker* tracker);
-    void reset(MemTracker* tracker);
+    void reset(std::shared_ptr<MemTracker> tracker);
 
     // Decrements the number of active disks for this reader.  If the disk count
     // goes to 0, the disk complete condition variable is signaled.
@@ -196,8 +195,7 @@ class DiskIoMgr::RequestContext {
     DiskIoMgr* _parent;
 
     // Memory used for this reader.  This is unowned by this object.
-    // MemTracker* _mem_tracker;
-    MemTracker* _mem_tracker;
+    std::shared_ptr<MemTracker> _mem_tracker;
 
     // Total bytes read for this reader
     RuntimeProfile::Counter* _bytes_read_counter;
diff --git a/be/src/runtime/disk_io_mgr_reader_context.cc b/be/src/runtime/disk_io_mgr_reader_context.cc
index 7d342295204e9f..6e349950a5082b 100644
--- a/be/src/runtime/disk_io_mgr_reader_context.cc
+++ b/be/src/runtime/disk_io_mgr_reader_context.cc
@@ -152,8 +152,7 @@ DiskIoMgr::RequestContext::RequestContext(DiskIoMgr* parent, int num_disks) :
 }
 
 // Resets this object.
-// void DiskIoMgr::RequestContext::reset(MemTracker* tracker) {
-void DiskIoMgr::RequestContext::reset(MemTracker* tracker) {
+void DiskIoMgr::RequestContext::reset(std::shared_ptr<MemTracker> tracker) {
     DCHECK_EQ(_state, Inactive);
     _status = Status::OK();
 
@@ -163,7 +162,7 @@ void DiskIoMgr::RequestContext::reset(MemTracker* tracker) {
     _disks_accessed_bitmap = NULL;
 
     _state = Active;
-    _mem_tracker = tracker;
+    _mem_tracker = std::move(tracker);
 
     _num_unstarted_scan_ranges = 0;
     _num_disks_with_ranges = 0;
diff --git a/be/src/runtime/dpp_sink.cpp b/be/src/runtime/dpp_sink.cpp
index b4aa6c63d67c69..74953e56c85c4e 100644
--- a/be/src/runtime/dpp_sink.cpp
+++ b/be/src/runtime/dpp_sink.cpp
@@ -594,7 +594,7 @@ Status Translator::prepare(RuntimeState* state) {
 
     // 4. new batch for writer
     _batch_to_write.reset(
-            new RowBatch(_row_desc, state->batch_size(), state->instance_mem_tracker()));
+            new RowBatch(_row_desc, state->batch_size(), state->instance_mem_tracker().get()));
     if (_batch_to_write.get() == nullptr) {
         return Status::InternalError("No memory to allocate RowBatch.");
     }
@@ -828,7 +828,7 @@ Status Translator::process(RuntimeState* state) {
         SCOPED_TIMER(_agg_timer);
         bool eos = false;
         while (!eos) {
-            RowBatch batch(_row_desc, state->batch_size(), state->instance_mem_tracker());
+            RowBatch batch(_row_desc, state->batch_size(), state->instance_mem_tracker().get());
 
             RETURN_IF_ERROR(_sorter->get_next(&batch, &eos));
 
diff --git a/be/src/runtime/dpp_sink_internal.cpp b/be/src/runtime/dpp_sink_internal.cpp
index 2f54b5bea6b425..d2d5b19ed38e7f 100644
--- a/be/src/runtime/dpp_sink_internal.cpp
+++ b/be/src/runtime/dpp_sink_internal.cpp
@@ -70,11 +70,9 @@ Status RollupSchema::from_thrift(
 }
 
 Status RollupSchema::prepare(
-        RuntimeState* state, const RowDescriptor& row_desc, MemTracker* mem_tracker) {
-    RETURN_IF_ERROR(Expr::prepare(
-            _key_ctxs, state, row_desc, mem_tracker));
-    RETURN_IF_ERROR(Expr::prepare(
-            _value_ctxs, state, row_desc, mem_tracker));
+        RuntimeState* state, const RowDescriptor& row_desc, const std::shared_ptr<MemTracker>& mem_tracker) {
+    RETURN_IF_ERROR(Expr::prepare(_key_ctxs, state, row_desc, mem_tracker));
+    RETURN_IF_ERROR(Expr::prepare(_value_ctxs, state, row_desc, mem_tracker));
     return Status::OK();
 }
 
@@ -224,11 +222,10 @@ Status PartitionInfo::from_thrift(
     return Status::OK();
 }
 
-Status PartitionInfo::prepare(
-        RuntimeState* state, const RowDescriptor& row_desc, MemTracker* mem_tracker) {
+Status PartitionInfo::prepare(RuntimeState* state, const RowDescriptor& row_desc,
+                              const std::shared_ptr<MemTracker>& mem_tracker) {
     if (_distributed_expr_ctxs.size() > 0) {
-        RETURN_IF_ERROR(Expr::prepare(
-                _distributed_expr_ctxs, state, row_desc, mem_tracker));
+        RETURN_IF_ERROR(Expr::prepare(_distributed_expr_ctxs, state, row_desc, mem_tracker));
     }
     return Status::OK();
 }
diff --git a/be/src/runtime/dpp_sink_internal.h b/be/src/runtime/dpp_sink_internal.h
index b11833e38d85fa..af093b8b2500b0 100644
--- a/be/src/runtime/dpp_sink_internal.h
+++ b/be/src/runtime/dpp_sink_internal.h
@@ -49,7 +49,7 @@ class RollupSchema {
                               const TRollupSchema& t_schema,
                               RollupSchema* schema);
 
-    Status prepare(RuntimeState* state, const RowDescriptor& row_desc, MemTracker* mem_tracker);
+    Status prepare(RuntimeState* state, const RowDescriptor& row_desc, const std::shared_ptr<MemTracker>& mem_tracker);
 
     Status open(RuntimeState* state);
 
@@ -259,7 +259,7 @@ class PartitionInfo {
                               const TRangePartition& t_partition,
                               PartitionInfo* partition);
 
-    Status prepare(RuntimeState* state, const RowDescriptor& row_desc, MemTracker*);
+    Status prepare(RuntimeState* state, const RowDescriptor& row_desc, const std::shared_ptr<MemTracker>& mem_tracker);
 
     Status open(RuntimeState* state);
 
diff --git a/be/src/runtime/exec_env.cpp b/be/src/runtime/exec_env.cpp
index 8eea2c9afc6210..d22150da47b472 100644
--- a/be/src/runtime/exec_env.cpp
+++ b/be/src/runtime/exec_env.cpp
@@ -21,14 +21,11 @@
 
 namespace doris {
 
-ExecEnv::ExecEnv() {
-}
+ExecEnv::ExecEnv() {}
 
-ExecEnv::~ExecEnv() {
-}
+ExecEnv::~ExecEnv() {}
 
 const std::string& ExecEnv::token() const {
     return _master_info->token;
 }
-
-}
+} // namespace doris
diff --git a/be/src/runtime/exec_env.h b/be/src/runtime/exec_env.h
index 245026113e0210..f13dc3be2643e2 100644
--- a/be/src/runtime/exec_env.h
+++ b/be/src/runtime/exec_env.h
@@ -59,8 +59,9 @@ class PluginMgr;
 class BackendServiceClient;
 class FrontendServiceClient;
 class TPaloBrokerServiceClient;
-class TExtDataSourceServiceClient; 
-template <class T> class ClientCache;
+class TExtDataSourceServiceClient;
+template <class T>
+class ClientCache;
 class HeartbeatFlags;
 
 // Execution environment for queries/plan fragments.
@@ -89,21 +90,25 @@ class ExecEnv {
     ~ExecEnv();
 
     const std::string& token() const;
-    ExternalScanContextMgr* external_scan_context_mgr() {return _external_scan_context_mgr;}
+    ExternalScanContextMgr* external_scan_context_mgr() { return _external_scan_context_mgr; }
     MetricRegistry* metrics() const { return _metrics; }
     DataStreamMgr* stream_mgr() { return _stream_mgr; }
     ResultBufferMgr* result_mgr() { return _result_mgr; }
-    ResultQueueMgr* result_queue_mgr() {return _result_queue_mgr;}
+    ResultQueueMgr* result_queue_mgr() { return _result_queue_mgr; }
     ClientCache<BackendServiceClient>* client_cache() { return _backend_client_cache; }
     ClientCache<FrontendServiceClient>* frontend_client_cache() { return _frontend_client_cache; }
     ClientCache<TPaloBrokerServiceClient>* broker_client_cache() { return _broker_client_cache; }
-    ClientCache<TExtDataSourceServiceClient>* extdatasource_client_cache() { return _extdatasource_client_cache; }
+    ClientCache<TExtDataSourceServiceClient>* extdatasource_client_cache() {
+        return _extdatasource_client_cache;
+    }
 
     // using template to simplify client cache management
-    template<typename T>
-    ClientCache<T>* get_client_cache() { return nullptr; }
+    template <typename T>
+    ClientCache<T>* get_client_cache() {
+        return nullptr;
+    }
 
-    MemTracker* process_mem_tracker() { return _mem_tracker; }
+    std::shared_ptr<MemTracker> process_mem_tracker() { return _mem_tracker; }
     PoolMemTrackerRegistry* pool_mem_trackers() { return _pool_mem_trackers; }
     ThreadResourceMgr* thread_mgr() { return _thread_mgr; }
     PriorityThreadPool* thread_pool() { return _thread_pool; }
@@ -134,15 +139,14 @@ class ExecEnv {
     HeartbeatFlags* heartbeat_flags() { return _heartbeat_flags; }
 
     PluginMgr* plugin_mgr() { return _plugin_mgr; }
-    
+
 private:
     Status _init(const std::vector<StorePath>& store_paths);
     void _destory();
 
     Status _init_mem_tracker();
     /// Initialise 'buffer_pool_' and 'buffer_reservation_' with given capacity.
-    void _init_buffer_pool(int64_t min_page_len,
-                           int64_t capacity, int64_t clean_pages_limit);
+    void _init_buffer_pool(int64_t min_page_len, int64_t capacity, int64_t clean_pages_limit);
 
 private:
     std::vector _store_paths;
@@ -156,7 +160,7 @@ class ExecEnv {
     ClientCache<FrontendServiceClient>* _frontend_client_cache = nullptr;
     ClientCache<TPaloBrokerServiceClient>* _broker_client_cache = nullptr;
     ClientCache<TExtDataSourceServiceClient>* _extdatasource_client_cache = nullptr;
-    MemTracker* _mem_tracker = nullptr;
+    std::shared_ptr<MemTracker> _mem_tracker;
     PoolMemTrackerRegistry* _pool_mem_trackers = nullptr;
     ThreadResourceMgr* _thread_mgr = nullptr;
     PriorityThreadPool* _thread_pool = nullptr;
@@ -184,20 +188,29 @@ class ExecEnv {
     RoutineLoadTaskExecutor* _routine_load_task_executor = nullptr;
     SmallFileMgr* _small_file_mgr = nullptr;
     HeartbeatFlags* _heartbeat_flags = nullptr;
-    
+
     PluginMgr* _plugin_mgr = nullptr;
 };
 
-
 template <>
-inline ClientCache<BackendServiceClient>* ExecEnv::get_client_cache<BackendServiceClient>() { return _backend_client_cache; }
+inline ClientCache<BackendServiceClient>* ExecEnv::get_client_cache<BackendServiceClient>() {
+    return _backend_client_cache;
+}
 template <>
-inline ClientCache<FrontendServiceClient>* ExecEnv::get_client_cache<FrontendServiceClient>() { return _frontend_client_cache; }
+inline ClientCache<FrontendServiceClient>* ExecEnv::get_client_cache<FrontendServiceClient>() {
+    return _frontend_client_cache;
+}
 template <>
-inline ClientCache<TPaloBrokerServiceClient>* ExecEnv::get_client_cache<TPaloBrokerServiceClient>() { return _broker_client_cache; }
+inline ClientCache<TPaloBrokerServiceClient>*
+ExecEnv::get_client_cache<TPaloBrokerServiceClient>() {
+    return _broker_client_cache;
+}
 template <>
-inline ClientCache<TExtDataSourceServiceClient>* ExecEnv::get_client_cache<TExtDataSourceServiceClient>() { return _extdatasource_client_cache; }
-
+inline ClientCache<TExtDataSourceServiceClient>*
+ExecEnv::get_client_cache<TExtDataSourceServiceClient>() {
+    return _extdatasource_client_cache;
 }
 
+} // namespace doris
+
 #endif
diff --git a/be/src/runtime/exec_env_init.cpp b/be/src/runtime/exec_env_init.cpp
index ea0b830d47b361..b7fb080e53a5c7 100644
--- a/be/src/runtime/exec_env_init.cpp
+++ b/be/src/runtime/exec_env_init.cpp
@@ -80,7 +80,6 @@ Status ExecEnv::_init(const std::vector& store_paths) {
     _frontend_client_cache = new FrontendServiceClientCache(config::max_client_cache_size_per_host);
     _broker_client_cache = new BrokerServiceClientCache(config::max_client_cache_size_per_host);
     _extdatasource_client_cache = new ExtDataSourceServiceClientCache(config::max_client_cache_size_per_host);
-    _mem_tracker = nullptr;
     _pool_mem_trackers = new PoolMemTrackerRegistry();
     _thread_mgr = new ThreadResourceMgr();
     _thread_pool = new PriorityThreadPool(
@@ -178,7 +177,7 @@ Status ExecEnv::_init_mem_tracker() {
         return Status::InternalError(ss.str());
     }
 
-    _mem_tracker = new MemTracker(bytes_limit);
+    _mem_tracker.reset(new MemTracker(bytes_limit, "ExecEnv root", MemTracker::GetRootTracker()));
 
     LOG(INFO) << "Using global memory limit: " << PrettyPrinter::print(bytes_limit, TUnit::BYTES);
     RETURN_IF_ERROR(_disk_io_mgr->init(_mem_tracker));
@@ -224,7 +223,6 @@ void ExecEnv::_destory() {
     delete _thread_pool;
     delete _thread_mgr;
     delete _pool_mem_trackers;
-    delete _mem_tracker;
     delete _broker_client_cache;
     delete _extdatasource_client_cache;
     delete _frontend_client_cache;
diff --git a/be/src/runtime/export_sink.cpp b/be/src/runtime/export_sink.cpp
index 3aea0348cb46a6..3abb05c7827aa9 100644
--- a/be/src/runtime/export_sink.cpp
+++ b/be/src/runtime/export_sink.cpp
@@ -70,7 +70,7 @@ Status ExportSink::prepare(RuntimeState* state) {
     _mem_tracker.reset(new MemTracker(-1, "ExportSink", state->instance_mem_tracker()));
 
     // Prepare the exprs to run.
-    RETURN_IF_ERROR(Expr::prepare(_output_expr_ctxs, state, _row_desc, _mem_tracker.get()));
+    RETURN_IF_ERROR(Expr::prepare(_output_expr_ctxs, state, _row_desc, _mem_tracker));
 
     // TODO(lingbin): add some Counter
     _bytes_written_counter = ADD_COUNTER(profile(), "BytesExported", TUnit::BYTES);
diff --git a/be/src/runtime/export_sink.h b/be/src/runtime/export_sink.h
index 167d0a55d93f9f..ce8e2fbdb5140e 100644
--- a/be/src/runtime/export_sink.h
+++ b/be/src/runtime/export_sink.h
@@ -79,7 +79,7 @@ class ExportSink : public DataSink {
 
     RuntimeProfile* _profile;
 
-    std::unique_ptr _mem_tracker;
+    std::shared_ptr _mem_tracker;
 
     RuntimeProfile::Counter* _bytes_written_counter;
     RuntimeProfile::Counter* _rows_written_counter;
diff --git a/be/src/runtime/initial_reservations.cc b/be/src/runtime/initial_reservations.cc
index 9c2bd7f66f7b8c..2daf7aa0a5e544 100644
--- a/be/src/runtime/initial_reservations.cc
+++ b/be/src/runtime/initial_reservations.cc
@@ -37,13 +37,12 @@ using std::numeric_limits;
 namespace doris {
 
 InitialReservations::InitialReservations(ObjectPool* obj_pool,
-    ReservationTracker* query_reservation, MemTracker* query_mem_tracker,
+    ReservationTracker* query_reservation, std::shared_ptr<MemTracker> query_mem_tracker,
     int64_t initial_reservation_total_claims)
-  : initial_reservation_mem_tracker_(obj_pool->add(
-      new MemTracker(-1, "Unclaimed reservations", query_mem_tracker, false))),
+  : initial_reservation_mem_tracker_(MemTracker::CreateTracker(-1, "Unclaimed reservations", query_mem_tracker, false)),
       remaining_initial_reservation_claims_(initial_reservation_total_claims) {
   initial_reservations_.InitChildTracker(nullptr, query_reservation,
-      initial_reservation_mem_tracker_, numeric_limits<int64_t>::max());
+      initial_reservation_mem_tracker_.get(), numeric_limits<int64_t>::max());
 }
 
 Status InitialReservations::Init(
@@ -85,6 +84,7 @@ void InitialReservations::Return(BufferPool::ClientHandle* src, int64_t bytes) {
 
 void InitialReservations::ReleaseResources() {
   initial_reservations_.Close();
-  initial_reservation_mem_tracker_->close();
+  // TODO(HW): Close() is private. make this tracker shared later
+  // initial_reservation_mem_tracker_->Close();
 }
 }
diff --git a/be/src/runtime/initial_reservations.h b/be/src/runtime/initial_reservations.h
index 0863e5d44503d4..ad69a33998d46e 100644
--- a/be/src/runtime/initial_reservations.h
+++ b/be/src/runtime/initial_reservations.h
@@ -42,7 +42,7 @@ class InitialReservations {
   /// claimed over the lifetime of the query. The total bytes claimed via Claim()
   /// cannot exceed this. Allocated objects are stored in 'obj_pool'.
   InitialReservations(ObjectPool* obj_pool, ReservationTracker* query_reservation,
-      MemTracker* query_mem_tracker, int64_t initial_reservation_total_claims);
+      std::shared_ptr<MemTracker> query_mem_tracker, int64_t initial_reservation_total_claims);
 
   /// Initialize the query's pool of initial reservations by acquiring the minimum
   /// reservation required for the query on this host. Fails if the reservation could
@@ -70,7 +70,7 @@ class InitialReservations {
   // Return() returns reservations to.
   ReservationTracker initial_reservations_;
 
-  MemTracker* const initial_reservation_mem_tracker_;
+  std::shared_ptr<MemTracker> const initial_reservation_mem_tracker_;
 
   /// The total bytes of additional reservations that we expect to be claimed.
   /// initial_reservations_->GetReservation() <= remaining_initial_reservation_claims_.
diff --git a/be/src/runtime/load_channel.cpp b/be/src/runtime/load_channel.cpp
index 44f181509302c8..575bb9ceb5eaa1 100644
--- a/be/src/runtime/load_channel.cpp
+++ b/be/src/runtime/load_channel.cpp
@@ -24,7 +24,7 @@
 namespace doris {
 
 LoadChannel::LoadChannel(const UniqueId& load_id, int64_t mem_limit,
-                         int64_t timeout_s, MemTracker* mem_tracker) :
+                         int64_t timeout_s, const std::shared_ptr<MemTracker>& mem_tracker) :
         _load_id(load_id), _timeout_s(timeout_s) {
     _mem_tracker.reset(new MemTracker(mem_limit, _load_id.to_string(), mem_tracker));
     // _last_updated_time should be set before being inserted to
@@ -50,7 +50,7 @@ Status LoadChannel::open(const PTabletWriterOpenRequest& params) {
         } else {
             // create a new tablets channel
             TabletsChannelKey key(params.id(), index_id);
-            channel.reset(new TabletsChannel(key, _mem_tracker.get()));
+            channel.reset(new TabletsChannel(key, _mem_tracker));
             _tablets_channels.insert({index_id, channel});
         }
     }
diff --git a/be/src/runtime/load_channel.h b/be/src/runtime/load_channel.h
index 50d67d9715c25b..8bea5954bf78ea 100644
--- a/be/src/runtime/load_channel.h
+++ b/be/src/runtime/load_channel.h
@@ -39,7 +39,7 @@ class TabletsChannel;
 class LoadChannel {
 public:
     LoadChannel(const UniqueId& load_id, int64_t mem_limit,
-                int64_t timeout_s, MemTracker* mem_tracker);
+                int64_t timeout_s, const std::shared_ptr<MemTracker>& mem_tracker);
     ~LoadChannel();
 
     // open a new load channel if not exist
@@ -75,7 +75,7 @@ class LoadChannel {
 
     UniqueId _load_id;
     // Tracks the total memory comsupted by current load job on this BE
-    std::unique_ptr<MemTracker> _mem_tracker;
+    std::shared_ptr<MemTracker> _mem_tracker;
 
     // lock protect the tablets channel map
     std::mutex _lock;
diff --git a/be/src/runtime/load_channel_mgr.cpp b/be/src/runtime/load_channel_mgr.cpp
index b9a6eca045a36f..ad238313d905ab 100644
--- a/be/src/runtime/load_channel_mgr.cpp
+++ b/be/src/runtime/load_channel_mgr.cpp
@@ -103,7 +103,7 @@ Status LoadChannelMgr::open(const PTabletWriterOpenRequest& params) {
             int64_t job_timeout_s = calc_job_timeout_s(timeout_in_req_s);
 
             channel.reset(new LoadChannel(load_id, job_max_memory,
-                                          job_timeout_s, _mem_tracker.get()));
+                                          job_timeout_s, _mem_tracker));
             _load_channels.insert({load_id, channel});
         }
     }
diff --git a/be/src/runtime/load_channel_mgr.h b/be/src/runtime/load_channel_mgr.h
index 0f9fb46ddcc3ab..9af843561872dc 100644
--- a/be/src/runtime/load_channel_mgr.h
+++ b/be/src/runtime/load_channel_mgr.h
@@ -70,7 +70,7 @@ class LoadChannelMgr {
     Cache* _lastest_success_channel = nullptr;
 
     // check the total load mem consumption of this Backend
-    std::unique_ptr<MemTracker> _mem_tracker;
+    std::shared_ptr<MemTracker> _mem_tracker;
 
     // thread to clean timeout load channels
     std::thread _load_channels_clean_thread;
diff --git a/be/src/runtime/mem_pool.cpp b/be/src/runtime/mem_pool.cpp
index 37ca1970c9baec..4232cde20092f4 100644
--- a/be/src/runtime/mem_pool.cpp
+++ b/be/src/runtime/mem_pool.cpp
@@ -49,7 +49,7 @@ MemPool::~MemPool() {
         total_bytes_released += chunk.chunk.size;
         ChunkAllocator::instance()->free(chunk.chunk);
     }
-    mem_tracker_->release(total_bytes_released);
+    mem_tracker_->Release(total_bytes_released);
     DorisMetrics::instance()->memory_pool_bytes_total.increment(-total_bytes_released);
 }
 
@@ -75,7 +75,7 @@ void MemPool::free_all() {
     total_allocated_bytes_ = 0;
     total_reserved_bytes_ = 0;
 
-    mem_tracker_->release(total_bytes_released);
+    mem_tracker_->Release(total_bytes_released);
     DorisMetrics::instance()->memory_pool_bytes_total.increment(-total_bytes_released);
 }
 
@@ -119,15 +119,15 @@ bool MemPool::find_chunk(size_t min_size, bool check_limits) {
 
     chunk_size = BitUtil::RoundUpToPowerOfTwo(chunk_size);
     if (check_limits) {
-        if (!mem_tracker_->try_consume(chunk_size)) return false;
+        if (!mem_tracker_->TryConsume(chunk_size)) return false;
     } else {
-        mem_tracker_->consume(chunk_size);
+        mem_tracker_->Consume(chunk_size);
     }
 
     // Allocate a new chunk. Return early if allocate fails.
     Chunk chunk;
     if (!ChunkAllocator::instance()->allocate(chunk_size, &chunk)) {
-        mem_tracker_->release(chunk_size);
+        mem_tracker_->Release(chunk_size);
         return false;
     }
     ASAN_POISON_MEMORY_REGION(chunk.data, chunk_size);
@@ -174,8 +174,8 @@ void MemPool::acquire_data(MemPool* src, bool keep_current) {
 
     // Skip unnecessary atomic ops if the mem_trackers are the same.
     if (src->mem_tracker_ != mem_tracker_) {
-        src->mem_tracker_->release(total_transfered_bytes);
-        mem_tracker_->consume(total_transfered_bytes);
+        src->mem_tracker_->Release(total_transfered_bytes);
+        mem_tracker_->Consume(total_transfered_bytes);
     }
 
     // insert new chunks after current_chunk_idx_
@@ -213,8 +213,8 @@ void MemPool::exchange_data(MemPool* other) {
     std::swap(chunks_, other->chunks_);
 
     // update MemTracker
-    mem_tracker_->consume(delta_size);
-    other->mem_tracker_->release(delta_size);
+    mem_tracker_->Consume(delta_size);
+    other->mem_tracker_->Release(delta_size);
 }
 
 string MemPool::debug_string() {
diff --git a/be/src/runtime/mem_tracker.cpp b/be/src/runtime/mem_tracker.cpp
index 0f0a0c06676e7e..8b340287c13f50 100644
--- a/be/src/runtime/mem_tracker.cpp
+++ b/be/src/runtime/mem_tracker.cpp
@@ -20,185 +20,241 @@
 #include 
 #include 
 #include 
-//#include 
-//#include 
-//include 
+#include 
 
 #include "exec/exec_node.h"
+#include "gutil/once.h"
 #include "gutil/strings/substitute.h"
+#include "runtime/bufferpool/reservation_tracker_counters.h"
 #include "runtime/exec_env.h"
 #include "runtime/runtime_state.h"
+#include "service/backend_options.h"
 #include "util/debug_util.h"
 #include "util/doris_metrics.h"
+#include "util/debug_util.h"
 #include "util/mem_info.h"
 #include "util/pretty_printer.h"
-#include "util/stack_util.h"
 #include "util/uid_util.h"
+#include "util/stack_util.h"
 
-//using std::shared_ptr;
-//using std::weak_ptr;
-//using std::lexical_cast;
-#include 
-
-#include "runtime/bufferpool/reservation_tracker_counters.h"
+using std::endl;
+using std::greater;
+using std::pair;
+using std::priority_queue;
+using std::string;
+using strings::Substitute;
 
 namespace doris {
 
-const std::string MemTracker::COUNTER_NAME = "PeakMemoryUsage";
+const string MemTracker::COUNTER_NAME = "PeakMemoryUsage";
 
 // Name for request pool MemTrackers. '$0' is replaced with the pool name.
-const std::string REQUEST_POOL_MEM_TRACKER_LABEL_FORMAT = "RequestPool=$0";
+const string REQUEST_POOL_MEM_TRACKER_LABEL_FORMAT = "RequestPool=$0";
+
+/// Calculate the soft limit for a MemTracker based on the hard limit 'limit'.
+static int64_t CalcSoftLimit(int64_t limit) {
+  if (limit < 0) return -1;
+  double frac = std::max(0.0, std::min(1.0, config::soft_mem_limit_frac));
+  return static_cast<int64_t>(limit * frac);
+}
+
+// The ancestor for all trackers. Every tracker is visible from the root down.
+static std::shared_ptr<MemTracker> root_tracker;
+static GoogleOnceType root_tracker_once = GOOGLE_ONCE_INIT;
+
+void MemTracker::CreateRootTracker() {
+  root_tracker.reset(new MemTracker(-1, "root", std::shared_ptr<MemTracker>()));
+  root_tracker->Init();
+}
 
 MemTracker::MemTracker(
-        int64_t byte_limit, const std::string& label, MemTracker* parent, bool auto_unregister, bool log_usage_if_zero)
-    : _limit(byte_limit),
-    _label(label),
-    _parent(parent),
-    _consumption(&_local_counter),
-    _local_counter(TUnit::BYTES),
-    _consumption_metric(NULL),
-    _log_usage_if_zero(log_usage_if_zero),
-    _num_gcs_metric(NULL),
-    _bytes_freed_by_last_gc_metric(NULL),
-    _bytes_over_limit_metric(NULL),
-    _limit_metric(NULL),
-    _auto_unregister(auto_unregister) {
-        if (parent != NULL) _parent->add_child_tracker(this);
-        Init();
-    }
+    int64_t byte_limit, const string& label, const std::shared_ptr<MemTracker>& parent, bool auto_unregister, bool log_usage_if_zero)
+  : limit_(byte_limit),
+    soft_limit_(CalcSoftLimit(byte_limit)),
+    label_(label),
+    parent_(parent),
+    consumption_(&local_counter_),
+    local_counter_(TUnit::BYTES),
+    consumption_metric_(nullptr),
+    log_usage_if_zero_(log_usage_if_zero),
+    num_gcs_metric_(nullptr),
+    bytes_freed_by_last_gc_metric_(nullptr),
+    bytes_over_limit_metric_(nullptr),
+    limit_metric_(nullptr),
+    auto_unregister_(auto_unregister) {
+  Init();
+}
 
 MemTracker::MemTracker(RuntimeProfile* profile, int64_t byte_limit,
-        const std::string& label, MemTracker* parent)
-    : _limit(byte_limit),
-    _label(label),
-    _parent(parent),
-    _consumption(profile->AddHighWaterMarkCounter(COUNTER_NAME, TUnit::BYTES)),
-    _local_counter(TUnit::BYTES),
-    _consumption_metric(NULL),
-    _log_usage_if_zero(true),
-    _num_gcs_metric(NULL),
-    _bytes_freed_by_last_gc_metric(NULL),
-    _bytes_over_limit_metric(NULL),
-    _limit_metric(NULL) {
-        if (parent != NULL) _parent->add_child_tracker(this);
-        Init();
-    }
+    const std::string& label, const std::shared_ptr<MemTracker>& parent)
+  : limit_(byte_limit),
+    soft_limit_(CalcSoftLimit(byte_limit)),
+    label_(label),
+    parent_(parent),
+    consumption_(profile->AddHighWaterMarkCounter(COUNTER_NAME, TUnit::BYTES)),
+    local_counter_(TUnit::BYTES),
+    consumption_metric_(nullptr),
+    log_usage_if_zero_(true),
+    num_gcs_metric_(nullptr),
+    bytes_freed_by_last_gc_metric_(nullptr),
+    bytes_over_limit_metric_(nullptr),
+    limit_metric_(nullptr) {
+  Init();
+}
 
-MemTracker::MemTracker(
-        UIntGauge* consumption_metric, int64_t byte_limit, const std::string& label)
-    : _limit(byte_limit),
-    _label(label),
-    _parent(NULL),
-    _consumption(&_local_counter),
-    _local_counter(TUnit::BYTES),
-    _consumption_metric(consumption_metric),
-    _log_usage_if_zero(true),
-    _num_gcs_metric(NULL),
-    _bytes_freed_by_last_gc_metric(NULL),
-    _bytes_over_limit_metric(NULL),
-    _limit_metric(NULL) {
-        Init();
-    }
+MemTracker::MemTracker(IntGauge* consumption_metric,
+    int64_t byte_limit, const string& label, const std::shared_ptr<MemTracker>& parent)
+  : limit_(byte_limit),
+    soft_limit_(CalcSoftLimit(byte_limit)),
+    label_(label),
+    parent_(parent),
+    consumption_(&local_counter_),
+    local_counter_(TUnit::BYTES),
+    consumption_metric_(consumption_metric),
+    log_usage_if_zero_(true),
+    num_gcs_metric_(nullptr),
+    bytes_freed_by_last_gc_metric_(nullptr),
+    bytes_over_limit_metric_(nullptr),
+    limit_metric_(nullptr) {
+  Init();
+}
 
 void MemTracker::Init() {
-    DCHECK_GE(_limit, -1);
-    // populate _all_trackers and _limit_trackers
-    MemTracker* tracker = this;
-    while (tracker != NULL) {
-        _all_trackers.push_back(tracker);
-        if (tracker->has_limit()) _limit_trackers.push_back(tracker);
-        tracker = tracker->_parent;
-    }
-    DCHECK_GT(_all_trackers.size(), 0);
-    DCHECK_EQ(_all_trackers[0], this);
+  DCHECK_GE(limit_, -1);
+  DCHECK_LE(soft_limit_, limit_);
+  // if (parent_ != nullptr) parent_->AddChildTracker(std::shared_ptr(this));
+  // populate all_trackers_ and limit_trackers_
+  MemTracker* tracker = this;
+  while (tracker != nullptr) {
+    all_trackers_.push_back(tracker);
+    if (tracker->has_limit()) limit_trackers_.push_back(tracker);
+    tracker = tracker->parent_.get();
+  }
+  DCHECK_GT(all_trackers_.size(), 0);
+  DCHECK_EQ(all_trackers_[0], this);
 }
 
-// TODO chenhao , set MemTracker close state
-void MemTracker::close() {}
+void MemTracker::AddChildTracker(const std::shared_ptr<MemTracker>& tracker) {
+  lock_guard<SpinLock> l(child_trackers_lock_);
+  tracker->child_tracker_it_ = child_trackers_.insert(child_trackers_.end(), tracker);
+}
 
-void MemTracker::enable_reservation_reporting(const ReservationTrackerCounters& counters) {
-    ReservationTrackerCounters* new_counters = new ReservationTrackerCounters(counters);
-    _reservation_counters.store(new_counters);
+void MemTracker::EnableReservationReporting(const ReservationTrackerCounters& counters) {
+  delete reservation_counters_.swap(new ReservationTrackerCounters(counters));
 }
 
-int64_t MemTracker::GetPoolMemReserved() const {
-    // Pool trackers should have a _pool_name and no limit.
-    DCHECK(!_pool_name.empty());
-    DCHECK_EQ(_limit, -1) << LogUsage(UNLIMITED_DEPTH);
-
-    int64_t mem_reserved = 0L;
-    std::lock_guard<SpinLock> l(_child_trackers_lock);
-    for (MemTracker* child : _child_trackers) {
-        int64_t child_limit = child->limit();
-        if (child_limit > 0) {
-            // Make sure we don't overflow if the query limits are set to ridiculous values.
-            mem_reserved += std::min(child_limit, MemInfo::physical_mem());
-        } else {
-            DCHECK_EQ(child_limit, -1) << child->LogUsage(UNLIMITED_DEPTH);
-            mem_reserved += child->consumption();
-        }
-    }
-    return mem_reserved;
+int64_t MemTracker::GetLowestLimit(MemLimit mode) const {
+  if (limit_trackers_.empty()) return -1;
+  int64_t min_limit = numeric_limits<int64_t>::max();
+  for (MemTracker* limit_tracker : limit_trackers_) {
+    DCHECK(limit_tracker->has_limit());
+    min_limit = std::min(min_limit, limit_tracker->GetLimit(mode));
+  }
+  return min_limit;
 }
 
-MemTracker* PoolMemTrackerRegistry::GetRequestPoolMemTracker(
-        const std::string& pool_name, bool create_if_not_present) {
-    DCHECK(!pool_name.empty());
-    std::lock_guard<SpinLock> l(_pool_to_mem_trackers_lock);
-    PoolTrackersMap::iterator it = _pool_to_mem_trackers.find(pool_name);
-    if (it != _pool_to_mem_trackers.end()) {
-        MemTracker* tracker = it->second.get();
-        DCHECK(pool_name == tracker->_pool_name);
-        return tracker;
-    }
-    if (!create_if_not_present) return nullptr;
-    // First time this pool_name registered, make a new object.
-    MemTracker* tracker =
-        new MemTracker(-1, strings::Substitute(REQUEST_POOL_MEM_TRACKER_LABEL_FORMAT, pool_name),
-                ExecEnv::GetInstance()->process_mem_tracker());
-    tracker->_pool_name = pool_name;
-    _pool_to_mem_trackers.emplace(pool_name, std::unique_ptr<MemTracker>(tracker));
-    return tracker;
+int64_t MemTracker::SpareCapacity(MemLimit mode) const {
+  int64_t result = std::numeric_limits<int64_t>::max();
+  for (const auto& tracker : limit_trackers_) {
+    int64_t mem_left = tracker->GetLimit(mode) - tracker->consumption();
+    result = std::min(result, mem_left);
+  }
+  return result;
 }
 
-MemTracker* MemTracker::CreateQueryMemTracker(const TUniqueId& id,
-        const TQueryOptions& query_options, const std::string& pool_name, ObjectPool* obj_pool) {
-    int64_t byte_limit = -1;
-    if (query_options.__isset.mem_limit && query_options.mem_limit > 0) {
-        byte_limit = query_options.mem_limit;
-    }
-    if (byte_limit != -1) {
-        if (byte_limit > MemInfo::physical_mem()) {
-            LOG(WARNING) << "Memory limit " << PrettyPrinter::print(byte_limit, TUnit::BYTES)
-                << " exceeds physical memory of "
-                << PrettyPrinter::print(MemInfo::physical_mem(), TUnit::BYTES);
-        }
-        VLOG_QUERY << "Using query memory limit: "
-            << PrettyPrinter::print(byte_limit, TUnit::BYTES);
+void MemTracker::RefreshConsumptionFromMetric() {
+  DCHECK(consumption_metric_ != nullptr);
+  consumption_->set(consumption_metric_->value());
+}
+
+int64_t MemTracker::GetPoolMemReserved() {
+  // Pool trackers should have a pool_name_ and no limit.
+  DCHECK(!pool_name_.empty());
+  DCHECK_EQ(limit_, -1) << LogUsage(UNLIMITED_DEPTH);
+
+  int64_t mem_reserved = 0L;
+  lock_guard<SpinLock> l(child_trackers_lock_);
+  for (const auto& child_weak : child_trackers_) {
+    std::shared_ptr<MemTracker> child = child_weak.lock();
+    if (child) {
+      int64_t child_limit = child->limit();
+      bool query_exec_finished = child->query_exec_finished_.load() != 0;
+      if (child_limit > 0 && !query_exec_finished) {
+        // Make sure we don't overflow if the query limits are set to ridiculous values.
+        mem_reserved += std::min(child_limit, MemInfo::physical_mem());
+      } else {
+        DCHECK(query_exec_finished || child_limit == -1)
+                    << child->LogUsage(UNLIMITED_DEPTH);
+        mem_reserved += child->consumption();
+      }
     }
+  }
+  return mem_reserved;
+}
 
-    MemTracker* pool_tracker =
-        ExecEnv::GetInstance()->pool_mem_trackers()->GetRequestPoolMemTracker(
-                pool_name, true);
-    return pool_tracker;
+MemTracker* PoolMemTrackerRegistry::GetRequestPoolMemTracker(
+    const string& pool_name, bool create_if_not_present) {
+  DCHECK(!pool_name.empty());
+  lock_guard<SpinLock> l(pool_to_mem_trackers_lock_);
+  PoolTrackersMap::iterator it = pool_to_mem_trackers_.find(pool_name);
+  if (it != pool_to_mem_trackers_.end()) {
+    MemTracker* tracker = it->second.get();
+    DCHECK(pool_name == tracker->pool_name_);
+    return tracker;
+  }
+  if (!create_if_not_present) return nullptr;
+  // First time this pool_name registered, make a new object.
+  MemTracker* tracker =
+      new MemTracker(-1, Substitute(REQUEST_POOL_MEM_TRACKER_LABEL_FORMAT, pool_name),
+          ExecEnv::GetInstance()->process_mem_tracker());
+  tracker->pool_name_ = pool_name;
+  pool_to_mem_trackers_.emplace(pool_name, std::unique_ptr<MemTracker>(tracker));
+  return tracker;
 }
 
 MemTracker::~MemTracker() {
-    int64_t remaining_bytes = consumption();
-    // work around some scenario where consume() is not paired with release()
-    // e.g., in the initialization of hll and bitmap aggregator (see aggregate_func.h)
-    // TODO(gaodayue) should be replaced with `DCHECK_EQ(consumption(), 0);` when
-    // we fixed thoses invalid usages
-    if (remaining_bytes > 0) {
-        for (auto tracker : _all_trackers) {
-            tracker->_consumption->add(-remaining_bytes);
-        }
+  delete reservation_counters_.load();
+
+  if (parent()) {
+    DCHECK(consumption() == 0) << "Memory tracker " << debug_string()
+                               << " has unreleased consumption " << consumption();
+    parent_->Release(consumption());
+    if (auto_unregister_) {  // TODO(yingchun): when auto_unregister_ is false, and can it be false?
+      unregister_from_parent();
     }
-    delete _reservation_counters.load();
+  }
+}
 
-    if (_auto_unregister && parent()) {
-        unregister_from_parent();
-    }
+//void MemTracker::RegisterMetrics(MetricGroup* metrics, const string& prefix) {
+//  num_gcs_metric_ = metrics->AddCounter(Substitute("$0.num-gcs", prefix), 0);
+//
+//  // TODO: Consider a total amount of bytes freed counter
+//  bytes_freed_by_last_gc_metric_ = metrics->AddGauge(
+//      Substitute("$0.bytes-freed-by-last-gc", prefix), -1);
+//
+//  bytes_over_limit_metric_ = metrics->AddGauge(
+//      Substitute("$0.bytes-over-limit", prefix), -1);
+//
+//  limit_metric_ = metrics->AddGauge(Substitute("$0.limit", prefix), limit_);
+//}
+
+void MemTracker::TransferTo(MemTracker* dst, int64_t bytes) {
+  DCHECK_EQ(all_trackers_.back(), dst->all_trackers_.back())
+      << "Must have same root";
+  // Find the common ancestor and update trackers between 'this'/'dst' and
+  // the common ancestor. This logic handles all cases, including the
+  // two trackers being the same or being ancestors of each other because
+  // 'all_trackers_' includes the current tracker.
+  int ancestor_idx = all_trackers_.size() - 1;
+  int dst_ancestor_idx = dst->all_trackers_.size() - 1;
+  while (ancestor_idx > 0 && dst_ancestor_idx > 0
+      && all_trackers_[ancestor_idx - 1] == dst->all_trackers_[dst_ancestor_idx - 1]) {
+    --ancestor_idx;
+    --dst_ancestor_idx;
+  }
+  MemTracker* common_ancestor = all_trackers_[ancestor_idx];
+  ReleaseLocal(bytes, common_ancestor);
+  dst->ConsumeLocal(bytes, common_ancestor);
 }
 
 // Calling this on the query tracker results in output like:
@@ -219,154 +275,221 @@ MemTracker::~MemTracker() {
 //      DataStreamSender (dst_id=4): Total=680.00 B Peak=680.00 B
 //
 // If 'reservation_metrics_' are set, we ge a more granular breakdown:
-//   TrackerName: Limit=5.00 MB BufferPoolUsed/Reservation=0/5.00 MB OtherMemory=1.04 MB
+//   TrackerName: Limit=5.00 MB Reservation=5.00 MB OtherMemory=1.04 MB
 //                Total=6.04 MB Peak=6.45 MB
 //
-std::string MemTracker::LogUsage(int max_recursive_depth, const std::string& prefix,
-                                 int64_t* logged_consumption) const {
-    int64_t curr_consumption = consumption();
-    int64_t peak_consumption = _consumption->value();
-    if (logged_consumption != nullptr) *logged_consumption = curr_consumption;
-
-    if (!_log_usage_if_zero && curr_consumption == 0) return "";
-
-    std::stringstream ss;
-    ss << prefix << _label << ":";
-    //if (CheckLimitExceeded()) ss << " memory limit exceeded.";
-    if (limit_exceeded()) ss << " memory limit exceeded.";
-    if (_limit > 0) ss << " Limit=" << PrettyPrinter::print(_limit, TUnit::BYTES);
-
-    ReservationTrackerCounters* reservation_counters = _reservation_counters.load();
-    if (reservation_counters != nullptr) {
-        int64_t reservation = reservation_counters->peak_reservation->current_value();
-        int64_t used_reservation = reservation_counters->peak_used_reservation->current_value();
-        int64_t reservation_limit = 0;
-        //TODO chenhao, reservation_limit is null when ReservationTracker
-        // does't have reservation limit
-        if (reservation_counters->reservation_limit != nullptr) {
-            reservation_limit = reservation_counters->reservation_limit->value();
-        }
-        ss << " BufferPoolUsed/Reservation=" << PrettyPrinter::print(used_reservation, TUnit::BYTES)
-           << "/" << PrettyPrinter::print(reservation, TUnit::BYTES);
-        if (reservation_limit != std::numeric_limits::max()) {
-            ss << " BufferPoolLimit=" << PrettyPrinter::print(reservation_limit, TUnit::BYTES);
-        }
-        ss << " OtherMemory=" << PrettyPrinter::print(curr_consumption - reservation, TUnit::BYTES);
-    }
-    ss << " Total=" << PrettyPrinter::print(curr_consumption, TUnit::BYTES)
-       << " Peak=" << PrettyPrinter::print(peak_consumption, TUnit::BYTES);
-
-    // This call does not need the children, so return early.
-    if (max_recursive_depth == 0) return ss.str();
-
-    std::string new_prefix = strings::Substitute("  $0", prefix);
-    int64_t child_consumption;
-    std::string child_trackers_usage;
-    {
-        std::lock_guard<SpinLock> l(_child_trackers_lock);
-        child_trackers_usage =
-                LogUsage(max_recursive_depth - 1, new_prefix, _child_trackers, &child_consumption);
+string MemTracker::LogUsage(int max_recursive_depth, const string& prefix,
+    int64_t* logged_consumption) {
+  // Make sure the consumption is up to date.
+  if (consumption_metric_ != nullptr) RefreshConsumptionFromMetric();
+  int64_t curr_consumption = consumption();
+  int64_t peak_consumption = consumption_->value();
+  if (logged_consumption != nullptr) *logged_consumption = curr_consumption;
+
+  if (!log_usage_if_zero_ && curr_consumption == 0) return "";
+
+  stringstream ss;
+  ss << prefix << label_ << ":";
+  if (CheckLimitExceeded(MemLimit::HARD)) ss << " memory limit exceeded.";
+  if (limit_ > 0) ss << " Limit=" << PrettyPrinter::print(limit_, TUnit::BYTES);
+
+  ReservationTrackerCounters* reservation_counters = reservation_counters_.load();
+  if (reservation_counters != nullptr) {
+    int64_t reservation = reservation_counters->peak_reservation->current_value();
+    ss << " Reservation=" << PrettyPrinter::print(reservation, TUnit::BYTES);
+    if (reservation_counters->reservation_limit != nullptr) {
+      int64_t limit = reservation_counters->reservation_limit->value();
+      ss << " ReservationLimit=" << PrettyPrinter::print(limit, TUnit::BYTES);
     }
-    if (!child_trackers_usage.empty()) ss << "\n" << child_trackers_usage;
-
-    if (_consumption_metric != nullptr) {
-        // Log the difference between the metric value and children as "untracked" memory so
-        // that the values always add up. This value is not always completely accurate because
-        // we did not necessarily get a consistent snapshot of the consumption values for all
-        // children at a single moment in time, but is good enough for our purposes.
-        int64_t untracked_bytes = curr_consumption - child_consumption;
-        ss << "\n" << new_prefix << "Untracked Memory: Total=";
-        ss << "\n"
-           << new_prefix
-           << "Untracked Memory: Total=" << PrettyPrinter::print(untracked_bytes, TUnit::BYTES);
+    ss << " OtherMemory="
+       << PrettyPrinter::print(curr_consumption - reservation, TUnit::BYTES);
+  }
+  ss << " Total=" << PrettyPrinter::print(curr_consumption, TUnit::BYTES);
+  // Peak consumption is not accurate if the metric is lazily updated (i.e.
+  // this is a non-root tracker that exists only for reporting purposes).
+  // Only report peak consumption if we actually call Consume()/Release() on
+  // this tracker or an descendent.
+  if (consumption_metric_ == nullptr || parent_ == nullptr) {
+    ss << " Peak=" << PrettyPrinter::print(peak_consumption, TUnit::BYTES);
+  }
+
+  // This call does not need the children, so return early.
+  if (max_recursive_depth == 0) return ss.str();
+
+  // Recurse and get information about the children
+  string new_prefix = Substitute("  $0", prefix);
+  int64_t child_consumption;
+  string child_trackers_usage;
+  {
+    lock_guard<SpinLock> l(child_trackers_lock_);
+    child_trackers_usage = LogUsage(max_recursive_depth - 1, new_prefix,
+        child_trackers_, &child_consumption);
+  }
+  if (!child_trackers_usage.empty()) ss << "\n" << child_trackers_usage;
+
+  if (parent_ == nullptr) {
+    // Log the difference between the metric value and children as "untracked" memory so
+    // that the values always add up. This value is not always completely accurate because
+    // we did not necessarily get a consistent snapshot of the consumption values for all
+    // children at a single moment in time, but is good enough for our purposes.
+    int64_t untracked_bytes = curr_consumption - child_consumption;
+    ss << "\n"
+       << new_prefix << "Untracked Memory: Total="
+       << PrettyPrinter::print(untracked_bytes, TUnit::BYTES);
+  }
+  return ss.str();
+}
+
+string MemTracker::LogUsage(int max_recursive_depth, const string& prefix,
+    const std::list<std::weak_ptr<MemTracker>>& trackers, int64_t* logged_consumption) {
+  *logged_consumption = 0;
+  vector<string> usage_strings;
+  for (const auto& tracker_weak : trackers) {
+    std::shared_ptr<MemTracker> tracker = tracker_weak.lock();
+    if (tracker) {
+      int64_t tracker_consumption;
+      string usage_string = tracker->LogUsage(max_recursive_depth, prefix,
+                                              &tracker_consumption);
+      if (!usage_string.empty()) usage_strings.push_back(usage_string);
+      *logged_consumption += tracker_consumption;
     }
+  }
+  return boost::join(usage_strings, "\n");
+}
 
-    return ss.str();
+string MemTracker::LogTopNQueries(int limit) {
+  if (limit == 0) return "";
+  if (this->is_query_mem_tracker_) return LogUsage(0);
+  priority_queue<pair<int64_t, string>, vector<pair<int64_t, string>>,
+      std::greater<pair<int64_t, string>>>
+      min_pq;
+  GetTopNQueries(min_pq, limit);
+  vector<string> usage_strings(min_pq.size());
+  while (!min_pq.empty()) {
+    usage_strings.push_back(min_pq.top().second);
+    min_pq.pop();
+  }
+  std::reverse(usage_strings.begin(), usage_strings.end());
+  return boost::join(usage_strings, "\n");
 }
 
-std::string MemTracker::LogUsage(int max_recursive_depth, const std::string& prefix,
-                                 const std::list& trackers,
-                                 int64_t* logged_consumption) {
-    *logged_consumption = 0;
-    std::vector usage_strings;
-    for (MemTracker* tracker : trackers) {
-        int64_t tracker_consumption;
-        std::string usage_string =
-                tracker->LogUsage(max_recursive_depth, prefix, &tracker_consumption);
-        if (!usage_string.empty()) usage_strings.push_back(usage_string);
-        *logged_consumption += tracker_consumption;
+void MemTracker::GetTopNQueries(
+    priority_queue<pair<int64_t, string>, vector<pair<int64_t, string>>,
+        greater<pair<int64_t, string>>>& min_pq,
+    int limit) {
+  lock_guard<SpinLock> l(child_trackers_lock_);
+  for (const auto& child_weak : child_trackers_) {
+    std::shared_ptr<MemTracker> child = child_weak.lock();
+    if (child) {
+      if (!child->is_query_mem_tracker_) {
+        child->GetTopNQueries(min_pq, limit);
+      } else {
+        min_pq.push(pair<int64_t, string>(child->consumption(), child->LogUsage(0)));
+        if (min_pq.size() > limit) min_pq.pop();
+      }
     }
-    return boost::join(usage_strings, "\n");
+  }
+}
+
+MemTracker* MemTracker::GetQueryMemTracker() {
+  MemTracker* tracker = this;
+  while (tracker != nullptr && !tracker->is_query_mem_tracker_) {
+    tracker = tracker->parent_.get();
+  }
+  return tracker;
 }
 
-Status MemTracker::MemLimitExceeded(RuntimeState* state, const std::string& details,
-        int64_t failed_allocation_size) {
-    DCHECK_GE(failed_allocation_size, 0);
-    std::stringstream ss;
-    if (details.size() != 0) ss << details << std::endl;
-    if (failed_allocation_size != 0) {
-        ss << label() << " could not allocate "
-            << PrettyPrinter::print(failed_allocation_size, TUnit::BYTES)
-            << " without exceeding limit." << std::endl;
+Status MemTracker::MemLimitExceeded(MemTracker* mtracker, RuntimeState* state,
+    const std::string& details, int64_t failed_allocation_size) {
+  DCHECK_GE(failed_allocation_size, 0);
+  stringstream ss;
+  if (details.size() != 0) ss << details << endl;
+  if (failed_allocation_size != 0) {
+    if (mtracker != nullptr) ss << mtracker->label();
+    ss << " could not allocate "
+       << PrettyPrinter::print(failed_allocation_size, TUnit::BYTES)
+       << " without exceeding limit." << endl;
+  }
+  ss << "Error occurred on backend " << BackendOptions::get_localhost();
+  if (state != nullptr) ss << " by fragment " << print_id(state->fragment_instance_id());
+  ss << endl;
+  ExecEnv* exec_env = ExecEnv::GetInstance();
+  MemTracker* process_tracker = exec_env->process_mem_tracker().get();
+  const int64_t process_capacity = process_tracker->SpareCapacity(MemLimit::HARD);
+  ss << "Memory left in process limit: "
+     << PrettyPrinter::print(process_capacity, TUnit::BYTES) << endl;
+
+  // Always log the query tracker (if available).
+  MemTracker* query_tracker = nullptr;
+  if (mtracker != nullptr) {
+    query_tracker = mtracker->GetQueryMemTracker();
+    if (query_tracker != nullptr) {
+      if (query_tracker->has_limit()) {
+        const int64_t query_capacity =
+            query_tracker->limit() - query_tracker->consumption();
+        ss << "Memory left in query limit: "
+           << PrettyPrinter::print(query_capacity, TUnit::BYTES) << endl;
+      }
+      ss << query_tracker->LogUsage(UNLIMITED_DEPTH);
     }
-    //ss << "Error occurred on backend " << GetBackendString();
-    if (state != nullptr) ss << " by fragment " << state->fragment_instance_id();
-    ss << std::endl;
-    ExecEnv* exec_env = ExecEnv::GetInstance();
-    //ExecEnv* exec_env = nullptr;
-    MemTracker* process_tracker = exec_env->process_mem_tracker();
-    const int64_t process_capacity = process_tracker->spare_capacity();
-    ss << "Memory left in process limit: "
-        << PrettyPrinter::print(process_capacity, TUnit::BYTES) << std::endl;
-
-    // Choose which tracker to log the usage of. Default to the process tracker so we can
-    // get the full view of memory consumption.
-    // FIXME(cmy): call LogUsage() lead to crash here, fix it later
-    // MemTracker* tracker_to_log = process_tracker;
-    // if (state != nullptr && state->query_mem_tracker()->has_limit()) {
-    //     MemTracker* query_tracker = state->query_mem_tracker();
-    //     const int64_t query_capacity = query_tracker->limit() - query_tracker->consumption();
-    //     ss << "Memory left in query limit: "
-    //         << PrettyPrinter::print(query_capacity, TUnit::BYTES) << std::endl;
-    //     // Log the query tracker only if the query limit was closer to being exceeded.
-    //     if (query_capacity < process_capacity) tracker_to_log = query_tracker;
-    // }
-    // ss << tracker_to_log->LogUsage();
-    // Status status = Status::MemLimitExceeded(ss.str());
-    LIMIT_EXCEEDED(this, state, ss.str());
+  }
+
+  // Log the process level if the process tracker is close to the limit or
+  // if this tracker is not within a query's MemTracker hierarchy.
+  if (process_capacity < failed_allocation_size || query_tracker == nullptr) {
+    // IMPALA-5598: For performance reasons, limit the levels of recursion when
+    // dumping the process tracker to only two layers.
+    ss << process_tracker->LogUsage(PROCESS_MEMTRACKER_LIMITED_DEPTH);
+  }
+
+  Status status = Status::MemoryLimitExceeded(ss.str());
+  if (state != nullptr) state->log_error(status.to_string());
+  return status;
 }
 
 void MemTracker::AddGcFunction(GcFunction f) {
-    _gc_functions.push_back(f);
+  gc_functions_.push_back(f);
+}
+
+bool MemTracker::LimitExceededSlow(MemLimit mode) {
+  if (mode == MemLimit::HARD && bytes_over_limit_metric_ != nullptr) {
+    bytes_over_limit_metric_->set_value(consumption() - limit_);
+  }
+  return GcMemory(GetLimit(mode));
 }
 
 bool MemTracker::GcMemory(int64_t max_consumption) {
-    if (max_consumption < 0) return true;
-    std::lock_guard<std::mutex> l(_gc_lock);
-    if (_consumption_metric != NULL) RefreshConsumptionFromMetric();
-    int64_t pre_gc_consumption = consumption();
-    // Check if someone gc'd before us
-    if (pre_gc_consumption < max_consumption) return false;
-    if (_num_gcs_metric != NULL) _num_gcs_metric->increment(1);
-
-    int64_t curr_consumption = pre_gc_consumption;
-    // Try to free up some memory
-    for (int i = 0; i < _gc_functions.size(); ++i) {
-        // Try to free up the amount we are over plus some extra so that we don't have to
-        // immediately GC again. Don't free all the memory since that can be unnecessarily
-        // expensive.
-        const int64_t EXTRA_BYTES_TO_FREE = 512L * 1024L * 1024L;
-        int64_t bytes_to_free = curr_consumption - max_consumption + EXTRA_BYTES_TO_FREE;
-        _gc_functions[i](bytes_to_free);
-        if (_consumption_metric != NULL) RefreshConsumptionFromMetric();
-        curr_consumption = consumption();
-        if (max_consumption - curr_consumption <= EXTRA_BYTES_TO_FREE) break;
-    }
+  if (max_consumption < 0) return true;
+  lock_guard<std::mutex> l(gc_lock_);
+  if (consumption_metric_ != nullptr) RefreshConsumptionFromMetric();
+  int64_t pre_gc_consumption = consumption();
+  // Check if someone gc'd before us
+  if (pre_gc_consumption < max_consumption) return false;
+  if (num_gcs_metric_ != nullptr) num_gcs_metric_->increment(1);
+
+  int64_t curr_consumption = pre_gc_consumption;
+  // Try to free up some memory
+  for (int i = 0; i < gc_functions_.size(); ++i) {
+    // Try to free up the amount we are over plus some extra so that we don't have to
+    // immediately GC again. Don't free all the memory since that can be unnecessarily
+    // expensive.
+    const int64_t EXTRA_BYTES_TO_FREE = 512L * 1024L * 1024L;
+    int64_t bytes_to_free = curr_consumption - max_consumption + EXTRA_BYTES_TO_FREE;
+    gc_functions_[i](bytes_to_free);
+    if (consumption_metric_ != nullptr) RefreshConsumptionFromMetric();
+    curr_consumption = consumption();
+    if (max_consumption - curr_consumption <= EXTRA_BYTES_TO_FREE) break;
+  }
+
+  if (bytes_freed_by_last_gc_metric_ != nullptr) {
+    bytes_freed_by_last_gc_metric_->set_value(pre_gc_consumption - curr_consumption);
+  }
+  return curr_consumption > max_consumption;
+}
 
-    if (_bytes_freed_by_last_gc_metric != NULL) {
-        _bytes_freed_by_last_gc_metric->set_value(pre_gc_consumption - curr_consumption);
-    }
-    return curr_consumption > max_consumption;
+std::shared_ptr<MemTracker> MemTracker::GetRootTracker() {
+  GoogleOnceInit(&root_tracker_once, &MemTracker::CreateRootTracker);
+  return root_tracker;
 }
 
-} // end namespace doris
+} // namespace doris
diff --git a/be/src/runtime/mem_tracker.h b/be/src/runtime/mem_tracker.h
index cbcc500ef71fa6..4709bd4ac0dd61 100644
--- a/be/src/runtime/mem_tracker.h
+++ b/be/src/runtime/mem_tracker.h
@@ -15,26 +15,34 @@
 // specific language governing permissions and limitations
 // under the License.
 
-#ifndef DORIS_BE_SRC_QUERY_BE_RUNTIME_MEM_LIMIT_H
-#define DORIS_BE_SRC_QUERY_BE_RUNTIME_MEM_LIMIT_H
-
-#include 
+#pragma once
 
+#include 
+#include 
 #include 
 #include 
+#include 
+#include 
+#include 
+#include 
+#include 
 #include 
 
-#include "common/status.h"
-#include "gen_cpp/Types_types.h"
+#include "gen_cpp/Types_types.h" // for TUniqueId
 #include "util/metrics.h"
 #include "util/runtime_profile.h"
 #include "util/spinlock.h"
+#include "common/status.h"
 
 namespace doris {
 
+/// Mode argument passed to various MemTracker methods to indicate whether a soft or hard
+/// limit should be used.
+enum class MemLimit { HARD, SOFT };
+
 class ObjectPool;
 class MemTracker;
-class ReservationTrackerCounters;
+struct ReservationTrackerCounters;
 class RuntimeState;
 class TQueryOptions;
 
@@ -42,6 +50,12 @@ class TQueryOptions;
 /// and can be arranged into a tree structure such that the consumption tracked
 /// by a MemTracker is also tracked by its ancestors.
 ///
+/// A MemTracker has a hard and a soft limit derived from the limit. If the hard limit
+/// is exceeded, all memory allocations and queries should fail until we are under the
+/// limit again. The soft limit can be exceeded without causing query failures, but
+/// consumers of memory that can tolerate running without more memory should not allocate
+/// memory in excess of the soft limit.
+///
 /// We use a five-level hierarchy of mem trackers: process, pool, query, fragment
 /// instance. Specific parts of the fragment (exec nodes, sinks, etc) will add a
 /// fifth level when they are initialized. This function also initializes a user
@@ -49,11 +63,14 @@ class TQueryOptions;
 ///
 /// By default, memory consumption is tracked via calls to Consume()/Release(), either to
 /// the tracker itself or to one of its descendents. Alternatively, a consumption metric
-/// can specified, and then the metric's value is used as the consumption rather than the
-/// tally maintained by Consume() and Release(). A tcmalloc metric is used to track
+/// can be specified, and then the metric's value is used as the consumption rather than
+/// the tally maintained by Consume() and Release(). A tcmalloc metric is used to track
 /// process memory consumption, since the process memory usage may be higher than the
 /// computed total memory (tcmalloc does not release deallocated memory immediately).
-//
+/// Other consumption metrics are used in trackers below the process level to account
+/// for memory (such as free buffer pool buffers) that is not tracked by Consume() and
+/// Release().
+///
 /// GcFunctions can be attached to a MemTracker in order to free up memory if the limit is
 /// reached. If LimitExceeded() is called and the limit is exceeded, it will first call
 /// the GcFunctions to try to free memory and recheck the limit. For example, the process
@@ -64,437 +81,504 @@ class TQueryOptions;
 /// call back into MemTrackers, except to release memory.
 //
 /// This class is thread-safe.
-class MemTracker {
-public:
-    /// 'byte_limit' < 0 means no limit
-    /// 'label' is the label used in the usage string (LogUsage())
-    /// If 'auto_unregister' is true, never call unregister_from_parent().
-    /// If 'log_usage_if_zero' is false, this tracker (and its children) will not be included
-    /// in LogUsage() output if consumption is 0.
-    MemTracker(int64_t byte_limit = -1, const std::string& label = std::string(),
-               MemTracker* parent = NULL, bool auto_unregister = false, bool log_usage_if_zero = true);
-
-    /// C'tor for tracker for which consumption counter is created as part of a profile.
-    /// The counter is created with name COUNTER_NAME.
-    MemTracker(RuntimeProfile* profile, int64_t byte_limit,
-            const std::string& label = std::string(), MemTracker* parent = NULL);
-
-    /// C'tor for tracker that uses consumption_metric as the consumption value.
-    /// Consume()/Release() can still be called. This is used for the process tracker.
-    MemTracker(UIntGauge* consumption_metric, int64_t byte_limit = -1,
-      const std::string& label = std::string());
-
-    ~MemTracker();
-
-    /// Closes this MemTracker. After closing it is invalid to consume memory on this
-    /// tracker and the tracker's consumption counter (which may be owned by a
-    /// RuntimeProfile, not this MemTracker) can be safely destroyed. MemTrackers without
-    /// consumption metrics in the context of a daemon must always be closed.
-    /// Idempotent: calling multiple times has no effect.
-    void close();
-
-    // Removes this tracker from _parent->_child_trackers.
-    void unregister_from_parent() {
-        DCHECK(_parent != NULL);
-        std::lock_guard l(_parent->_child_trackers_lock);
-        _parent->_child_trackers.erase(_child_tracker_it);
-        _child_tracker_it = _parent->_child_trackers.end();
+class MemTracker : public std::enable_shared_from_this<MemTracker> {
+ public:
+  // TODO(yingchun): change to std::shared_ptr parent
+  /// 'byte_limit' < 0 means no limit
+  /// 'label' is the label used in the usage string (LogUsage())
+  /// If 'auto_unregister' is true, never call unregister_from_parent().
+  /// If 'log_usage_if_zero' is false, this tracker (and its children) will not be
+  /// included
+  /// in LogUsage() output if consumption is 0.
+  MemTracker(int64_t byte_limit = -1, const std::string& label = std::string(),
+             const std::shared_ptr<MemTracker>& parent = std::shared_ptr<MemTracker>(),
+             bool auto_unregister = false, bool log_usage_if_zero = true);
+
+  /// C'tor for tracker for which consumption counter is created as part of a profile.
+  /// The counter is created with name COUNTER_NAME.
+  MemTracker(RuntimeProfile* profile, int64_t byte_limit,
+      const std::string& label = std::string(), const std::shared_ptr<MemTracker>& parent = std::shared_ptr<MemTracker>());
+
+  /// C'tor for tracker that uses consumption_metric as the consumption value.
+  /// Consume()/Release() can still be called. This is used for the root process tracker
+  /// (if 'parent' is NULL). It is also to report on other categories of memory under the
+  /// process tracker, e.g. buffer pool free buffers (if 'parent' is non-NULL).
+  MemTracker(IntGauge* consumption_metric, int64_t byte_limit = -1,
+      const std::string& label = std::string(), const std::shared_ptr<MemTracker>& parent = std::shared_ptr<MemTracker>());
+
+  ~MemTracker();
+
+  // Removes this tracker from parent_->child_trackers_.
+  void unregister_from_parent() {
+      DCHECK(parent_ != nullptr);
+      std::lock_guard l(parent_->child_trackers_lock_);
+      parent_->child_trackers_.erase(child_tracker_it_);
+      child_tracker_it_ = parent_->child_trackers_.end();
+  }
+
+  /// Include counters from a ReservationTracker in logs and other diagnostics.
+  /// The counters should be owned by the fragment's RuntimeProfile.
+  void EnableReservationReporting(const ReservationTrackerCounters& counters);
+
+  // Gets a shared_ptr to the "root" tracker, creating it if necessary.
+  static std::shared_ptr<MemTracker> GetRootTracker();
+
+  // delete static CreateQueryMemTracker(), cuz it cannot use shared tracker
+
+  /// Increases consumption of this tracker and its ancestors by 'bytes'.
+  void Consume(int64_t bytes) {
+    // DCHECK_GE(bytes, 0);
+    if (bytes < 0) {
+      Release(-bytes);
+      return;
+    }
+    if (bytes == 0) {
+      return;
     }
 
-    /// Include counters from a ReservationTracker in logs and other diagnostics.
-    /// The counters should be owned by the fragment's RuntimeProfile.
-    void enable_reservation_reporting(const ReservationTrackerCounters& counters);
-
-    /// Construct a MemTracker object for query 'id'. The query limits are determined based
-    /// on 'query_options'. The MemTracker is a child of the request pool MemTracker for
-    /// 'pool_name', which is created if needed. The returned MemTracker is owned by
-    /// 'obj_pool'.
-    static MemTracker* CreateQueryMemTracker(const TUniqueId& id,
-            const TQueryOptions& query_options, const std::string& pool_name,
-            ObjectPool* obj_pool);
-
-    // Returns a MemTracker object for query 'id'.  Calling this with the same id will
-    // return the same MemTracker object.  An example of how this is used is to pass it
-    // the same query id for all fragments of that query running on this machine.  This
-    // way, we have per-query limits rather than per-fragment.
-    // The first time this is called for an id, a new MemTracker object is created with
-    // 'parent' as the parent tracker.
-    // byte_limit and parent must be the same for all GetMemTracker() calls with the
-    // same id.
-    static std::shared_ptr get_query_mem_tracker(const TUniqueId& id,
-            int64_t byte_limit, MemTracker* parent);
-
-    void consume(int64_t bytes) {
-        if (bytes <= 0) {
-            if (bytes < 0) release(-bytes);
-            return;
-        }
-
-        if (_consumption_metric != NULL) {
-            RefreshConsumptionFromMetric();
-            return;
-        }
-        for (std::vector::iterator tracker = _all_trackers.begin();
-             tracker != _all_trackers.end(); ++tracker) {
-            (*tracker)->_consumption->add(bytes);
-            if ((*tracker)->_consumption_metric == NULL) {
-                DCHECK_GE((*tracker)->_consumption->current_value(), 0);
+    if (consumption_metric_ != nullptr) {
+      RefreshConsumptionFromMetric();
+      return;  // TODO(yingchun): why return not update tracker?
+    }
+    for (auto& tracker : all_trackers_) {
+      tracker->consumption_->add(bytes);
+      if (tracker->consumption_metric_ == nullptr) {
+        DCHECK_GE(tracker->consumption_->current_value(), 0);
+      }
+    }
+  }
+
+  /// Increases the consumption of this tracker and the ancestors up to (but
+  /// not including) end_tracker. This is useful if we want to move tracking between
+  /// trackers that share a common (i.e. end_tracker) ancestor. This happens when we want
+  /// to update tracking on a particular mem tracker but the consumption against
+  /// the limit recorded in one of its ancestors already happened.
+  void ConsumeLocal(int64_t bytes, MemTracker* end_tracker) {
+    DCHECK_GE(bytes, 0);
+    if (UNLIKELY(bytes < 0)) return; // needed in RELEASE, hits DCHECK in DEBUG
+    ChangeConsumption(bytes, end_tracker);
+  }
+
+  /// Same as above, but it decreases the consumption.
+  void ReleaseLocal(int64_t bytes, MemTracker* end_tracker) {
+    DCHECK_GE(bytes, 0);
+    if (UNLIKELY(bytes < 0)) return; // needed in RELEASE, hits DCHECK in DEBUG
+    ChangeConsumption(-bytes, end_tracker);
+  }
+
+  /// Increases consumption of this tracker and its ancestors by 'bytes' only if
+  /// they can all consume 'bytes' without exceeding limit (hard or soft) specified
+  /// by 'mode'. If any limit would be exceed, no MemTrackers are updated. If the
+  /// caller can tolerate an allocation failing, it should set mode=SOFT so that
+  /// other callers that may not tolerate allocation failures have a better chance
+  /// of success. Returns true if the consumption was successfully updated.
+  WARN_UNUSED_RESULT
+  bool TryConsume(int64_t bytes, MemLimit mode = MemLimit::HARD) {
+    // DCHECK_GE(bytes, 0);
+    if (bytes <= 0) {
+        Release(-bytes);
+        return true;
+    }
+    // if (UNLIKELY(bytes == 0)) return true;
+    // if (UNLIKELY(bytes < 0)) return false; // needed in RELEASE, hits DCHECK in DEBUG
+    if (consumption_metric_ != nullptr) RefreshConsumptionFromMetric();
+    int i;
+    // Walk the tracker tree top-down.
+    for (i = all_trackers_.size() - 1; i >= 0; --i) {
+      MemTracker* tracker = all_trackers_[i];
+      const int64_t limit = tracker->GetLimit(mode);
+      if (limit < 0) {
+        tracker->consumption_->add(bytes); // No limit at this tracker.
+      } else {
+        // If TryConsume fails, we can try to GC, but we may need to try several times if
+        // there are concurrent consumers because we don't take a lock before trying to
+        // update consumption_.
+        while (true) {
+          if (LIKELY(tracker->consumption_->try_add(bytes, limit))) break;
+
+          VLOG_RPC << "TryConsume failed, bytes=" << bytes
+                   << " consumption=" << tracker->consumption_->current_value()
+                   << " limit=" << limit << " attempting to GC";
+          if (UNLIKELY(tracker->GcMemory(limit - bytes))) {
+            DCHECK_GE(i, 0);
+            // Failed for this mem tracker. Roll back the ones that succeeded.
+            for (int j = all_trackers_.size() - 1; j > i; --j) {
+              all_trackers_[j]->consumption_->add(-bytes);
             }
+            return false;
+          }
+          VLOG_RPC << "GC succeeded, TryConsume bytes=" << bytes
+                   << " consumption=" << tracker->consumption_->current_value()
+                   << " limit=" << limit;
         }
+      }
     }
-
-    /// Increases/Decreases the consumption of this tracker and the ancestors up to (but
-    /// not including) end_tracker. This is useful if we want to move tracking between
-    /// trackers that share a common (i.e. end_tracker) ancestor. This happens when we want
-    /// to update tracking on a particular mem tracker but the consumption against
-    /// the limit recorded in one of its ancestors already happened.
-    void consume_local(int64_t bytes, MemTracker* end_tracker) {
-        DCHECK(_consumption_metric == NULL) << "Should not be called on root.";
-        for (int i = 0; i < _all_trackers.size(); ++i) {
-            if (_all_trackers[i] == end_tracker) return;
-            DCHECK(!_all_trackers[i]->has_limit());
-            _all_trackers[i]->_consumption->add(bytes);
-        }
-        DCHECK(false) << "end_tracker is not an ancestor";
+    // Everyone succeeded, return.
+    DCHECK_EQ(i, -1);
+    return true;
+  }
+
+  /// Decreases consumption of this tracker and its ancestors by 'bytes'.
+  void Release(int64_t bytes) {
+    // DCHECK_GE(bytes, 0);
+    if (bytes < 0) {
+      Consume(-bytes);
+      return;
     }
 
-    void release_local(int64_t bytes, MemTracker* end_tracker) {
-        consume_local(-bytes, end_tracker);
+    if (bytes == 0) {
+      return;
     }
 
-    /// Increases consumption of this tracker and its ancestors by 'bytes' only if
-    /// they can all consume 'bytes'. If this brings any of them over, none of them
-    /// are updated.
-    /// Returns true if the try succeeded.
-    WARN_UNUSED_RESULT
-    bool try_consume(int64_t bytes) {
-        if (_consumption_metric != NULL) RefreshConsumptionFromMetric();
-        if (UNLIKELY(bytes <= 0)) return true;
-        int i;
-        // Walk the tracker tree top-down.
-        for (i = _all_trackers.size() - 1; i >= 0; --i) {
-            MemTracker* tracker = _all_trackers[i];
-            const int64_t limit = tracker->limit();
-            if (limit < 0) {
-                tracker->_consumption->add(bytes); // No limit at this tracker.
-            } else {
-                // If TryConsume fails, we can try to GC, but we may need to try several times if
-                // there are concurrent consumers because we don't take a lock before trying to
-                // update _consumption.
-                while (true) {
-                    if (LIKELY(tracker->_consumption->try_add(bytes, limit))) break;
-
-                    VLOG_RPC << "TryConsume failed, bytes=" << bytes
-                        << " consumption=" << tracker->_consumption->current_value()
-                        << " limit=" << limit << " attempting to GC";
-                    if (UNLIKELY(tracker->GcMemory(limit - bytes))) {
-                        DCHECK_GE(i, 0);
-                        // Failed for this mem tracker. Roll back the ones that succeeded.
-                        for (int j = _all_trackers.size() - 1; j > i; --j) {
-                            _all_trackers[j]->_consumption->add(-bytes);
-                        }
-                        return false;
-                    }
-                    VLOG_RPC << "GC succeeded, TryConsume bytes=" << bytes
-                        << " consumption=" << tracker->_consumption->current_value()
-                        << " limit=" << limit;
-                }
-            }
-        }
-        // Everyone succeeded, return.
-        DCHECK_EQ(i, -1);
+    // if (UNLIKELY(bytes <= 0)) return; // < 0 needed in RELEASE, hits DCHECK in DEBUG
+
+    if (consumption_metric_ != nullptr) {
+      RefreshConsumptionFromMetric();
+      return;
+    }
+    for (auto& tracker : all_trackers_) {
+      tracker->consumption_->add(-bytes);
+      /// If a UDF calls FunctionContext::TrackAllocation() but allocates less than the
+      /// reported amount, the subsequent call to FunctionContext::Free() may cause the
+      /// process mem tracker to go negative until it is synced back to the tcmalloc
+      /// metric. Don't blow up in this case. (Note that this doesn't affect non-process
+      /// trackers since we can enforce that the reported memory usage is internally
+      /// consistent.)
+      if (tracker->consumption_metric_ == nullptr) {
+        DCHECK_GE(tracker->consumption_->current_value(), 0)
+            << std::endl
+            << tracker->LogUsage(UNLIMITED_DEPTH);
+      }
+    }
+  }
+
+  /// Transfer 'bytes' of consumption from this tracker to 'dst', updating
+  /// all ancestors up to the first shared ancestor. Must not be used if
+  /// 'dst' has a limit, or an ancestor with a limit, that is not a common
+  /// ancestor with the tracker, because this does not check memory limits.
+  void TransferTo(MemTracker* dst, int64_t bytes);
+
+  /// Returns true if a valid limit of this tracker or one of its ancestors is
+  /// exceeded.
+  bool AnyLimitExceeded(MemLimit mode) {
+    for (const auto& tracker : limit_trackers_) {
+      if (tracker->LimitExceeded(mode)) {
         return true;
+      }
+    }
+    return false;
+  }
+
+  /// If this tracker has a limit, checks the limit and attempts to free up some memory if
+  /// the hard limit is exceeded by calling any added GC functions. Returns true if the
+  /// limit is exceeded after calling the GC functions. Returns false if there is no limit
+  /// or consumption is under the limit.
+  bool LimitExceeded(MemLimit mode) {
+    if (UNLIKELY(CheckLimitExceeded(mode))) return LimitExceededSlow(mode);
+    return false;
+  }
+  
+  // Return limit exceeded tracker or null
+  MemTracker* find_limit_exceeded_tracker() {
+    for (const auto& tracker : limit_trackers_) {
+      if (tracker->limit_exceeded()) {
+        return tracker;
+      }
+    }
+    return nullptr;
+  }
+
+  /// Returns the maximum consumption that can be made without exceeding the limit on
+  /// this tracker or any of its parents. Returns int64_t::max() if there are no
+  /// limits and a negative value if any limit is already exceeded.
+  int64_t SpareCapacity(MemLimit mode) const;
+
+  /// Refresh the memory consumption value from the consumption metric. Only valid to
+  /// call if this tracker has a consumption metric.
+  void RefreshConsumptionFromMetric();
+
+  // TODO(yingchun): following functions are old style which have no MemLimit parameter
+  bool limit_exceeded() const { return limit_ >= 0 && limit_ < consumption(); }
+
+  int64_t limit() const { return limit_; }
+  bool has_limit() const { return limit_ >= 0; }
+
+  int64_t soft_limit() const { return soft_limit_; }
+  int64_t GetLimit(MemLimit mode) const {
+    if (mode == MemLimit::SOFT) return soft_limit();
+    DCHECK_ENUM_EQ(mode, MemLimit::HARD);
+    return limit();
+  }
+  const std::string& label() const { return label_; }
+
+  /// Returns the lowest limit for this tracker and its ancestors. Returns
+  /// -1 if there is no limit.
+  int64_t GetLowestLimit(MemLimit mode) const;
+
+  /// Returns the memory 'reserved' by this resource pool mem tracker, which is the sum
+  /// of the memory reserved by the queries in it (i.e. its child trackers). The mem
+  /// reserved for a query that is currently executing is its limit_, if set (which
+  /// should be the common case with admission control). Otherwise, if the query has
+  /// no limit or the query is finished executing, the current consumption is used.
+  int64_t GetPoolMemReserved();
+
+  /// Returns the memory consumed in bytes.
+  int64_t consumption() const { return consumption_->current_value(); }
+
+  /// Note that if consumption_ is based on consumption_metric_, this will be the max value
+  /// we've recorded in consumption(), not necessarily the highest value
+  /// consumption_metric_ has ever reached.
+  int64_t peak_consumption() const { return consumption_->value(); }
+
+  std::shared_ptr<MemTracker> parent() const { return parent_; }
+
+  /// Signature for function that can be called to free some memory after limit is
+  /// reached. The function should try to free at least 'bytes_to_free' bytes of
+  /// memory. See the class header for further details on the expected behaviour of
+  /// these functions.
+  typedef std::function<void(int64_t bytes_to_free)> GcFunction;
+
+  /// Add a function 'f' to be called if the limit is reached, if none of the other
+  /// previously-added GC functions were successful at freeing up enough memory.
+  /// 'f' does not need to be thread-safe as long as it is added to only one MemTracker.
+  /// Note that 'f' must be valid for the lifetime of this MemTracker.
+  void AddGcFunction(GcFunction f);
+
+  /// Register this MemTracker's metrics. Each key will be of the form
+  /// "<prefix>.<metric name>".
+  // TODO(yingchun): remove comments
+  //void RegisterMetrics(MetricGroup* metrics, const std::string& prefix);
+
+  /// Logs the usage of this tracker and optionally its children (recursively).
+  /// If 'logged_consumption' is non-NULL, sets the consumption value logged.
+  /// 'max_recursive_depth' specifies the maximum number of levels of children
+  /// to include in the dump. If it is zero, then no children are dumped.
+  /// Limiting the recursive depth reduces the cost of dumping, particularly
+  /// for the process MemTracker.
+  /// TODO: once all memory is accounted in ReservationTracker hierarchy, move
+  /// reporting there.
+  std::string LogUsage(int max_recursive_depth,
+      const std::string& prefix = "", int64_t* logged_consumption = nullptr);
+  /// Dumping the process MemTracker is expensive. Limiting the recursive depth
+  /// to two levels limits the level of detail to a one-line summary for each query
+  /// MemTracker, avoiding all MemTrackers below that level. This provides a summary
+  /// of process usage with substantially lower cost than the full dump.
+  static const int PROCESS_MEMTRACKER_LIMITED_DEPTH = 2;
+  /// Unlimited dumping is useful for query memtrackers or error conditions that
+  /// are not performance sensitive
+  static const int UNLIMITED_DEPTH = INT_MAX;
+
+  /// Logs the usage of 'limit' number of queries based on maximum total memory
+  /// consumption.
+  std::string LogTopNQueries(int limit);
+
+  /// Log the memory usage when memory limit is exceeded and return a status object with
+  /// details of the allocation which caused the limit to be exceeded.
+  /// If 'failed_allocation_size' is greater than zero, logs the allocation size. If
+  /// 'failed_allocation_size' is zero, nothing about the allocation size is logged.
+  /// If 'state' is non-NULL, logs the error to 'state'.
+  Status MemLimitExceeded(RuntimeState* state, const std::string& details,
+      int64_t failed_allocation = 0) WARN_UNUSED_RESULT {
+    return MemLimitExceeded(this, state, details, failed_allocation);
+  }
+
+  /// Makes MemLimitExceeded callable for nullptr MemTrackers.
+  static Status MemLimitExceeded(MemTracker* mtracker, RuntimeState* state,
+      const std::string& details, int64_t failed_allocation = 0) WARN_UNUSED_RESULT;
+
+  void set_query_exec_finished() {
+    DCHECK(is_query_mem_tracker_);
+    query_exec_finished_.store(1);
+  }
+
+  static void update_limits(int64_t bytes, const std::vector<std::shared_ptr<MemTracker>>& trackers) {
+    for (auto& tracker : trackers) {
+      tracker->Consume(bytes);
+    }
+  }
+
+  static bool limit_exceeded(const std::vector<std::shared_ptr<MemTracker>>& trackers) {
+    for (const auto& tracker : trackers) {
+      if (tracker->limit_exceeded()) {
+        // TODO: remove logging
+        LOG(WARNING) << "exceeded limit: limit=" << tracker->limit() << " consumption="
+                     << tracker->consumption();
+        return true;
+      }
     }
 
-    /// Decreases consumption of this tracker and its ancestors by 'bytes'.
-    void release(int64_t bytes) {
-        if (bytes <= 0) {
-            if (bytes < 0) consume(-bytes);
-            return;
-        }
+    return false;
+  }
 
-        if (_consumption_metric != NULL) {
-            RefreshConsumptionFromMetric();
-            return;
-        }
-        for (std::vector::iterator tracker = _all_trackers.begin();
-             tracker != _all_trackers.end(); ++tracker) {
-            (*tracker)->_consumption->add(-bytes);
-            /// If a UDF calls FunctionContext::TrackAllocation() but allocates less than the
-            /// reported amount, the subsequent call to FunctionContext::Free() may cause the
-            /// process mem tracker to go negative until it is synced back to the tcmalloc
-            /// metric. Don't blow up in this case. (Note that this doesn't affect non-process
-            /// trackers since we can enforce that the reported memory usage is internally
-            /// consistent.)
-            if ((*tracker)->_consumption_metric == NULL) {
-                DCHECK_GE((*tracker)->_consumption->current_value(), 0)
-                        << std::endl
-                        << (*tracker)->LogUsage(UNLIMITED_DEPTH);
-            }
-        }
+  std::string debug_string() {
+    std::stringstream msg;
+    msg << "limit: " << limit_ << "; "
+        << "consumption: " << consumption_->current_value() << "; "
+        << "label: " << label_ << "; "
+        << "all tracker size: " << all_trackers_.size() << "; "
+        << "limit trackers size: " << limit_trackers_.size() << "; "
+        << "parent is null: " << ((parent_ == nullptr) ? "true" : "false") << "; ";
+    return msg.str();
+  }
 
-        /// TODO: Release brokered memory?
-    }
+  bool is_consumption_metric_null() { return consumption_metric_ == nullptr; }
+  
+  static const std::string COUNTER_NAME;
 
-    // Returns true if a valid limit of this tracker or one of its ancestors is exceeded.
-    bool any_limit_exceeded() {
-        for (std::vector::iterator tracker = _limit_trackers.begin();
-                tracker != _limit_trackers.end(); ++tracker) {
-            if ((*tracker)->limit_exceeded()) {
-                return true;
-            }
-        }
-        return false;
+ private:
+  friend class PoolMemTrackerRegistry;
+
+  // TODO(HW): remove later
+  /// Closes this MemTracker. After closing it is invalid to consume memory on this
+  /// tracker and the tracker's consumption counter (which may be owned by a
+  /// RuntimeProfile, not this MemTracker) can be safely destroyed. MemTrackers without
+  /// consumption metrics in the context of a daemon must always be closed.
+  /// Idempotent: calling multiple times has no effect.
+  void Close();
+
+  /// Returns true if the current memory tracker's limit is exceeded.
+  bool CheckLimitExceeded(MemLimit mode) const {
+    int64_t limit = GetLimit(mode);
+    return limit >= 0 && limit < consumption();
+  }
+
+  /// Slow path for LimitExceeded().
+  bool LimitExceededSlow(MemLimit mode);
+
+  /// If consumption is higher than max_consumption, attempts to free memory by calling
+  /// any added GC functions.  Returns true if max_consumption is still exceeded. Takes
+  /// gc_lock. Updates metrics if initialized.
+  bool GcMemory(int64_t max_consumption);
+
+  /// Walks the MemTracker hierarchy and populates all_trackers_ and
+  /// limit_trackers_
+  void Init();
+
+  /// Adds tracker to child_trackers_
+  void AddChildTracker(const std::shared_ptr<MemTracker>& tracker);
+
+  /// Log consumption of all the trackers provided. Returns the sum of consumption in
+  /// 'logged_consumption'. 'max_recursive_depth' specifies the maximum number of levels
+  /// of children to include in the dump. If it is zero, then no children are dumped.
+  static std::string LogUsage(int max_recursive_depth, const std::string& prefix,
+      const std::list<std::weak_ptr<MemTracker>>& trackers, int64_t* logged_consumption);
+
+  /// Helper function for LogTopNQueries that iterates through the MemTracker hierarchy
+  /// and populates 'min_pq' with 'limit' number of elements (that contain state related
+  /// to query MemTrackers) based on maximum total memory consumption.
+  void GetTopNQueries(
+      std::priority_queue<std::pair<int64_t, std::string>,
+          std::vector<std::pair<int64_t, std::string>>, std::greater<std::pair<int64_t, std::string>>>& min_pq,
+      int limit);
+
+  /// If an ancestor of this tracker is a query MemTracker, return that tracker.
+  /// Otherwise return NULL.
+  MemTracker* GetQueryMemTracker();
+
+  /// Increases/Decreases the consumption of this tracker and the ancestors up to (but
+  /// not including) end_tracker.
+  void ChangeConsumption(int64_t bytes, MemTracker* end_tracker) {
+    DCHECK(consumption_metric_ == nullptr) << "Should not be called on root.";
+    for (MemTracker* tracker : all_trackers_) {
+      if (tracker == end_tracker) return;
+      DCHECK(!tracker->has_limit());
+      tracker->consumption_->add(bytes);
     }
+    DCHECK(false) << "end_tracker is not an ancestor";
+  }
 
-    // Return limit exceeded tracker or null
-    MemTracker* find_limit_exceeded_tracker() {
-        for (std::vector::iterator tracker = _limit_trackers.begin();
-                tracker != _limit_trackers.end(); ++tracker) {
-            if ((*tracker)->limit_exceeded()) {
-                return *tracker;
-            }
-        }
-        return NULL;
-    }
+  // Creates the root tracker.
+  static void CreateRootTracker();
 
-    // Returns the maximum consumption that can be made without exceeding the limit on
-    // this tracker or any of its parents. Returns int64_t::max() if there are no
-    // limits and a negative value if any limit is already exceeded.
-    int64_t spare_capacity() const {
-        int64_t result = std::numeric_limits::max();
-        for (std::vector::const_iterator tracker = _limit_trackers.begin();
-                tracker != _limit_trackers.end(); ++tracker) {
-            int64_t mem_left = (*tracker)->limit() - (*tracker)->consumption();
-            result = std::min(result, mem_left);
-        }
-        return result;
-    }
+  /// Lock to protect GcMemory(). This prevents many GCs from occurring at once.
+  std::mutex gc_lock_;
 
-    /// Refresh the memory consumption value from the consumption metric. Only valid to
-    /// call if this tracker has a consumption metric.
-    void RefreshConsumptionFromMetric() {
-        DCHECK(_consumption_metric != nullptr);
-        DCHECK(_parent == nullptr);
-        _consumption->set(_consumption_metric->value());
-    }
+  /// True if this is a Query MemTracker returned from CreateQueryMemTracker().
+  bool is_query_mem_tracker_ = false;
 
-    bool limit_exceeded() const { return _limit >= 0 && _limit < consumption(); }
+  /// Only used if 'is_query_mem_tracker_' is true.
+  /// 0 if the query is still executing or 1 if it has finished executing. Before
+  /// it has finished executing, the tracker limit is treated as "reserved memory"
+  /// for the purpose of admission control - see GetPoolMemReserved().
+  std::atomic<int32_t> query_exec_finished_{0};
 
-    int64_t limit() const { return _limit; }
+  /// Only valid for MemTrackers returned from CreateQueryMemTracker()
+  TUniqueId query_id_;
 
-    bool has_limit() const { return _limit >= 0; }
+  /// Only valid for MemTrackers returned from GetRequestPoolMemTracker()
+  std::string pool_name_;
 
-    const std::string& label() const { return _label; }
+  /// Hard limit on memory consumption, in bytes. May not be exceeded. If limit_ == -1,
+  /// there is no consumption limit.
+  const int64_t limit_;
 
-    /// Returns the lowest limit for this tracker and its ancestors. Returns
-    /// -1 if there is no limit.
-    int64_t lowest_limit() const {
-        if (_limit_trackers.empty()) return -1;
-        int64_t v = std::numeric_limits::max();
-        for (int i = 0; i < _limit_trackers.size(); ++i) {
-            DCHECK(_limit_trackers[i]->has_limit());
-            v = std::min(v, _limit_trackers[i]->limit());
-        }
-        return v;
-    }
+  /// Soft limit on memory consumption, in bytes. Can be exceeded but callers to
+  /// TryConsume() can opt not to exceed this limit. If -1, there is no consumption limit.
+  const int64_t soft_limit_;
 
-    /// Returns the memory 'reserved' by this resource pool mem tracker, which is the sum
-    /// of the memory reserved by the queries in it (i.e. its child trackers). The mem
-    /// reserved for a query is its limit_, if set (which should be the common case with
-    /// admission control). Otherwise the current consumption is used.
-    int64_t GetPoolMemReserved() const;
-
-    int64_t consumption() const { return _consumption->current_value(); }
-
-    /// Note that if _consumption is based on _consumption_metric, this will the max value
-    /// we've recorded in consumption(), not necessarily the highest value
-    /// _consumption_metric has ever reached.
-    int64_t peak_consumption() const { return _consumption->value(); }
-
-    MemTracker* parent() const { return _parent; }
-
-    /// Signature for function that can be called to free some memory after limit is
-    /// reached. The function should try to free at least 'bytes_to_free' bytes of
-    /// memory. See the class header for further details on the expected behaviour of
-    /// these functions.
-    typedef std::function<void(int64_t bytes_to_free)> GcFunction;
-
-    /// Add a function 'f' to be called if the limit is reached, if none of the other
-    /// previously-added GC functions were successful at freeing up enough memory.
-    /// 'f' does not need to be thread-safe as long as it is added to only one MemTracker.
-    /// Note that 'f' must be valid for the lifetime of this MemTracker.
-    void AddGcFunction(GcFunction f);
-
-    /// Register this MemTracker's metrics. Each key will be of the form
-    /// ".".
-    void RegisterMetrics(MetricRegistry* metrics, const std::string& prefix);
-
-    /// Logs the usage of this tracker and optionally its children (recursively).
-    /// If 'logged_consumption' is non-NULL, sets the consumption value logged.
-    /// 'max_recursive_depth' specifies the maximum number of levels of children
-    /// to include in the dump. If it is zero, then no children are dumped.
-    /// Limiting the recursive depth reduces the cost of dumping, particularly
-    /// for the process MemTracker.
-    /// TODO: once all memory is accounted in ReservationTracker hierarchy, move
-    /// reporting there.
-    std::string LogUsage(int max_recursive_depth, const std::string& prefix = "",
-                         int64_t* logged_consumption = nullptr) const;
-    /// Dumping the process MemTracker is expensive. Limiting the recursive depth
-    /// to two levels limits the level of detail to a one-line summary for each query
-    /// MemTracker, avoiding all MemTrackers below that level. This provides a summary
-    /// of process usage with substantially lower cost than the full dump.
-    static const int PROCESS_MEMTRACKER_LIMITED_DEPTH = 2;
-    /// Unlimited dumping is useful for query memtrackers or error conditions that
-    /// are not performance sensitive
-    static const int UNLIMITED_DEPTH = INT_MAX;
-
-    /// Log the memory usage when memory limit is exceeded and return a status object with
-    /// details of the allocation which caused the limit to be exceeded.
-    /// If 'failed_allocation_size' is greater than zero, logs the allocation size. If
-    /// 'failed_allocation_size' is zero, nothing about the allocation size is logged.
-    Status MemLimitExceeded(RuntimeState* state, const std::string& details,
-            int64_t failed_allocation = 0);
-
-    static const std::string COUNTER_NAME;
-
-    static void update_limits(int64_t bytes, std::vector<MemTracker*>* limits) {
-        for (std::vector<MemTracker*>::iterator i = limits->begin(); i != limits->end(); ++i) {
-            (*i)->consume(bytes);
-        }
-    }
+  std::string label_;
 
-    static bool limit_exceeded(const std::vector<MemTracker*>& limits) {
-        for (std::vector<MemTracker*>::const_iterator i = limits.begin(); i != limits.end(); ++i) {
-            if ((*i)->limit_exceeded()) {
-                // TODO: remove logging
-                LOG(WARNING) << "exceeded limit: limit=" << (*i)->limit() << " consumption="
-                             << (*i)->consumption();
-                return true;
-            }
-        }
+  /// The parent of this tracker. The pointer is never modified, even after this tracker
+  /// is unregistered.
+  std::shared_ptr<MemTracker> parent_;
 
-        return false;
-    }
+  /// in bytes; not owned
+  RuntimeProfile::HighWaterMarkCounter* consumption_;
 
-    std::string debug_string() {
-        std::stringstream msg;
-        msg << "limit: " << _limit << "; "
-            << "consumption: " << _consumption->current_value() << "; "
-            << "label: " << _label << "; "
-            << "all tracker size: " << _all_trackers.size() << "; "
-            << "limit trackers size: " << _limit_trackers.size() << "; "
-            << "parent is null: " << ((_parent == NULL) ? "true" : "false") << "; ";
-        return msg.str();
-    }
+  /// holds consumption_ counter if not tied to a profile
+  RuntimeProfile::HighWaterMarkCounter local_counter_;
 
-    bool is_consumption_metric_null() { return _consumption_metric == nullptr; }
+  /// If non-NULL, used to measure consumption (in bytes) rather than the values provided
+  /// to Consume()/Release(). Only used for the process tracker, thus parent_ should be
+  /// NULL if consumption_metric_ is set.
+  IntGauge* consumption_metric_;
 
-private:
-    friend class PoolMemTrackerRegistry;
+  /// If non-NULL, counters from a corresponding ReservationTracker that should be
+  /// reported in logs and other diagnostics. Owned by this MemTracker. The counters
+  /// are owned by the fragment's RuntimeProfile.
+  AtomicPtr<ReservationTrackerCounters> reservation_counters_;
 
-    /// If consumption is higher than max_consumption, attempts to free memory by calling
-    /// any added GC functions.  Returns true if max_consumption is still exceeded. Takes
-    /// gc_lock. Updates metrics if initialized.
-    bool GcMemory(int64_t max_consumption);
+  std::vector<MemTracker*> all_trackers_;  // this tracker plus all of its ancestors
+  std::vector<MemTracker*> limit_trackers_;  // all_trackers_ with valid limits
 
-    // Walks the MemTracker hierarchy and populates _all_trackers and _limit_trackers
-    void Init();
+  // All the child trackers of this tracker. Used for error reporting and
+  // listing only (i.e. updating the consumption of a parent tracker does not
+  // update that of its children).
+  SpinLock child_trackers_lock_;
+  std::list<std::weak_ptr<MemTracker>> child_trackers_;
 
-    // Adds tracker to _child_trackers
-    void add_child_tracker(MemTracker* tracker) {
-        std::lock_guard<std::mutex> l(_child_trackers_lock);
-        tracker->_child_tracker_it = _child_trackers.insert(_child_trackers.end(), tracker);
-    }
+  /// Iterator into parent_->child_trackers_ for this object. Stored to have O(1)
+  /// remove.
+  std::list<std::weak_ptr<MemTracker>>::iterator child_tracker_it_;
+
+  /// Functions to call after the limit is reached to free memory.
+  std::vector<GcFunction> gc_functions_;
+
+  /// If false, this tracker (and its children) will not be included in LogUsage() output
+  /// if consumption is 0.
+  bool log_usage_if_zero_;
+
+  /// The number of times the GcFunctions were called.
+  IntCounter* num_gcs_metric_;
+
+  /// The number of bytes freed by the last round of calling the GcFunctions (-1 before any
+  /// GCs are performed).
+  IntGauge* bytes_freed_by_last_gc_metric_;
+
+  /// The number of bytes over the limit we were the last time LimitExceeded() was called
+  /// and the limit was exceeded pre-GC. -1 if there is no limit or the limit was never
+  /// exceeded.
+  IntGauge* bytes_over_limit_metric_;
+
+  /// Metric for limit_.
+  IntGauge* limit_metric_;
 
-    /// Log consumption of all the trackers provided. Returns the sum of consumption in
-    /// 'logged_consumption'. 'max_recursive_depth' specifies the maximum number of levels
-    /// of children to include in the dump. If it is zero, then no children are dumped.
-    static std::string LogUsage(int max_recursive_depth, const std::string& prefix,
-                                const std::list<MemTracker*>& trackers,
-                                int64_t* logged_consumption);
-
-    /// Lock to protect GcMemory(). This prevents many GCs from occurring at once.
-    std::mutex _gc_lock;
-
-    // Protects _request_to_mem_trackers and _pool_to_mem_trackers
-    static std::mutex _s_mem_trackers_lock;
-
-    // All per-request MemTracker objects that are in use.  For memory management, this map
-    // contains only weak ptrs.  MemTrackers that are handed out via get_query_mem_tracker()
-    // are shared ptrs.  When all the shared ptrs are no longer referenced, the MemTracker
-    // d'tor will be called at which point the weak ptr will be removed from the map.
-    typedef std::unordered_map<TUniqueId, std::weak_ptr<MemTracker>> RequestTrackersMap;
-    static RequestTrackersMap _s_request_to_mem_trackers;
-
-    // Only valid for MemTrackers returned from get_query_mem_tracker()
-    /// Only valid for MemTrackers returned from CreateQueryMemTracker()
-    TUniqueId _query_id;
-
-    /// Only valid for MemTrackers returned from GetRequestPoolMemTracker()
-    std::string _pool_name;
-
-    int64_t _limit; // in bytes
-    //int64_t _consumption;  // in bytes
-
-    std::string _label;
-    MemTracker* _parent;
-
-    /// in bytes; not owned
-    RuntimeProfile::HighWaterMarkCounter* _consumption;
-
-    /// holds _consumption counter if not tied to a profile
-    RuntimeProfile::HighWaterMarkCounter _local_counter;
-
-    /// If non-NULL, used to measure consumption (in bytes) rather than the values provided
-    /// to Consume()/Release(). Only used for the process tracker, thus parent_ should be
-    /// NULL if _consumption_metric is set.
-    UIntGauge* _consumption_metric;
-
-    /// If non-NULL, counters from a corresponding ReservationTracker that should be
-    /// reported in logs and other diagnostics. Owned by this MemTracker. The counters
-    /// are owned by the fragment's RuntimeProfile.
-    AtomicPtr<ReservationTrackerCounters> _reservation_counters;
-
-    std::vector<MemTracker*> _all_trackers;   // this tracker plus all of its ancestors
-    std::vector<MemTracker*> _limit_trackers; // _all_trackers with valid limits
-
-    // All the child trackers of this tracker. Used for error reporting only.
-    // i.e., Updating a parent tracker does not update the children.
-    mutable std::mutex _child_trackers_lock;
-    std::list<MemTracker*> _child_trackers;
-    // Iterator into _parent->_child_trackers for this object. Stored to have O(1)
-    // remove.
-    std::list<MemTracker*>::iterator _child_tracker_it;
-
-    /// Functions to call after the limit is reached to free memory.
-    std::vector<GcFunction> _gc_functions;
-
-    /// If false, this tracker (and its children) will not be included in LogUsage() output
-    /// if consumption is 0.
-    bool _log_usage_if_zero;
-
-    /// The number of times the GcFunctions were called.
-    IntCounter* _num_gcs_metric;
-
-    /// The number of bytes freed by the last round of calling the GcFunctions (-1 before any
-    /// GCs are performed).
-    IntGauge* _bytes_freed_by_last_gc_metric;
-
-    /// The number of bytes over the limit we were the last time LimitExceeded() was called
-    /// and the limit was exceeded pre-GC. -1 if there is no limit or the limit was never
-    /// exceeded.
-    IntGauge* _bytes_over_limit_metric;
-
-    /// Metric for limit_.
-    IntGauge* _limit_metric;
-
-    // If true, calls unregister_from_parent() in the dtor. This is only used for
-    // the query wide trackers to remove it from the process mem tracker. The
-    // process tracker never gets deleted so it is safe to reference it in the dtor.
-    // The query tracker has lifetime shared by multiple plan fragments so it's hard
-    // to do cleanup another way.
-    bool _auto_unregister = false;
+  // If true, calls unregister_from_parent() in the dtor. This is only used for
+  // the query wide trackers to remove it from the process mem tracker. The
+  // process tracker never gets deleted so it is safe to reference it in the dtor.
+  // The query tracker has lifetime shared by multiple plan fragments so it's hard
+  // to do cleanup another way.
+  bool auto_unregister_ = false;
 };
 
 /// Global registry for query and pool MemTrackers. Owned by ExecEnv.
@@ -507,19 +591,19 @@ class PoolMemTrackerRegistry {
   /// with the process tracker as its parent. There is no explicit per-pool byte_limit
   /// set at any particular impalad, so newly created trackers will always have a limit
   /// of -1.
-    MemTracker* GetRequestPoolMemTracker(const std::string& pool_name, bool create_if_not_present);
+  MemTracker* GetRequestPoolMemTracker(
+      const std::string& pool_name, bool create_if_not_present);
 
  private:
   /// All per-request pool MemTracker objects. It is assumed that request pools will live
   /// for the entire duration of the process lifetime so MemTrackers are never removed
   /// from this map. Protected by '_pool_to_mem_trackers_lock'
   typedef std::unordered_map<std::string, std::unique_ptr<MemTracker>> PoolTrackersMap;
-  PoolTrackersMap _pool_to_mem_trackers;
+  PoolTrackersMap pool_to_mem_trackers_;
   /// IMPALA-3068: Use SpinLock instead of std::mutex so that the lock won't
   /// automatically destroy itself as part of process teardown, which could cause races.
-  SpinLock _pool_to_mem_trackers_lock;
+  SpinLock pool_to_mem_trackers_lock_;
 };
 
-} // namespace doris
+}
 
-#endif
diff --git a/be/src/runtime/memory_scratch_sink.cpp b/be/src/runtime/memory_scratch_sink.cpp
index 8093850d2c9853..fcc20c49485c98 100644
--- a/be/src/runtime/memory_scratch_sink.cpp
+++ b/be/src/runtime/memory_scratch_sink.cpp
@@ -50,8 +50,7 @@ Status MemoryScratchSink::prepare_exprs(RuntimeState* state) {
     RETURN_IF_ERROR(Expr::create_expr_trees(
             state->obj_pool(), _t_output_expr, &_output_expr_ctxs));
     // Prepare the exprs to run.
-    RETURN_IF_ERROR(Expr::prepare(
-            _output_expr_ctxs, state, _row_desc, _expr_mem_tracker.get()));
+    RETURN_IF_ERROR(Expr::prepare(_output_expr_ctxs, state, _row_desc, _expr_mem_tracker));
     // generate the arrow schema 
     RETURN_IF_ERROR(convert_to_arrow_schema(_row_desc, &_arrow_schema));
     return Status::OK();
diff --git a/be/src/runtime/mysql_table_sink.cpp b/be/src/runtime/mysql_table_sink.cpp
index ec267e93e7fb5f..035b89031dacc2 100644
--- a/be/src/runtime/mysql_table_sink.cpp
+++ b/be/src/runtime/mysql_table_sink.cpp
@@ -57,7 +57,7 @@ Status MysqlTableSink::init(const TDataSink& t_sink) {
 Status MysqlTableSink::prepare(RuntimeState* state) {
     RETURN_IF_ERROR(DataSink::prepare(state));
     // Prepare the exprs to run.
-    RETURN_IF_ERROR(Expr::prepare(_output_expr_ctxs, state, _row_desc, _mem_tracker.get()));
+    RETURN_IF_ERROR(Expr::prepare(_output_expr_ctxs, state, _row_desc, _mem_tracker));
     std::stringstream title;
     title << "MysqlTableSink (frag_id=" << state->fragment_instance_id() << ")";
     // create profile
diff --git a/be/src/runtime/plan_fragment_executor.cpp b/be/src/runtime/plan_fragment_executor.cpp
index d55dd19e0c1b72..2aeef9a52b8548 100644
--- a/be/src/runtime/plan_fragment_executor.cpp
+++ b/be/src/runtime/plan_fragment_executor.cpp
@@ -136,7 +136,7 @@ Status PlanFragmentExecutor::prepare(const TExecPlanFragmentParams& request) {
     // NOTE: this MemTracker only for olap
     _mem_tracker.reset(
             new MemTracker(bytes_limit, "fragment mem-limit", _exec_env->process_mem_tracker()));
-    _runtime_state->set_fragment_mem_tracker(_mem_tracker.get());
+    _runtime_state->set_fragment_mem_tracker(_mem_tracker);
 
     LOG(INFO) << "Using query memory limit: "
         << PrettyPrinter::print(bytes_limit, TUnit::BYTES);
@@ -221,7 +221,7 @@ Status PlanFragmentExecutor::prepare(const TExecPlanFragmentParams& request) {
     _row_batch.reset(new RowBatch(
             _plan->row_desc(),
             _runtime_state->batch_size(),
-            _runtime_state->instance_mem_tracker()));
+            _runtime_state->instance_mem_tracker().get()));
     // _row_batch->tuple_data_pool()->set_limits(*_runtime_state->mem_trackers());
     VLOG(3) << "plan_root=\n" << _plan->debug_string();
     _prepared = true;
@@ -574,7 +574,7 @@ void PlanFragmentExecutor::close() {
      
     // _mem_tracker init failed
     if (_mem_tracker.get() != nullptr) {
-        _mem_tracker->release(_mem_tracker->consumption());
+        _mem_tracker->Release(_mem_tracker->consumption());
     }
     _closed = true;
 }
diff --git a/be/src/runtime/plan_fragment_executor.h b/be/src/runtime/plan_fragment_executor.h
index bd4b0a2e69d364..3d36c70a3a8f6b 100644
--- a/be/src/runtime/plan_fragment_executor.h
+++ b/be/src/runtime/plan_fragment_executor.h
@@ -154,8 +154,7 @@ class PlanFragmentExecutor {
     ExecEnv* _exec_env;  // not owned
     ExecNode* _plan;  // lives in _runtime_state->obj_pool()
     TUniqueId _query_id;
-    // MemTracker* _mem_tracker;
-    boost::scoped_ptr _mem_tracker;
+    std::shared_ptr _mem_tracker;
 
     // profile reporting-related
     report_status_callback _report_status_cb;
diff --git a/be/src/runtime/qsorter.cpp b/be/src/runtime/qsorter.cpp
index eaabebf1bc2fbb..e9c6b405f5cf6e 100644
--- a/be/src/runtime/qsorter.cpp
+++ b/be/src/runtime/qsorter.cpp
@@ -84,7 +84,7 @@ QSorter::QSorter(
             RuntimeState* state) :
         _row_desc(row_desc),
         _order_expr_ctxs(order_expr_ctxs),
-        _tuple_pool(new MemPool(state->instance_mem_tracker())) {
+        _tuple_pool(new MemPool(state->instance_mem_tracker().get())) {
 }
 
 Status QSorter::prepare(RuntimeState* state) {
diff --git a/be/src/runtime/result_sink.cpp b/be/src/runtime/result_sink.cpp
index af85fb8256d1c0..0a39f0672dbda1 100644
--- a/be/src/runtime/result_sink.cpp
+++ b/be/src/runtime/result_sink.cpp
@@ -57,8 +57,7 @@ Status ResultSink::prepare_exprs(RuntimeState* state) {
     RETURN_IF_ERROR(Expr::create_expr_trees(
             state->obj_pool(), _t_output_expr, &_output_expr_ctxs));
     // Prepare the exprs to run.
-    RETURN_IF_ERROR(Expr::prepare(
-            _output_expr_ctxs, state, _row_desc, _expr_mem_tracker.get()));
+    RETURN_IF_ERROR(Expr::prepare(_output_expr_ctxs, state, _row_desc, _expr_mem_tracker));
     return Status::OK();
 }
 
diff --git a/be/src/runtime/row_batch.cpp b/be/src/runtime/row_batch.cpp
index 4cfd95399602d0..2fd4fcf366a565 100644
--- a/be/src/runtime/row_batch.cpp
+++ b/be/src/runtime/row_batch.cpp
@@ -56,7 +56,7 @@ RowBatch::RowBatch(const RowDescriptor& row_desc, int capacity, MemTracker* mem_
     DCHECK_GT(_tuple_ptrs_size, 0);
     // TODO: switch to Init() pattern so we can check memory limit and return Status.
     if (config::enable_partitioned_aggregation) {
-        _mem_tracker->consume(_tuple_ptrs_size);
+        _mem_tracker->Consume(_tuple_ptrs_size);
         _tuple_ptrs = reinterpret_cast<Tuple**>(malloc(_tuple_ptrs_size));
         DCHECK(_tuple_ptrs != NULL);
     } else {
@@ -90,7 +90,7 @@ RowBatch::RowBatch(const RowDescriptor& row_desc,
     DCHECK_GT(_tuple_ptrs_size, 0);
     // TODO: switch to Init() pattern so we can check memory limit and return Status.
     if (config::enable_partitioned_aggregation) {
-        _mem_tracker->consume(_tuple_ptrs_size);
+        _mem_tracker->Consume(_tuple_ptrs_size);
         _tuple_ptrs = reinterpret_cast<Tuple**>(malloc(_tuple_ptrs_size));
         DCHECK(_tuple_ptrs != nullptr);
     } else {
@@ -188,7 +188,7 @@ RowBatch::RowBatch(const RowDescriptor& row_desc, const TRowBatch& input_batch,
     DCHECK_GT(_tuple_ptrs_size, 0);
     // TODO: switch to Init() pattern so we can check memory limit and return Status.
     if (config::enable_partitioned_aggregation) {
-        _mem_tracker->consume(_tuple_ptrs_size);
+        _mem_tracker->Consume(_tuple_ptrs_size);
         _tuple_ptrs = reinterpret_cast<Tuple**>(malloc(_tuple_ptrs_size));
         DCHECK(_tuple_ptrs != NULL);
     } else {
@@ -291,7 +291,7 @@ void RowBatch::clear() {
     if (config::enable_partitioned_aggregation) {
         DCHECK(_tuple_ptrs != NULL);
         free(_tuple_ptrs);
-        _mem_tracker->release(_tuple_ptrs_size);
+        _mem_tracker->Release(_tuple_ptrs_size);
         _tuple_ptrs = NULL;
     }
     _cleared = true;
@@ -438,7 +438,7 @@ void RowBatch::add_io_buffer(DiskIoMgr::BufferDescriptor* buffer) {
     DCHECK(buffer != NULL);
     _io_buffers.push_back(buffer);
     _auxiliary_mem_usage += buffer->buffer_len();
-    buffer->set_mem_tracker(_mem_tracker);
+    buffer->set_mem_tracker(std::shared_ptr<MemTracker>(_mem_tracker));  // TODO(yingchun): fixme
 }
 
 Status RowBatch::resize_and_allocate_tuple_buffer(RuntimeState* state,
@@ -522,7 +522,7 @@ void RowBatch::transfer_resource_ownership(RowBatch* dest) {
         DiskIoMgr::BufferDescriptor* buffer = _io_buffers[i];
         dest->_io_buffers.push_back(buffer);
         dest->_auxiliary_mem_usage += buffer->buffer_len();
-        buffer->set_mem_tracker(dest->_mem_tracker);
+        buffer->set_mem_tracker(std::shared_ptr<MemTracker>(dest->_mem_tracker));   // TODO(yingchun): fixme
     }
     _io_buffers.clear();
 
@@ -585,7 +585,7 @@ void RowBatch::acquire_state(RowBatch* src) {
         DiskIoMgr::BufferDescriptor* buffer = src->_io_buffers[i];
         _io_buffers.push_back(buffer);
         _auxiliary_mem_usage += buffer->buffer_len();
-        buffer->set_mem_tracker(_mem_tracker);
+        buffer->set_mem_tracker(std::shared_ptr<MemTracker>(_mem_tracker));  // TODO(yingchun): fixme
     }
     src->_io_buffers.clear();
     src->_auxiliary_mem_usage = 0;
diff --git a/be/src/runtime/runtime_state.cpp b/be/src/runtime/runtime_state.cpp
index 372c80894f0bff..2192ed3ddad7f3 100644
--- a/be/src/runtime/runtime_state.cpp
+++ b/be/src/runtime/runtime_state.cpp
@@ -155,23 +155,14 @@ RuntimeState::~RuntimeState() {
     }
 
 #ifndef BE_TEST
+    // TODO: cleanup this comment
     // _query_mem_tracker must be valid as long as _instance_mem_tracker is so
     // delete _instance_mem_tracker first.
     // LogUsage() walks the MemTracker tree top-down when the memory limit is exceeded.
     // Break the link between the instance_mem_tracker and its parent (_query_mem_tracker)
     // before the _instance_mem_tracker and its children are destroyed.
-    if (_instance_mem_tracker.get() != NULL) {
-        // May be NULL if InitMemTrackers() is not called, for example from tests.
-        _instance_mem_tracker->unregister_from_parent();
-        _instance_mem_tracker->close();
-    }
 
     _instance_mem_tracker.reset();
-   
-    if (_query_mem_tracker.get() != NULL) {
-        _query_mem_tracker->unregister_from_parent();
-        _query_mem_tracker->close();
-    }
     _query_mem_tracker.reset();
 #endif
 }
@@ -236,9 +227,9 @@ Status RuntimeState::init_mem_trackers(const TUniqueId& query_id) {
     mem_tracker_counter->set(bytes_limit);
 
     _query_mem_tracker.reset(
-            new MemTracker(bytes_limit, runtime_profile()->name(), _exec_env->process_mem_tracker()));
+            new MemTracker(bytes_limit, std::string("RuntimeState: query ") + runtime_profile()->name(), _exec_env->process_mem_tracker()));
     _instance_mem_tracker.reset(
-            new MemTracker(&_profile, -1, runtime_profile()->name(), _query_mem_tracker.get()));
+            new MemTracker(&_profile, -1, std::string("RuntimeState: instance ") + runtime_profile()->name(), _query_mem_tracker));
 
     /*
     // TODO: this is a stopgap until we implement ExprContext
@@ -250,9 +241,9 @@ Status RuntimeState::init_mem_trackers(const TUniqueId& query_id) {
 
     RETURN_IF_ERROR(init_buffer_poolstate());
 
-    _initial_reservations = _obj_pool->add(new InitialReservations(_obj_pool.get(),
-                      _buffer_reservation, _query_mem_tracker.get(), 
-                      _query_options.initial_reservation_total_claims));
+    _initial_reservations = _obj_pool->add(
+            new InitialReservations(_obj_pool.get(), _buffer_reservation, _query_mem_tracker,
+                                    _query_options.initial_reservation_total_claims));
     RETURN_IF_ERROR(
         _initial_reservations->Init(_query_id, min_reservation()));
     DCHECK_EQ(0, _initial_reservation_refcnt.load());
@@ -273,7 +264,7 @@ Status RuntimeState::init_instance_mem_tracker() {
 
 Status RuntimeState::init_buffer_poolstate() {
   ExecEnv* exec_env = ExecEnv::GetInstance();
-  int64_t mem_limit = _query_mem_tracker->lowest_limit();
+  int64_t mem_limit = _query_mem_tracker->GetLowestLimit(MemLimit::HARD);
   int64_t max_reservation;
   if (query_options().__isset.buffer_pool_limit
       && query_options().buffer_pool_limit > 0) {
@@ -303,7 +294,7 @@ Status RuntimeState::create_block_mgr() {
     if (block_mgr_limit < 0) {
         block_mgr_limit = std::numeric_limits<int64_t>::max();
     }
-    RETURN_IF_ERROR(BufferedBlockMgr2::create(this, _query_mem_tracker.get(),
+    RETURN_IF_ERROR(BufferedBlockMgr2::create(this, _query_mem_tracker,
             runtime_profile(), _exec_env->tmp_file_mgr(),
             block_mgr_limit, _exec_env->disk_io_mgr()->max_read_buffer_size(), &_block_mgr2));
     return Status::OK();
diff --git a/be/src/runtime/runtime_state.h b/be/src/runtime/runtime_state.h
index dcd97e2b3eec3e..e0f7e89a5dce2d 100644
--- a/be/src/runtime/runtime_state.h
+++ b/be/src/runtime/runtime_state.h
@@ -162,17 +162,15 @@ class RuntimeState {
     ExecEnv* exec_env() {
         return _exec_env;
     }
-    std::vector* mem_trackers() {
-        return &_mem_trackers;
+    const std::vector<std::shared_ptr<MemTracker>>& mem_trackers() {
+        return _mem_trackers;
     }
-    MemTracker* fragment_mem_tracker() {
+   std::shared_ptr<MemTracker> fragment_mem_tracker() {
         return _fragment_mem_tracker;
     }
-    MemTracker* instance_mem_tracker() { {
-        return _instance_mem_tracker.get(); }
-    }
-    MemTracker* query_mem_tracker() { {
-        return _query_mem_tracker.get(); }
+
+    std::shared_ptr<MemTracker> instance_mem_tracker() {
+        return _instance_mem_tracker;
     }
     ThreadResourceMgr::ResourcePool* resource_pool() {
         return _resource_pool;
@@ -226,10 +224,10 @@ class RuntimeState {
         int buffer_size, RuntimeProfile* profile);
 
     // Sets the fragment memory limit and adds it to _mem_trackers
-    void set_fragment_mem_tracker(MemTracker* limit) {
-        DCHECK(_fragment_mem_tracker == NULL);
-        _fragment_mem_tracker = limit;
-        _mem_trackers.push_back(limit);
+    void set_fragment_mem_tracker(std::shared_ptr<MemTracker> tracker) {
+        DCHECK(_fragment_mem_tracker == nullptr);
+        _fragment_mem_tracker = tracker;
+        _mem_trackers.push_back(tracker);
     }
 
     // Appends error to the _error_log if there is space
@@ -552,17 +550,17 @@ class RuntimeState {
     ThreadResourceMgr::ResourcePool* _resource_pool;
 
     // all mem limits that apply to this query
-    std::vector _mem_trackers;
+    std::vector<std::shared_ptr<MemTracker>> _mem_trackers;
 
     // Fragment memory limit.  Also contained in _mem_trackers
-    MemTracker* _fragment_mem_tracker;
+    std::shared_ptr<MemTracker> _fragment_mem_tracker;
 
     // MemTracker that is shared by all fragment instances running on this host.
     // The query mem tracker must be released after the _instance_mem_tracker.
-    boost::shared_ptr<MemTracker> _query_mem_tracker;
+    std::shared_ptr<MemTracker> _query_mem_tracker;
 
     // Memory usage of this fragment instance
-    boost::scoped_ptr<MemTracker> _instance_mem_tracker;
+    std::shared_ptr<MemTracker> _instance_mem_tracker;
 
     // if true, execution should stop with a CANCELLED status
     bool _is_cancelled;
diff --git a/be/src/runtime/spill_sorter.cc b/be/src/runtime/spill_sorter.cc
index b3317313761fdc..80d1b30a980796 100644
--- a/be/src/runtime/spill_sorter.cc
+++ b/be/src/runtime/spill_sorter.cc
@@ -646,7 +646,7 @@ Status SpillSorter::Run::prepare_read() {
     //         _sorter->_state->batch_size(), _sorter->_mem_tracker));
     _buffered_batch.reset(
             new RowBatch(
-                *_sorter->_output_row_desc, _sorter->_state->batch_size(), _sorter->_mem_tracker));
+                *_sorter->_output_row_desc, _sorter->_state->batch_size(), _sorter->_mem_tracker.get()));
 
     // If the run is pinned, merge is not invoked, so _buffered_batch is not needed
     // and the individual blocks do not need to be pinned.
@@ -1031,7 +1031,7 @@ inline void SpillSorter::TupleSorter::swap(uint8_t* left, uint8_t* right) {
 // SpillSorter methods
 SpillSorter::SpillSorter(const TupleRowComparator& compare_less_than,
         const vector<ExprContext*>& slot_materialize_expr_ctxs,
-        RowDescriptor* output_row_desc, MemTracker* mem_tracker,
+        RowDescriptor* output_row_desc, const std::shared_ptr<MemTracker>& mem_tracker,
         RuntimeProfile* profile, RuntimeState* state) :
     _state(state),
     _compare_less_than(compare_less_than),
@@ -1258,7 +1258,7 @@ Status SpillSorter::merge_intermediate_runs() {
         int num_runs_to_merge = std::min(max_runs_per_intermediate_merge,
                 _sorted_runs.size() - max_runs_per_intermediate_merge);
         RETURN_IF_ERROR(create_merger(num_runs_to_merge));
-        RowBatch intermediate_merge_batch(*_output_row_desc, _state->batch_size(), _mem_tracker);
+        RowBatch intermediate_merge_batch(*_output_row_desc, _state->batch_size(), _mem_tracker.get());
         // merged_run is the new sorted run that is produced by the intermediate merge.
         Run* merged_run = _obj_pool.add(
                 new Run(this, _output_row_desc->tuple_descriptors()[0], false));
diff --git a/be/src/runtime/spill_sorter.h b/be/src/runtime/spill_sorter.h
index d8ddab65c014a9..5bafee93efc6ca 100644
--- a/be/src/runtime/spill_sorter.h
+++ b/be/src/runtime/spill_sorter.h
@@ -94,7 +94,7 @@ class SpillSorter {
     // and retrieve rows from an intermediate merger.
     SpillSorter(const TupleRowComparator& compare_less_than,
             const std::vector<ExprContext*>& sort_tuple_slot_expr_ctxs,
-            RowDescriptor* output_row_desc, MemTracker* mem_tracker,
+            RowDescriptor* output_row_desc, const std::shared_ptr<MemTracker>& mem_tracker,
             RuntimeProfile* profile, RuntimeState* state);
 
     ~SpillSorter();
@@ -174,7 +174,7 @@ class SpillSorter {
     std::vector<ExprContext*> _sort_tuple_slot_expr_ctxs;
 
     // Mem tracker for batches created during merge. Not owned by SpillSorter.
-    MemTracker* _mem_tracker;
+    std::shared_ptr<MemTracker> _mem_tracker;
 
     // Descriptor for the sort tuple. Input rows are materialized into 1 tuple before
     // sorting. Not owned by the SpillSorter.
diff --git a/be/src/runtime/tablets_channel.cpp b/be/src/runtime/tablets_channel.cpp
index 2a125af94f7dab..e0b15dcf2162b8 100644
--- a/be/src/runtime/tablets_channel.cpp
+++ b/be/src/runtime/tablets_channel.cpp
@@ -29,7 +29,7 @@ namespace doris {
 
 std::atomic<uint64_t> TabletsChannel::_s_tablet_writer_count;
 
-TabletsChannel::TabletsChannel(const TabletsChannelKey& key, MemTracker* mem_tracker):
+TabletsChannel::TabletsChannel(const TabletsChannelKey& key, const std::shared_ptr<MemTracker>& mem_tracker):
         _key(key), _state(kInitialized), _closed_senders(64) {
     _mem_tracker.reset(new MemTracker(-1, "tablets channel", mem_tracker));
     static std::once_flag once_flag;
@@ -235,7 +235,7 @@ Status TabletsChannel::_open_all_writers(const PTabletWriterOpenRequest& params)
         request.slots = index_slots;
 
         DeltaWriter* writer = nullptr;
-        auto st = DeltaWriter::open(&request, _mem_tracker.get(),  &writer);
+        auto st = DeltaWriter::open(&request, _mem_tracker,  &writer);
         if (st != OLAP_SUCCESS) {
             std::stringstream ss;
             ss << "open delta writer failed, tablet_id=" << tablet.tablet_id()
diff --git a/be/src/runtime/tablets_channel.h b/be/src/runtime/tablets_channel.h
index c9953c3cd3463a..6e7851152b508b 100644
--- a/be/src/runtime/tablets_channel.h
+++ b/be/src/runtime/tablets_channel.h
@@ -56,7 +56,7 @@ class OlapTableSchemaParam;
 // Write channel for a particular (load, index).
 class TabletsChannel {
 public:
-    TabletsChannel(const TabletsChannelKey& key, MemTracker* mem_tracker);
+    TabletsChannel(const TabletsChannelKey& key, const std::shared_ptr<MemTracker>& mem_tracker);
 
     ~TabletsChannel();
 
@@ -123,7 +123,7 @@ class TabletsChannel {
 
     std::unordered_set _partition_ids;
 
-    std::unique_ptr _mem_tracker;
+    std::shared_ptr<MemTracker> _mem_tracker;
 
     static std::atomic _s_tablet_writer_count;
 };
diff --git a/be/src/runtime/test_env.cc b/be/src/runtime/test_env.cc
index 136929809ee04b..1289aaad5f9058 100644
--- a/be/src/runtime/test_env.cc
+++ b/be/src/runtime/test_env.cc
@@ -33,7 +33,7 @@ TestEnv::TestEnv() {
     // _exec_env->init_for_tests();
     _io_mgr_tracker.reset(new MemTracker(-1));
     _block_mgr_parent_tracker.reset(new MemTracker(-1));
-    _exec_env->disk_io_mgr()->init(_io_mgr_tracker.get());
+    _exec_env->disk_io_mgr()->init(_io_mgr_tracker);
     init_metrics();
     _tmp_file_mgr.reset(new TmpFileMgr());
     _tmp_file_mgr->init(_metrics.get());
@@ -77,7 +77,7 @@ Status TestEnv::create_query_state(int64_t query_id, int max_buffers, int block_
 
     shared_ptr mgr;
     RETURN_IF_ERROR(BufferedBlockMgr2::create(
-                *runtime_state, _block_mgr_parent_tracker.get(),
+                *runtime_state, _block_mgr_parent_tracker,
                 (*runtime_state)->runtime_profile(), _tmp_file_mgr.get(),
                 calculate_mem_tracker(max_buffers, block_size), block_size, &mgr));
     (*runtime_state)->set_block_mgr2(mgr);
diff --git a/be/src/runtime/test_env.h b/be/src/runtime/test_env.h
index 1a2c49bf470a57..1aa937eb2e4497 100644
--- a/be/src/runtime/test_env.h
+++ b/be/src/runtime/test_env.h
@@ -56,8 +56,8 @@ class TestEnv {
     ExecEnv* exec_env() {
         return _exec_env.get();
     }
-    MemTracker* block_mgr_parent_tracker() {
-        return _block_mgr_parent_tracker.get();
+    std::shared_ptr<MemTracker> block_mgr_parent_tracker() {
+        return _block_mgr_parent_tracker;
     }
     MemTracker* io_mgr_tracker() {
         return _io_mgr_tracker.get();
@@ -80,8 +80,8 @@ class TestEnv {
     // Global state for test environment.
     static boost::scoped_ptr _s_static_metrics;
     boost::scoped_ptr _exec_env;
-    boost::scoped_ptr<MemTracker> _block_mgr_parent_tracker;
-    boost::scoped_ptr<MemTracker> _io_mgr_tracker;
+    std::shared_ptr<MemTracker> _block_mgr_parent_tracker;
+    std::shared_ptr<MemTracker> _io_mgr_tracker;
     boost::scoped_ptr _metrics;
     boost::scoped_ptr _tmp_file_mgr;
 
diff --git a/be/src/testutil/function_utils.cpp b/be/src/testutil/function_utils.cpp
index 28ac6c1d152f3f..0506dcc462a511 100644
--- a/be/src/testutil/function_utils.cpp
+++ b/be/src/testutil/function_utils.cpp
@@ -29,8 +29,8 @@ namespace doris {
 FunctionUtils::FunctionUtils() {
     doris_udf::FunctionContext::TypeDesc return_type;
     std::vector arg_types;
-    _mem_tracker = new MemTracker();
-    _memory_pool = new MemPool(_mem_tracker);
+    _mem_tracker.reset(new MemTracker(-1, "function util"));
+    _memory_pool = new MemPool(_mem_tracker.get());
     _fn_ctx = FunctionContextImpl::create_context(
         _state, _memory_pool, return_type, arg_types, 0, false);
 }
@@ -38,8 +38,8 @@ FunctionUtils::FunctionUtils(RuntimeState* state) {
     _state = state;
     doris_udf::FunctionContext::TypeDesc return_type;
     std::vector arg_types;
-    _mem_tracker = new MemTracker();
-    _memory_pool = new MemPool(_mem_tracker);
+    _mem_tracker.reset(new MemTracker(-1, "function util"));
+    _memory_pool = new MemPool(_mem_tracker.get());
     _fn_ctx = FunctionContextImpl::create_context(
         _state, _memory_pool, return_type, arg_types, 0, false);
 }
@@ -48,7 +48,6 @@ FunctionUtils::~FunctionUtils() {
     _fn_ctx->impl()->close();
     delete _fn_ctx;
     delete _memory_pool;
-    delete _mem_tracker;
 }
 
 }
diff --git a/be/src/testutil/function_utils.h b/be/src/testutil/function_utils.h
index d777aedb0c7408..041f6b2b557e5b 100644
--- a/be/src/testutil/function_utils.h
+++ b/be/src/testutil/function_utils.h
@@ -15,6 +15,8 @@
 // specific language governing permissions and limitations
 // under the License.
 
+#include <memory>
+
 namespace doris_udf {
 class FunctionContext;
 }
@@ -36,7 +38,7 @@ class FunctionUtils {
     }
 private:
     RuntimeState* _state = nullptr;
-    MemTracker* _mem_tracker = nullptr;
+    std::shared_ptr<MemTracker> _mem_tracker;
     MemPool* _memory_pool = nullptr;
     doris_udf::FunctionContext* _fn_ctx = nullptr;
 };
diff --git a/be/src/util/arrow/row_batch.cpp b/be/src/util/arrow/row_batch.cpp
index 4dd9294da9fe93..6a750f9ca750a7 100644
--- a/be/src/util/arrow/row_batch.cpp
+++ b/be/src/util/arrow/row_batch.cpp
@@ -363,7 +363,7 @@ class ToRowBatchConverter : public arrow::ArrayVisitor {
 
     ToRowBatchConverter(const arrow::RecordBatch& batch,
                         const RowDescriptor& row_desc,
-                        MemTracker* tracker)
+                        std::shared_ptr<MemTracker> tracker)
         : _batch(batch), _row_desc(row_desc), _tracker(tracker) { }
 
 #define PRIMITIVE_VISIT(TYPE) \
@@ -407,7 +407,7 @@ class ToRowBatchConverter : public arrow::ArrayVisitor {
 private:
     const arrow::RecordBatch& _batch;
     const RowDescriptor& _row_desc;
-    MemTracker* _tracker;
+    std::shared_ptr<MemTracker> _tracker;
 
     std::unique_ptr _cur_slot_ref;
     std::shared_ptr _output;
@@ -427,7 +427,7 @@ Status ToRowBatchConverter:: convert(std::shared_ptr* result) {
     // TODO(zc): check if field type match
 
     size_t num_rows = _batch.num_rows();
-    _output.reset(new RowBatch(_row_desc, num_rows, _tracker));
+    _output.reset(new RowBatch(_row_desc, num_rows, _tracker.get()));
     _output->commit_rows(num_rows);
     auto pool = _output->tuple_data_pool();
     for (size_t row_id = 0; row_id < num_rows; ++row_id) {
@@ -454,7 +454,7 @@ Status ToRowBatchConverter:: convert(std::shared_ptr* result) {
 
 Status convert_to_row_batch(const arrow::RecordBatch& batch,
                             const RowDescriptor& row_desc,
-                            MemTracker* tracker,
+                            std::shared_ptr<MemTracker> tracker,
                             std::shared_ptr* result) {
     ToRowBatchConverter converter(batch, row_desc, tracker);
     return converter.convert(result);
diff --git a/be/src/util/arrow/row_batch.h b/be/src/util/arrow/row_batch.h
index eb68fe46abdf83..68d03ffcb90108 100644
--- a/be/src/util/arrow/row_batch.h
+++ b/be/src/util/arrow/row_batch.h
@@ -68,7 +68,7 @@ Status convert_to_arrow_batch(
 Status convert_to_row_batch(
     const arrow::RecordBatch& batch,
     const RowDescriptor& row_desc,
-    MemTracker* tracker,
+    std::shared_ptr<MemTracker> tracker,
     std::shared_ptr* result);
 
 Status serialize_record_batch(const arrow::RecordBatch& record_batch, std::string* result);
diff --git a/be/test/exec/broker_scan_node_test.cpp b/be/test/exec/broker_scan_node_test.cpp
index d9804cf1d2576a..a06ca823595616 100644
--- a/be/test/exec/broker_scan_node_test.cpp
+++ b/be/test/exec/broker_scan_node_test.cpp
@@ -457,9 +457,9 @@ TEST_F(BrokerScanNodeTest, normal) {
     status = scan_node.open(&_runtime_state);
     ASSERT_TRUE(status.ok());
 
-    MemTracker tracker;
+    auto tracker = std::make_shared<MemTracker>();
     // Get batch
-    RowBatch batch(scan_node.row_desc(), _runtime_state.batch_size(), &tracker);
+    RowBatch batch(scan_node.row_desc(), _runtime_state.batch_size(), tracker.get());
 
     bool eos = false;
     status = scan_node.get_next(&_runtime_state, &batch, &eos);
diff --git a/be/test/exec/broker_scanner_test.cpp b/be/test/exec/broker_scanner_test.cpp
index 1c59c10d790a4f..790f560af0008a 100644
--- a/be/test/exec/broker_scanner_test.cpp
+++ b/be/test/exec/broker_scanner_test.cpp
@@ -38,7 +38,7 @@ namespace doris {
 
 class BrokerScannerTest : public testing::Test {
 public:
-    BrokerScannerTest() : _runtime_state(TQueryGlobals()) {
+    BrokerScannerTest() : _tracker(new MemTracker()), _runtime_state(TQueryGlobals()) {
         init();
         _profile = _runtime_state.runtime_profile();
         _runtime_state._instance_mem_tracker.reset(new MemTracker());
@@ -59,7 +59,7 @@ class BrokerScannerTest : public testing::Test {
     void init_desc_table();
     void init_params();
 
-    MemTracker _tracker;
+    std::shared_ptr<MemTracker> _tracker;
     RuntimeState _runtime_state;
     RuntimeProfile* _profile;
     ObjectPool _obj_pool;
@@ -361,7 +361,7 @@ TEST_F(BrokerScannerTest, normal) {
     auto st = scanner.open();
     ASSERT_TRUE(st.ok());
 
-    MemPool tuple_pool(&_tracker);
+    MemPool tuple_pool(_tracker.get());
     Tuple* tuple = (Tuple*)tuple_pool.allocate(20);
     bool eof = false;
     // 1,2,3
@@ -413,8 +413,7 @@ TEST_F(BrokerScannerTest, normal2) {
     auto st = scanner.open();
     ASSERT_TRUE(st.ok());
 
-    MemTracker tracker;
-    MemPool tuple_pool(&tracker);
+    MemPool tuple_pool(_tracker.get());
     Tuple* tuple = (Tuple*)tuple_pool.allocate(20);
     bool eof = false;
     // 1,2,3
@@ -460,8 +459,7 @@ TEST_F(BrokerScannerTest, normal3) {
     auto st = scanner.open();
     ASSERT_TRUE(st.ok());
 
-    MemTracker tracker;
-    MemPool tuple_pool(&tracker);
+    MemPool tuple_pool(_tracker.get());
     Tuple* tuple = (Tuple*)tuple_pool.allocate(20);
     bool eof = false;
     // 1,2,3
@@ -508,8 +506,7 @@ TEST_F(BrokerScannerTest, normal4) {
     auto st = scanner.open();
     ASSERT_TRUE(st.ok());
 
-    MemTracker tracker;
-    MemPool tuple_pool(&tracker);
+    MemPool tuple_pool(_tracker.get());
     Tuple* tuple = (Tuple*)tuple_pool.allocate(20);
     bool eof = false;
     // 1,2,3
@@ -540,8 +537,7 @@ TEST_F(BrokerScannerTest, normal5) {
     auto st = scanner.open();
     ASSERT_TRUE(st.ok());
 
-    MemTracker tracker;
-    MemPool tuple_pool(&tracker);
+    MemPool tuple_pool(_tracker.get());
     Tuple* tuple = (Tuple*)tuple_pool.allocate(20);
     bool eof = false;
     // end of file
@@ -565,8 +561,7 @@ TEST_F(BrokerScannerTest, normal6) {
     auto st = scanner.open();
     ASSERT_TRUE(st.ok());
 
-    MemTracker tracker;
-    MemPool tuple_pool(&tracker);
+    MemPool tuple_pool(_tracker.get());
     Tuple* tuple = (Tuple*)tuple_pool.allocate(20);
     bool eof = false;
     // 4,5,6
@@ -597,8 +592,7 @@ TEST_F(BrokerScannerTest, normal7) {
     auto st = scanner.open();
     ASSERT_TRUE(st.ok());
 
-    MemTracker tracker;
-    MemPool tuple_pool(&tracker);
+    MemPool tuple_pool(_tracker.get());
     Tuple* tuple = (Tuple*)tuple_pool.allocate(20);
     bool eof = false;
     // end of file
@@ -622,8 +616,7 @@ TEST_F(BrokerScannerTest, normal8) {
     auto st = scanner.open();
     ASSERT_TRUE(st.ok());
 
-    MemTracker tracker;
-    MemPool tuple_pool(&tracker);
+    MemPool tuple_pool(_tracker.get());
     Tuple* tuple = (Tuple*)tuple_pool.allocate(20);
     bool eof = false;
     // 4,5,6
@@ -654,8 +647,7 @@ TEST_F(BrokerScannerTest, normal9) {
     auto st = scanner.open();
     ASSERT_TRUE(st.ok());
 
-    MemTracker tracker;
-    MemPool tuple_pool(&tracker);
+    MemPool tuple_pool(_tracker.get());
     Tuple* tuple = (Tuple*)tuple_pool.allocate(20);
     bool eof = false;
     // end of file
diff --git a/be/test/exec/csv_scan_node_test.cpp b/be/test/exec/csv_scan_node_test.cpp
index 00ae3a778bc9a4..a3d845c3ea18e2 100644
--- a/be/test/exec/csv_scan_node_test.cpp
+++ b/be/test/exec/csv_scan_node_test.cpp
@@ -252,7 +252,8 @@ TEST_F(CsvScanNodeTest, NormalUse) {
     status = scan_node.open(_state);
     ASSERT_TRUE(status.ok());
 
-    RowBatch row_batch(scan_node._row_descriptor, _state->batch_size(), new MemTracker(-1));
+    auto tracker = std::make_shared<MemTracker>();
+    RowBatch row_batch(scan_node._row_descriptor, _state->batch_size(), tracker.get());
     bool eos = false;
 
     while (!eos) {
@@ -291,7 +292,8 @@ TEST_F(CsvScanNodeTest, continuousDelim) {
     status = scan_node.open(_state);
     ASSERT_TRUE(status.ok());
 
-    RowBatch row_batch(scan_node._row_descriptor, _state->batch_size(), new MemTracker(-1));
+    auto tracker = std::make_shared<MemTracker>();
+    RowBatch row_batch(scan_node._row_descriptor, _state->batch_size(), tracker.get());
     bool eos = false;
 
     while (!eos) {
@@ -330,7 +332,8 @@ TEST_F(CsvScanNodeTest, wrong_decimal_format_test) {
     status = scan_node.open(_state);
     ASSERT_TRUE(status.ok());
 
-    RowBatch row_batch(scan_node._row_descriptor, _state->batch_size(), new MemTracker(-1));
+    auto tracker = std::make_shared<MemTracker>();
+    RowBatch row_batch(scan_node._row_descriptor, _state->batch_size(), tracker.get());
     bool eos = false;
 
     while (!eos) {
@@ -358,7 +361,8 @@ TEST_F(CsvScanNodeTest, fill_fix_len_stringi_test) {
     status = scan_node.open(_state);
     ASSERT_TRUE(status.ok());
 
-    RowBatch row_batch(scan_node._row_descriptor, _state->batch_size(), new MemTracker(-1));
+    auto tracker = std::make_shared<MemTracker>();
+    RowBatch row_batch(scan_node._row_descriptor, _state->batch_size(), tracker.get());
     bool eos = false;
 
     while (!eos) {
@@ -403,7 +407,8 @@ TEST_F(CsvScanNodeTest, wrong_fix_len_string_format_test) {
     status = scan_node.open(_state);
     ASSERT_TRUE(status.ok());
 
-    RowBatch row_batch(scan_node._row_descriptor, _state->batch_size(), new MemTracker(-1));
+    auto tracker = std::make_shared<MemTracker>();
+    RowBatch row_batch(scan_node._row_descriptor, _state->batch_size(), tracker.get());
     bool eos = false;
 
     while (!eos) {
diff --git a/be/test/exec/es_scan_node_test.cpp b/be/test/exec/es_scan_node_test.cpp
index c56848f34c57e9..a38db389af8d45 100644
--- a/be/test/exec/es_scan_node_test.cpp
+++ b/be/test/exec/es_scan_node_test.cpp
@@ -134,7 +134,7 @@ TEST_F(EsScanNodeTest, normal_use) {
 
     status = scan_node.open(&_runtime_state);
     ASSERT_TRUE(status.ok());
-    std::unique_ptr<MemTracker> mem_tracker(new MemTracker(-1));
+    std::shared_ptr<MemTracker> mem_tracker(new MemTracker(-1));
     RowBatch row_batch(scan_node._row_descriptor, _runtime_state.batch_size(), mem_tracker.get());
     bool eos = false;
     status = scan_node.get_next(&_runtime_state, &row_batch, &eos);
diff --git a/be/test/exec/hash_table_test.cpp b/be/test/exec/hash_table_test.cpp
index bd45684bc8233b..c01aca3e5a079f 100644
--- a/be/test/exec/hash_table_test.cpp
+++ b/be/test/exec/hash_table_test.cpp
@@ -28,7 +28,6 @@
 #include "exprs/expr.h"
 #include "runtime/mem_pool.h"
 #include "runtime/string_value.h"
-#include "runtime/mem_limit.hpp"
 #include "util/cpu_info.h"
 #include "util/runtime_profile.h"
 
@@ -277,12 +276,11 @@ TEST_F(HashTableTest, GrowTableTest) {
     int build_row_val = 0;
     int num_to_add = 4;
     int expected_size = 0;
-    MemTracker mem_limit(1024 * 1024);
-    vector mem_limits;
-    mem_limits.push_back(&mem_limit);
+
+    auto mem_tracker = std::make_shared<MemTracker>(1024 * 1024);
     HashTable hash_table(
-        _build_expr, _probe_expr, 1, false, 0, mem_limits, num_to_add);
-    EXPECT_TRUE(!mem_limit.limit_exceeded());
+        _build_expr, _probe_expr, 1, false, 0, mem_tracker, num_to_add);
+    EXPECT_FALSE(mem_tracker->limit_exceeded());
 
     // This inserts about 5M entries
     for (int i = 0; i < 20; ++i) {
@@ -295,7 +293,7 @@ TEST_F(HashTableTest, GrowTableTest) {
         EXPECT_EQ(hash_table.size(), expected_size);
     }
 
-    EXPECT_TRUE(mem_limit.limit_exceeded());
+    EXPECT_TRUE(mem_tracker->limit_exceeded());
 
     // Validate that we can find the entries
     for (int i = 0; i < expected_size * 5; i += 100000) {
@@ -316,11 +314,10 @@ TEST_F(HashTableTest, GrowTableTest2) {
     int build_row_val = 0;
     int num_to_add = 1024;
     int expected_size = 0;
-    MemTracker mem_limit(1024 * 1024);
-    vector mem_limits;
-    mem_limits.push_back(&mem_limit);
+
+    auto mem_tracker = std::make_shared<MemTracker>(1024 * 1024);
     HashTable hash_table(
-        _build_expr, _probe_expr, 1, false, 0, mem_limits, num_to_add);
+        _build_expr, _probe_expr, 1, false, 0, mem_tracker, num_to_add);
 
     LOG(INFO) << time(NULL);
 
diff --git a/be/test/exec/orc_scanner_test.cpp b/be/test/exec/orc_scanner_test.cpp
index 5e27d74949acab..270a87f4ba9691 100644
--- a/be/test/exec/orc_scanner_test.cpp
+++ b/be/test/exec/orc_scanner_test.cpp
@@ -371,8 +371,9 @@ TEST_F(OrcScannerTest, normal) {
 
     ORCScanner scanner(&_runtime_state, _profile, params, ranges, _addresses, &_counter);
     ASSERT_TRUE(scanner.open().ok());
-    MemTracker tracker;
-    MemPool tuple_pool(&tracker);
+
+    auto tracker = std::make_shared<MemTracker>();
+    MemPool tuple_pool(tracker.get());
 
     Tuple *tuple = (Tuple *) tuple_pool.allocate(_desc_tbl->get_tuple_descriptor(1)->byte_size());
     bool eof = false;
@@ -476,8 +477,9 @@ TEST_F(OrcScannerTest, normal2) {
 
     ORCScanner scanner(&_runtime_state, _profile, params, ranges, _addresses, &_counter);
     ASSERT_TRUE(scanner.open().ok());
-    MemTracker tracker;
-    MemPool tuple_pool(&tracker);
+
+    auto tracker = std::make_shared<MemTracker>();
+    MemPool tuple_pool(tracker.get());
 
     Tuple *tuple = (Tuple *) tuple_pool.allocate(_desc_tbl->get_tuple_descriptor(1)->byte_size());
     bool eof = false;
@@ -785,8 +787,9 @@ TEST_F(OrcScannerTest, normal3) {
 
     ORCScanner scanner(&_runtime_state, _profile, params, ranges, _addresses, &_counter);
     ASSERT_TRUE(scanner.open().ok());
-    MemTracker tracker;
-    MemPool tuple_pool(&tracker);
+
+    auto tracker = std::make_shared<MemTracker>();
+    MemPool tuple_pool(tracker.get());
 
     Tuple *tuple = (Tuple *) tuple_pool.allocate(_desc_tbl->get_tuple_descriptor(1)->byte_size());
     bool eof = false;
diff --git a/be/test/exec/parquet_scanner_test.cpp b/be/test/exec/parquet_scanner_test.cpp
index 64941f87824f5c..bdfe5303bb68f7 100644
--- a/be/test/exec/parquet_scanner_test.cpp
+++ b/be/test/exec/parquet_scanner_test.cpp
@@ -459,9 +459,9 @@ TEST_F(ParquetSannerTest, normal) {
     status = scan_node.open(&_runtime_state);
     ASSERT_TRUE(status.ok());
 
-    MemTracker tracker;
+    auto tracker = std::make_shared<MemTracker>();
     // Get batch
-    RowBatch batch(scan_node.row_desc(), _runtime_state.batch_size(), &tracker);
+    RowBatch batch(scan_node.row_desc(), _runtime_state.batch_size(), tracker.get());
     bool eof = false;
     for (int i = 0; i < 14; i++) {
         status = scan_node.get_next(&_runtime_state, &batch, &eof);
diff --git a/be/test/exec/tablet_info_test.cpp b/be/test/exec/tablet_info_test.cpp
index acd29b40a464d5..0a8c09f15fc6fe 100644
--- a/be/test/exec/tablet_info_test.cpp
+++ b/be/test/exec/tablet_info_test.cpp
@@ -143,8 +143,8 @@ TEST_F(OlapTablePartitionParamTest, normal) {
     ASSERT_TRUE(st.ok());
     RowDescriptor row_desc(*desc_tbl, {0}, {false});
     TupleDescriptor* tuple_desc = desc_tbl->get_tuple_descriptor(0);
-    MemTracker tracker;
-    RowBatch batch(row_desc, 1024, &tracker);
+    auto tracker = std::make_shared<MemTracker>();
+    RowBatch batch(row_desc, 1024, tracker.get());
     // 12, 9, "abc"
     {
         Tuple* tuple = (Tuple*)batch.tuple_data_pool()->allocate(tuple_desc->byte_size());
@@ -280,8 +280,8 @@ TEST_F(OlapTablePartitionParamTest, unpartitioned) {
     ASSERT_TRUE(st.ok());
     RowDescriptor row_desc(*desc_tbl, {0}, {false});
     TupleDescriptor* tuple_desc = desc_tbl->get_tuple_descriptor(0);
-    MemTracker tracker;
-    RowBatch batch(row_desc, 1024, &tracker);
+    auto tracker = std::make_shared<MemTracker>();
+    RowBatch batch(row_desc, 1024, tracker.get());
     // 12, 9, "abc"
     {
         Tuple* tuple = (Tuple*)batch.tuple_data_pool()->allocate(tuple_desc->byte_size());
diff --git a/be/test/exec/tablet_sink_test.cpp b/be/test/exec/tablet_sink_test.cpp
index 7b73f69371f8ed..0428a182341c56 100644
--- a/be/test/exec/tablet_sink_test.cpp
+++ b/be/test/exec/tablet_sink_test.cpp
@@ -332,8 +332,8 @@ class TestInternalService : public palo::PInternalService {
             k_add_batch_status.to_protobuf(response->mutable_status());
 
             if (request->has_row_batch() && _row_desc != nullptr) {
-                MemTracker tracker;
-                RowBatch batch(*_row_desc, request->row_batch(), &tracker);
+                auto tracker = std::make_shared<MemTracker>();
+                RowBatch batch(*_row_desc, request->row_batch(), tracker.get());
                 for (int i = 0; i < batch.num_rows(); ++i) {
                     LOG(INFO) << batch.get_row(i)->to_string(*_row_desc);
                     _output_set->emplace(batch.get_row(i)->to_string(*_row_desc));
@@ -403,8 +403,8 @@ TEST_F(OlapTableSinkTest, normal) {
     st = sink.open(&state);
     ASSERT_TRUE(st.ok());
     // send
-    MemTracker tracker;
-    RowBatch batch(row_desc, 1024, &tracker);
+    auto tracker = std::make_shared<MemTracker>();
+    RowBatch batch(row_desc, 1024, tracker.get());
     // 12, 9, "abc"
     {
         Tuple* tuple = (Tuple*)batch.tuple_data_pool()->allocate(tuple_desc->byte_size());
@@ -536,8 +536,8 @@ TEST_F(OlapTableSinkTest, convert) {
     st = sink.open(&state);
     ASSERT_TRUE(st.ok());
     // send
-    MemTracker tracker;
-    RowBatch batch(row_desc, 1024, &tracker);
+    auto tracker = std::make_shared<MemTracker>();
+    RowBatch batch(row_desc, 1024, tracker.get());
     // 12, 9, "abc"
     {
         Tuple* tuple = (Tuple*)batch.tuple_data_pool()->allocate(tuple_desc->byte_size());
@@ -844,8 +844,8 @@ TEST_F(OlapTableSinkTest, add_batch_failed) {
     st = sink.open(&state);
     ASSERT_TRUE(st.ok());
     // send
-    MemTracker tracker;
-    RowBatch batch(row_desc, 1024, &tracker);
+    auto tracker = std::make_shared<MemTracker>();
+    RowBatch batch(row_desc, 1024, tracker.get());
     TupleDescriptor* tuple_desc = desc_tbl->get_tuple_descriptor(0);
     // 12, 9, "abc"
     {
@@ -925,8 +925,8 @@ TEST_F(OlapTableSinkTest, decimal) {
     st = sink.open(&state);
     ASSERT_TRUE(st.ok());
     // send
-    MemTracker tracker;
-    RowBatch batch(row_desc, 1024, &tracker);
+    auto tracker = std::make_shared<MemTracker>();
+    RowBatch batch(row_desc, 1024, tracker.get());
     // 12, 12.3
     {
         Tuple* tuple = (Tuple*)batch.tuple_data_pool()->allocate(tuple_desc->byte_size());
diff --git a/be/test/olap/aggregate_func_test.cpp b/be/test/olap/aggregate_func_test.cpp
index 5c9d9b17443fac..a79d7436a7a265 100644
--- a/be/test/olap/aggregate_func_test.cpp
+++ b/be/test/olap/aggregate_func_test.cpp
@@ -40,7 +40,7 @@ void test_min() {
     static const size_t kValSize = sizeof(CppType) + 1;  // '1' represent the leading bool flag.
     char buf[64];
 
-    std::unique_ptr<MemTracker> tracker(new MemTracker(-1));
+    std::shared_ptr<MemTracker> tracker(new MemTracker(-1));
     std::unique_ptr mem_pool(new MemPool(tracker.get()));
     ObjectPool agg_object_pool;
     const AggregateInfo* agg = get_aggregate_info(OLAP_FIELD_AGGREGATION_MIN, field_type);
@@ -116,7 +116,7 @@ void test_max() {
 
     char buf[64];
 
-    std::unique_ptr<MemTracker> tracker(new MemTracker(-1));
+    std::shared_ptr<MemTracker> tracker(new MemTracker(-1));
     std::unique_ptr mem_pool(new MemPool(tracker.get()));
     ObjectPool agg_object_pool;
     const AggregateInfo* agg = get_aggregate_info(OLAP_FIELD_AGGREGATION_MAX, field_type);
@@ -192,7 +192,7 @@ void test_sum() {
     char buf[64];
     RowCursorCell dst(buf);
 
-    std::unique_ptr<MemTracker> tracker(new MemTracker(-1));
+    std::shared_ptr<MemTracker> tracker(new MemTracker(-1));
     std::unique_ptr mem_pool(new MemPool(tracker.get()));
     ObjectPool agg_object_pool;
     const AggregateInfo* agg = get_aggregate_info(OLAP_FIELD_AGGREGATION_SUM, field_type);
@@ -267,7 +267,7 @@ void test_replace() {
     char buf[64];
     RowCursorCell dst(buf);
 
-    std::unique_ptr<MemTracker> tracker(new MemTracker(-1));
+    std::shared_ptr<MemTracker> tracker(new MemTracker(-1));
     std::unique_ptr mem_pool(new MemPool(tracker.get()));
     ObjectPool agg_object_pool;
     const AggregateInfo* agg = get_aggregate_info(OLAP_FIELD_AGGREGATION_REPLACE, field_type);
@@ -326,7 +326,7 @@ void test_replace_string() {
     dst_slice->data = nullptr;
     dst_slice->size = 0;
 
-    std::unique_ptr<MemTracker> tracker(new MemTracker(-1));
+    std::shared_ptr<MemTracker> tracker(new MemTracker(-1));
     std::unique_ptr mem_pool(new MemPool(tracker.get()));
     ObjectPool agg_object_pool;
     const AggregateInfo* agg = get_aggregate_info(OLAP_FIELD_AGGREGATION_REPLACE, field_type);
diff --git a/be/test/olap/column_reader_test.cpp b/be/test/olap/column_reader_test.cpp
index 8a8ac97856e5d0..f902a03e668588 100644
--- a/be/test/olap/column_reader_test.cpp
+++ b/be/test/olap/column_reader_test.cpp
@@ -216,7 +216,7 @@ class TestColumn : public testing::Test {
 
     ColumnWriter *_column_writer;
     ColumnReader *_column_reader;
-    std::unique_ptr<MemTracker> _mem_tracker;
+    std::shared_ptr<MemTracker> _mem_tracker;
     std::unique_ptr _mem_pool;
     std::unique_ptr _col_vector;
 
diff --git a/be/test/olap/comparison_predicate_test.cpp b/be/test/olap/comparison_predicate_test.cpp
index a2bd35702ac79b..dc7db5ca541689 100644
--- a/be/test/olap/comparison_predicate_test.cpp
+++ b/be/test/olap/comparison_predicate_test.cpp
@@ -124,7 +124,7 @@ public: \
         _vectorized_batch = new VectorizedRowBatch(tablet_schema, ids, size); \
         _vectorized_batch->set_size(size); \
     } \
-    std::unique_ptr<MemTracker> _mem_tracker; \
+    std::shared_ptr<MemTracker> _mem_tracker; \
     std::unique_ptr _mem_pool; \
     VectorizedRowBatch* _vectorized_batch; \
 }; \
diff --git a/be/test/olap/delta_writer_test.cpp b/be/test/olap/delta_writer_test.cpp
index a14c74d2f8b586..b6aa07e9e7208f 100644
--- a/be/test/olap/delta_writer_test.cpp
+++ b/be/test/olap/delta_writer_test.cpp
@@ -47,7 +47,7 @@ static const uint32_t MAX_RETRY_TIMES = 10;
 static const uint32_t MAX_PATH_LEN = 1024;
 
 StorageEngine* k_engine = nullptr;
-MemTracker* k_mem_tracker = nullptr;
+std::shared_ptr<MemTracker> k_mem_tracker = nullptr;
 
 void set_up() {
     char buffer[MAX_PATH_LEN];
@@ -66,7 +66,7 @@ void set_up() {
     ExecEnv* exec_env = doris::ExecEnv::GetInstance();
     exec_env->set_storage_engine(k_engine);
 
-    k_mem_tracker = new MemTracker(-1, "delta writer test");
+    k_mem_tracker.reset(new MemTracker(-1, "delta writer test"));
 }
 
 void tear_down() {
@@ -74,7 +74,6 @@ void tear_down() {
     k_engine = nullptr;
     system("rm -rf ./data_test");
     FileUtils::remove_all(std::string(getenv("DORIS_HOME")) + UNUSED_PREFIX);
-    delete k_mem_tracker;
 }
 
 void create_tablet_request(int64_t tablet_id, int32_t schema_hash, TCreateTabletReq* request) {
@@ -349,8 +348,8 @@ TEST_F(TestDeltaWriter, write) {
     DeltaWriter::open(&write_req, k_mem_tracker, &delta_writer);
     ASSERT_NE(delta_writer, nullptr);
 
-    MemTracker tracker;
-    MemPool pool(&tracker);
+    auto tracker = std::make_shared<MemTracker>();
+    MemPool pool(tracker.get());
     // Tuple 1
     {
         Tuple* tuple = reinterpret_cast(pool.allocate(tuple_desc->byte_size()));
diff --git a/be/test/olap/in_list_predicate_test.cpp b/be/test/olap/in_list_predicate_test.cpp
index c293d744d9d43a..2e4e2dbe02fd0d 100644
--- a/be/test/olap/in_list_predicate_test.cpp
+++ b/be/test/olap/in_list_predicate_test.cpp
@@ -127,7 +127,7 @@ class TestInListPredicate : public testing::Test {
         _vectorized_batch = new VectorizedRowBatch(tablet_schema, ids, size);
         _vectorized_batch->set_size(size);
     }
-    std::unique_ptr<MemTracker> _mem_tracker;
+    std::shared_ptr<MemTracker> _mem_tracker;
     std::unique_ptr _mem_pool;
     VectorizedRowBatch* _vectorized_batch;
 };
diff --git a/be/test/olap/key_coder_test.cpp b/be/test/olap/key_coder_test.cpp
index c34287f5d27ae8..45bca2b1b0d311 100644
--- a/be/test/olap/key_coder_test.cpp
+++ b/be/test/olap/key_coder_test.cpp
@@ -29,11 +29,11 @@ namespace doris {
 
 class KeyCoderTest : public testing::Test {
 public:
-    KeyCoderTest() : _pool(&_tracker) { }
+    KeyCoderTest() : _tracker(new MemTracker()), _pool(_tracker.get()) { }
     virtual ~KeyCoderTest() {
     }
 private:
-    MemTracker _tracker;
+    std::shared_ptr _tracker;
     MemPool _pool;
 };
 
diff --git a/be/test/olap/null_predicate_test.cpp b/be/test/olap/null_predicate_test.cpp
index 4be0f8d34e30f2..d3270a9b58e6cd 100644
--- a/be/test/olap/null_predicate_test.cpp
+++ b/be/test/olap/null_predicate_test.cpp
@@ -95,7 +95,7 @@ class TestNullPredicate : public testing::Test {
         _vectorized_batch = new VectorizedRowBatch(tablet_schema, ids, size);
         _vectorized_batch->set_size(size);
     }
-    std::unique_ptr<MemTracker> _mem_tracker;
+    std::shared_ptr<MemTracker> _mem_tracker;
     std::unique_ptr _mem_pool;
     VectorizedRowBatch* _vectorized_batch;
 };
diff --git a/be/test/olap/row_block_v2_test.cpp b/be/test/olap/row_block_v2_test.cpp
index 7fe0b97e393025..8c377b48f27b11 100644
--- a/be/test/olap/row_block_v2_test.cpp
+++ b/be/test/olap/row_block_v2_test.cpp
@@ -92,8 +92,8 @@ TEST_F(TestRowBlockV2, test_convert) {
     block_info.null_supported = true;
     auto res = output_block.init(block_info);
     ASSERT_EQ(OLAP_SUCCESS, res);
-    MemTracker tracker;
-    MemPool pool(&tracker);
+    auto tracker = std::make_shared<MemTracker>();
+    MemPool pool(tracker.get());
     for (int i = 0; i < input_block.capacity(); ++i) {
         RowBlockRow row = input_block.row(i);
 
diff --git a/be/test/olap/row_cursor_test.cpp b/be/test/olap/row_cursor_test.cpp
index 2a39e6d3e5fa3d..3f931264c5b7da 100644
--- a/be/test/olap/row_cursor_test.cpp
+++ b/be/test/olap/row_cursor_test.cpp
@@ -259,7 +259,7 @@ class TestRowCursor : public testing::Test {
 
     virtual void TearDown() {}
 
-    std::unique_ptr<MemTracker> _mem_tracker;
+    std::shared_ptr<MemTracker> _mem_tracker;
     std::unique_ptr _mem_pool;
 };
 
@@ -471,7 +471,7 @@ TEST_F(TestRowCursor, AggregateWithoutNull) {
     left.set_field_content(4, reinterpret_cast(&l_decimal), _mem_pool.get());
     left.set_field_content(5, reinterpret_cast(&l_varchar), _mem_pool.get());
 
-    std::unique_ptr<MemTracker> tracker(new MemTracker(-1));
+    std::shared_ptr<MemTracker> tracker(new MemTracker(-1));
     std::unique_ptr mem_pool(new MemPool(tracker.get()));
     ObjectPool agg_object_pool;
     init_row_with_others(&row, left, mem_pool.get(), &agg_object_pool);
@@ -532,7 +532,7 @@ TEST_F(TestRowCursor, AggregateWithNull) {
     left.set_null(4);
     left.set_field_content(5, reinterpret_cast(&l_varchar), _mem_pool.get());
 
-    std::unique_ptr<MemTracker> tracker(new MemTracker(-1));
+    std::shared_ptr<MemTracker> tracker(new MemTracker(-1));
     std::unique_ptr mem_pool(new MemPool(tracker.get()));
     ObjectPool agg_object_pool;
     init_row_with_others(&row, left, mem_pool.get(), &agg_object_pool);
diff --git a/be/test/olap/rowset/alpha_rowset_test.cpp b/be/test/olap/rowset/alpha_rowset_test.cpp
index 96fd9f48521ac6..73132ee07b9487 100644
--- a/be/test/olap/rowset/alpha_rowset_test.cpp
+++ b/be/test/olap/rowset/alpha_rowset_test.cpp
@@ -159,7 +159,7 @@ class AlphaRowsetTest : public testing::Test {
 
 private:
     std::unique_ptr _alpha_rowset_writer;
-    std::unique_ptr<MemTracker> _mem_tracker;
+    std::shared_ptr<MemTracker> _mem_tracker;
     std::unique_ptr _mem_pool;
 };
 
diff --git a/be/test/olap/rowset/beta_rowset_test.cpp b/be/test/olap/rowset/beta_rowset_test.cpp
index f625512839274e..5a4fe34bb85b94 100644
--- a/be/test/olap/rowset/beta_rowset_test.cpp
+++ b/be/test/olap/rowset/beta_rowset_test.cpp
@@ -173,8 +173,8 @@ TEST_F(BetaRowsetTest, BasicFunctionTest) {
         // k2 := k1 * 10
         // k3 := 4096 * i + rid
         for (int i = 0; i < num_segments; ++i) {
-            MemTracker mem_tracker(-1);
-            MemPool mem_pool(&mem_tracker);
+            auto tracker = std::make_shared<MemTracker>();
+            MemPool mem_pool(tracker.get());
             for (int rid = 0; rid < rows_per_segment; ++rid) {
                 uint32_t k1 = rid * 10 + i;
                 uint32_t k2 = k1 * 10;
diff --git a/be/test/olap/rowset/rowset_converter_test.cpp b/be/test/olap/rowset/rowset_converter_test.cpp
index 5ef600cd2e5fa2..1e06d2e79fe6ce 100644
--- a/be/test/olap/rowset/rowset_converter_test.cpp
+++ b/be/test/olap/rowset/rowset_converter_test.cpp
@@ -192,7 +192,7 @@ class RowsetConverterTest : public testing::Test {
 
 private:
     std::string _schema_hash_path;
-    std::unique_ptr<MemTracker> _mem_tracker;
+    std::shared_ptr<MemTracker> _mem_tracker;
     std::unique_ptr _mem_pool;
 };
 
diff --git a/be/test/olap/rowset/segment_v2/binary_dict_page_test.cpp b/be/test/olap/rowset/segment_v2/binary_dict_page_test.cpp
index 1313061fa8e23c..826ad2984e3d2e 100644
--- a/be/test/olap/rowset/segment_v2/binary_dict_page_test.cpp
+++ b/be/test/olap/rowset/segment_v2/binary_dict_page_test.cpp
@@ -80,8 +80,8 @@ class BinaryDictPageTest : public testing::Test {
         ASSERT_EQ(slices.size(), page_decoder.count());
 
         //check values
-        MemTracker tracker;
-        MemPool pool(&tracker);
+        auto tracker = std::make_shared<MemTracker>();
+        MemPool pool(tracker.get());
         TypeInfo* type_info = get_type_info(OLAP_FIELD_TYPE_VARCHAR);
         size_t size = slices.size();
         Slice* values = reinterpret_cast<Slice*>(pool.allocate(size * sizeof(Slice)));
@@ -170,8 +170,8 @@ class BinaryDictPageTest : public testing::Test {
             ASSERT_TRUE(status.ok());
 
             //check values
-            MemTracker tracker;
-            MemPool pool(&tracker);
+            auto tracker = std::make_shared<MemTracker>();
+            MemPool pool(tracker.get());
             TypeInfo* type_info = get_type_info(OLAP_FIELD_TYPE_VARCHAR);
             Slice* values = reinterpret_cast<Slice*>(pool.allocate(sizeof(Slice)));
             ColumnBlock column_block(type_info, (uint8_t*)values, nullptr, 1, &pool);
diff --git a/be/test/olap/rowset/segment_v2/binary_plain_page_test.cpp b/be/test/olap/rowset/segment_v2/binary_plain_page_test.cpp
index a43ec35d6418f9..d155e94ed5c306 100644
--- a/be/test/olap/rowset/segment_v2/binary_plain_page_test.cpp
+++ b/be/test/olap/rowset/segment_v2/binary_plain_page_test.cpp
@@ -69,10 +69,8 @@ class BinaryPlainPageTest : public testing::Test {
         ASSERT_TRUE(status.ok());
 
         //test1
-        
-
-        MemTracker tracker;
-        MemPool pool(&tracker);
+        auto tracker = std::make_shared<MemTracker>();
+        MemPool pool(tracker.get());
         size_t size = 3;
         Slice* values = reinterpret_cast<Slice*>(pool.allocate(size * sizeof(Slice)));
         uint8_t* null_bitmap = reinterpret_cast<uint8_t*>(pool.allocate(BitmapSize(size)));
diff --git a/be/test/olap/rowset/segment_v2/binary_prefix_page_test.cpp b/be/test/olap/rowset/segment_v2/binary_prefix_page_test.cpp
index f7fe9264d0404f..02dd7bf38f6334 100644
--- a/be/test/olap/rowset/segment_v2/binary_prefix_page_test.cpp
+++ b/be/test/olap/rowset/segment_v2/binary_prefix_page_test.cpp
@@ -72,8 +72,8 @@ class BinaryPrefixPageTest : public testing::Test {
         ASSERT_EQ(slices.size(), page_decoder->count());
 
         //check values
-        MemTracker tracker;
-        MemPool pool(&tracker);
+        auto tracker = std::make_shared<MemTracker>();
+        MemPool pool(tracker.get());
         TypeInfo* type_info = get_type_info(OLAP_FIELD_TYPE_VARCHAR);
         size_t size = slices.size();
         Slice* values = reinterpret_cast<Slice*>(pool.allocate(size * sizeof(Slice)));
@@ -146,8 +146,8 @@ class BinaryPrefixPageTest : public testing::Test {
         ret = page_decoder->init();
         ASSERT_TRUE(ret.ok());
 
-        MemTracker tracker;
-        MemPool pool(&tracker);
+        auto tracker = std::make_shared<MemTracker>();
+        MemPool pool(tracker.get());
         TypeInfo* type_info = get_type_info(OLAP_FIELD_TYPE_VARCHAR);
         size_t size = slices.size();
         Slice* values = reinterpret_cast<Slice*>(pool.allocate(size * sizeof(Slice)));
diff --git a/be/test/olap/rowset/segment_v2/bitmap_index_test.cpp b/be/test/olap/rowset/segment_v2/bitmap_index_test.cpp
index 4874af6ffa39ce..ff9e27e3f74d48 100644
--- a/be/test/olap/rowset/segment_v2/bitmap_index_test.cpp
+++ b/be/test/olap/rowset/segment_v2/bitmap_index_test.cpp
@@ -38,7 +38,7 @@ namespace segment_v2 {
 class BitmapIndexTest : public testing::Test {
 public:
     const std::string kTestDir = "./ut_dir/bitmap_index_test";
-    BitmapIndexTest() : _pool(&_tracker) { }
+    BitmapIndexTest() : _tracker(new MemTracker()), _pool(_tracker.get()) {}
 
     void SetUp() override {
         if (FileUtils::check_exist(kTestDir)) {
@@ -53,7 +53,7 @@ class BitmapIndexTest : public testing::Test {
     }
 
 private:
-    MemTracker _tracker;
+    std::shared_ptr<MemTracker> _tracker;
     MemPool _pool;
 };
 
diff --git a/be/test/olap/rowset/segment_v2/bitshuffle_page_test.cpp b/be/test/olap/rowset/segment_v2/bitshuffle_page_test.cpp
index f90c3df1214623..1a99644f6f8699 100644
--- a/be/test/olap/rowset/segment_v2/bitshuffle_page_test.cpp
+++ b/be/test/olap/rowset/segment_v2/bitshuffle_page_test.cpp
@@ -36,8 +36,8 @@ class BitShufflePageTest : public testing::Test {
 
     template
     void copy_one(PageDecoderType* decoder, typename TypeTraits::CppType* ret) {
-        MemTracker tracker;
-        MemPool pool(&tracker);
+        auto tracker = std::make_shared<MemTracker>();
+        MemPool pool(tracker.get());
         uint8_t null_bitmap = 0;
         ColumnBlock block(get_type_info(type), (uint8_t*)ret, &null_bitmap, 1, &pool);
         ColumnBlockView column_block_view(&block);
@@ -72,8 +72,8 @@ class BitShufflePageTest : public testing::Test {
         ASSERT_TRUE(status.ok());
         ASSERT_EQ(0, page_decoder.current_index());
 
-        MemTracker tracker;
-        MemPool pool(&tracker);
+        auto tracker = std::make_shared<MemTracker>();
+        MemPool pool(tracker.get());
 
         CppType* values = reinterpret_cast<CppType*>(pool.allocate(size * sizeof(CppType)));
         uint8_t* null_bitmap = reinterpret_cast<uint8_t*>(pool.allocate(BitmapSize(size)));
diff --git a/be/test/olap/rowset/segment_v2/bloom_filter_page_test.cpp b/be/test/olap/rowset/segment_v2/bloom_filter_page_test.cpp
index 4409882c9e40e2..6909b35197098e 100644
--- a/be/test/olap/rowset/segment_v2/bloom_filter_page_test.cpp
+++ b/be/test/olap/rowset/segment_v2/bloom_filter_page_test.cpp
@@ -62,8 +62,8 @@ class BloomFilterPageTest : public testing::Test {
         status = bf_page_decoder.seek_to_position_in_page(0);
         ASSERT_TRUE(status.ok());
 
-        MemTracker tracker;
-        MemPool pool(&tracker);
+        auto tracker = std::make_shared<MemTracker>();
+        MemPool pool(tracker.get());
         Slice* values = reinterpret_cast<Slice*>(pool.allocate(sizeof(Slice)));
         ColumnBlock block(get_type_info(OLAP_FIELD_TYPE_VARCHAR), (uint8_t*)values, nullptr, 2, &pool);
         ColumnBlockView column_block_view(&block);
diff --git a/be/test/olap/rowset/segment_v2/column_reader_writer_test.cpp b/be/test/olap/rowset/segment_v2/column_reader_writer_test.cpp
index c8f6820cd99dd7..a004ea0a81f0e4 100644
--- a/be/test/olap/rowset/segment_v2/column_reader_writer_test.cpp
+++ b/be/test/olap/rowset/segment_v2/column_reader_writer_test.cpp
@@ -42,8 +42,8 @@ static const string TEST_DIR = "./ut_dir/column_reader_writer_test";
 
 class ColumnReaderWriterTest : public testing::Test {
 public:
-    ColumnReaderWriterTest() : _pool(&_tracker) { }
-    virtual ~ColumnReaderWriterTest() { }
+    ColumnReaderWriterTest() : _tracker(new MemTracker()), _pool(_tracker.get()) {}
+    virtual ~ColumnReaderWriterTest() {}
 
 protected:
     void SetUp() override {
@@ -60,7 +60,7 @@ class ColumnReaderWriterTest : public testing::Test {
     }
 
 private:
-    MemTracker _tracker;
+    std::shared_ptr<MemTracker> _tracker;
     MemPool _pool;
 };
 
@@ -146,8 +146,8 @@ void test_nullable_data(uint8_t* src_data, uint8_t* src_is_null, int num_rows, s
             st = iter->seek_to_first();
             ASSERT_TRUE(st.ok()) << st.to_string();
 
-            MemTracker tracker;
-            MemPool pool(&tracker);
+            auto tracker = std::make_shared<MemTracker>();
+            MemPool pool(tracker.get());
             Type vals[1024];
             Type* vals_ = vals;
             uint8_t is_null[1024];
@@ -180,8 +180,8 @@ void test_nullable_data(uint8_t* src_data, uint8_t* src_is_null, int num_rows, s
         }
 
         {
-            MemTracker tracker;
-            MemPool pool(&tracker);
+            auto tracker = std::make_shared<MemTracker>();
+            MemPool pool(tracker.get());
             Type vals[1024];
             uint8_t is_null[1024];
             ColumnBlock col(type_info, (uint8_t*)vals, is_null, 1024, &pool);
@@ -235,8 +235,8 @@ void test_read_default_value(string value, void* result) {
             st = iter.seek_to_first();
             ASSERT_TRUE(st.ok()) << st.to_string();
 
-            MemTracker tracker;
-            MemPool pool(&tracker);
+            auto tracker = std::make_shared<MemTracker>();
+            MemPool pool(tracker.get());
             Type vals[1024];
             Type* vals_ = vals;
             uint8_t is_null[1024];
@@ -264,8 +264,8 @@ void test_read_default_value(string value, void* result) {
         }
 
         {
-            MemTracker tracker;
-            MemPool pool(&tracker);
+            auto tracker = std::make_shared<MemTracker>();
+            MemPool pool(tracker.get());
             Type vals[1024];
             uint8_t is_null[1024];
             ColumnBlock col(type_info, (uint8_t*)vals, is_null, 1024, &pool);
diff --git a/be/test/olap/rowset/segment_v2/frame_of_reference_page_test.cpp b/be/test/olap/rowset/segment_v2/frame_of_reference_page_test.cpp
index da9729b09abdd5..a40a98fc682260 100644
--- a/be/test/olap/rowset/segment_v2/frame_of_reference_page_test.cpp
+++ b/be/test/olap/rowset/segment_v2/frame_of_reference_page_test.cpp
@@ -35,8 +35,8 @@ class FrameOfReferencePageTest : public testing::Test {
 public:
     template
     void copy_one(PageDecoderType* decoder, typename TypeTraits::CppType* ret) {
-        MemTracker tracker;
-        MemPool pool(&tracker);
+        auto tracker = std::make_shared<MemTracker>();
+        MemPool pool(tracker.get());
         uint8_t null_bitmap = 0;
         ColumnBlock block(get_type_info(type), (uint8_t*)ret, &null_bitmap, 1, &pool);
         ColumnBlockView column_block_view(&block);
@@ -66,8 +66,8 @@ class FrameOfReferencePageTest : public testing::Test {
         ASSERT_EQ(0, for_page_decoder.current_index());
         ASSERT_EQ(size, for_page_decoder.count());
 
-        MemTracker tracker;
-        MemPool pool(&tracker);
+        auto tracker = std::make_shared<MemTracker>();
+        MemPool pool(tracker.get());
         CppType* values = reinterpret_cast<CppType*>(pool.allocate(size * sizeof(CppType)));
         uint8_t* null_bitmap = reinterpret_cast<uint8_t*>(pool.allocate(BitmapSize(size)));
         ColumnBlock block(get_type_info(Type), (uint8_t*)values, null_bitmap, size, &pool);
diff --git a/be/test/olap/rowset/segment_v2/plain_page_test.cpp b/be/test/olap/rowset/segment_v2/plain_page_test.cpp
index 33a9501c5cf035..a755dcbc1d82ce 100644
--- a/be/test/olap/rowset/segment_v2/plain_page_test.cpp
+++ b/be/test/olap/rowset/segment_v2/plain_page_test.cpp
@@ -46,8 +46,8 @@ class PlainPageTest : public testing::Test {
 
     template
     void copy_one(PageDecoderType* decoder, typename TypeTraits::CppType* ret) {
-        MemTracker tracker;
-        MemPool pool(&tracker);
+        auto tracker = std::make_shared<MemTracker>();
+        MemPool pool(tracker.get());
         uint8_t null_bitmap = 0;
         ColumnBlock block(get_type_info(type), (uint8_t*)ret, &null_bitmap, 1, &pool);
         ColumnBlockView column_block_view(&block);
@@ -84,8 +84,8 @@ class PlainPageTest : public testing::Test {
         
         ASSERT_EQ(0, page_decoder.current_index());
 
-        MemTracker tracker;
-        MemPool pool(&tracker);
+        auto tracker = std::make_shared<MemTracker>();
+        MemPool pool(tracker.get());
 
         CppType* values = reinterpret_cast<CppType*>(pool.allocate(size * sizeof(CppType)));
         uint8_t* null_bitmap = reinterpret_cast<uint8_t*>(pool.allocate(BitmapSize(size)));
diff --git a/be/test/olap/rowset/segment_v2/rle_page_test.cpp b/be/test/olap/rowset/segment_v2/rle_page_test.cpp
index bb7050b658b056..13122d4187cec2 100644
--- a/be/test/olap/rowset/segment_v2/rle_page_test.cpp
+++ b/be/test/olap/rowset/segment_v2/rle_page_test.cpp
@@ -37,8 +37,8 @@ class RlePageTest : public testing::Test {
 
     template
     void copy_one(PageDecoderType* decoder, typename TypeTraits::CppType* ret) {
-        MemTracker tracker;
-        MemPool pool(&tracker);
+        auto tracker = std::make_shared<MemTracker>();
+        MemPool pool(tracker.get());
         uint8_t null_bitmap = 0;
         ColumnBlock block(get_type_info(type), (uint8_t*)ret, &null_bitmap, 1, &pool);
         ColumnBlockView column_block_view(&block);
@@ -74,8 +74,8 @@ class RlePageTest : public testing::Test {
         ASSERT_EQ(0, rle_page_decoder.current_index());
         ASSERT_EQ(size, rle_page_decoder.count());
 
-        MemTracker tracker;
-        MemPool pool(&tracker);
+        auto tracker = std::make_shared<MemTracker>();
+        MemPool pool(tracker.get());
         CppType* values = reinterpret_cast<CppType*>(pool.allocate(size * sizeof(CppType)));
         uint8_t* null_bitmap = reinterpret_cast<uint8_t*>(pool.allocate(BitmapSize(size)));
         ColumnBlock block(get_type_info(Type), (uint8_t*)values, null_bitmap, size, &pool);
diff --git a/be/test/olap/rowset/segment_v2/segment_test.cpp b/be/test/olap/rowset/segment_v2/segment_test.cpp
index 86bd258972184d..8d8fc89996ce7b 100644
--- a/be/test/olap/rowset/segment_v2/segment_test.cpp
+++ b/be/test/olap/rowset/segment_v2/segment_test.cpp
@@ -752,8 +752,8 @@ TEST_F(SegmentReaderWriterTest, TestDefaultValueColumn) {
 
 TEST_F(SegmentReaderWriterTest, TestStringDict) {
     size_t num_rows_per_block = 10;
-    MemTracker tracker;
-    MemPool pool(&tracker);
+    auto tracker = std::make_shared<MemTracker>();
+    MemPool pool(tracker.get());
 
     std::shared_ptr<TabletSchema> tablet_schema(new TabletSchema());
     tablet_schema->_num_columns = 4;
diff --git a/be/test/olap/schema_change_test.cpp b/be/test/olap/schema_change_test.cpp
index 52a3daeca52a24..3329b89b06a310 100644
--- a/be/test/olap/schema_change_test.cpp
+++ b/be/test/olap/schema_change_test.cpp
@@ -306,7 +306,7 @@ class TestColumn : public testing::Test {
     ColumnWriter *_column_writer;
 
     ColumnReader *_column_reader;
-    std::unique_ptr<MemTracker> _mem_tracker;
+    std::shared_ptr<MemTracker> _mem_tracker;
     std::unique_ptr<MemPool> _mem_pool;
     std::unique_ptr _col_vector;
 
diff --git a/be/test/olap/skiplist_test.cpp b/be/test/olap/skiplist_test.cpp
index bfaf73819cadc3..8771f3624472f8 100644
--- a/be/test/olap/skiplist_test.cpp
+++ b/be/test/olap/skiplist_test.cpp
@@ -50,7 +50,7 @@ struct TestComparator {
 class SkipTest : public testing::Test {};
 
 TEST_F(SkipTest, Empty) {
-    std::unique_ptr<MemTracker> tracker(new MemTracker(-1));
+    std::shared_ptr<MemTracker> tracker(new MemTracker(-1));
     std::unique_ptr<MemPool> mem_pool(new MemPool(tracker.get()));
 
     TestComparator cmp;
@@ -68,7 +68,7 @@ TEST_F(SkipTest, Empty) {
 }
 
 TEST_F(SkipTest, InsertAndLookup) {
-    std::unique_ptr<MemTracker> tracker(new MemTracker(-1));
+    std::shared_ptr<MemTracker> tracker(new MemTracker(-1));
     std::unique_ptr<MemPool> mem_pool(new MemPool(tracker.get()));
 
     const int N = 2000;
@@ -150,7 +150,7 @@ TEST_F(SkipTest, InsertAndLookup) {
 
 // Only non-DUP model will use Find() and InsertWithHint().
 TEST_F(SkipTest, InsertWithHintNoneDupModel) {
-    std::unique_ptr<MemTracker> tracker(new MemTracker(-1));
+    std::shared_ptr<MemTracker> tracker(new MemTracker(-1));
     std::unique_ptr<MemPool> mem_pool(new MemPool(tracker.get()));
 
     const int N = 2000;
@@ -260,7 +260,7 @@ class ConcurrentTest {
     // Current state of the test
     State _current;
 
-    std::unique_ptr<MemTracker> _mem_tracker;
+    std::shared_ptr<MemTracker> _mem_tracker;
     std::unique_ptr<MemPool> _mem_pool;
 
     // SkipList is not protected by _mu.  We just use a single writer
@@ -268,10 +268,10 @@ class ConcurrentTest {
     SkipList _list;
 
 public:
-    ConcurrentTest():
-        _mem_tracker(new MemTracker(-1)),
-        _mem_pool(new MemPool(_mem_tracker.get())),
-        _list(TestComparator(), _mem_pool.get(), false) { }
+    ConcurrentTest()
+            : _mem_tracker(new MemTracker(-1)),
+              _mem_pool(new MemPool(_mem_tracker.get())),
+              _list(TestComparator(), _mem_pool.get(), false) {}
 
     // REQUIRES: External synchronization
     void write_step(Random* rnd) {
diff --git a/be/test/olap/storage_types_test.cpp b/be/test/olap/storage_types_test.cpp
index d4865a36a50f97..c49ae9bf5b70ab 100644
--- a/be/test/olap/storage_types_test.cpp
+++ b/be/test/olap/storage_types_test.cpp
@@ -41,8 +41,8 @@ void common_test(typename TypeTraits::CppType src_val) {
     ASSERT_EQ(sizeof(src_val), type->size());
     {
         typename TypeTraits::CppType dst_val;
-        MemTracker tracker;
-        MemPool pool(&tracker);
+        auto tracker = std::make_shared<MemTracker>();
+        MemPool pool(tracker.get());
         type->deep_copy((char*)&dst_val, (char*)&src_val, &pool);
         ASSERT_TRUE(type->equal((char*)&src_val, (char*)&dst_val));
         ASSERT_EQ(0, type->cmp((char*)&src_val, (char*)&dst_val));
@@ -82,8 +82,8 @@ void test_char(Slice src_val) {
     {
         char buf[64];
         Slice dst_val(buf, sizeof(buf));
-        MemTracker tracker;
-        MemPool pool(&tracker);
+        auto tracker = std::make_shared<MemTracker>();
+        MemPool pool(tracker.get());
         type->deep_copy((char*)&dst_val, (char*)&src_val, &pool);
         ASSERT_TRUE(type->equal((char*)&src_val, (char*)&dst_val));
         ASSERT_EQ(0, type->cmp((char*)&src_val, (char*)&dst_val));
diff --git a/be/test/runtime/CMakeLists.txt b/be/test/runtime/CMakeLists.txt
index e2574b31297d60..258038059247b3 100644
--- a/be/test/runtime/CMakeLists.txt
+++ b/be/test/runtime/CMakeLists.txt
@@ -26,8 +26,8 @@ set(EXECUTABLE_OUTPUT_PATH "${BUILD_DIR}/test/runtime")
 ADD_BE_TEST(mem_pool_test)
 ADD_BE_TEST(free_list_test)
 ADD_BE_TEST(string_buffer_test)
-# ADD_BE_TEST(data_stream_test)
-#ADD_BE_TEST(disk_io_mgr_test)
+#ADD_BE_TEST(data_stream_test)
+ADD_BE_TEST(disk_io_mgr_test)
 #ADD_BE_TEST(parallel_executor_test)
 ADD_BE_TEST(datetime_value_test)
 ADD_BE_TEST(decimal_value_test)
@@ -43,10 +43,9 @@ ADD_BE_TEST(fragment_mgr_test)
 #ADD_BE_TEST(etl_job_mgr_test)
 
 # ADD_BE_TEST(tmp_file_mgr_test)
-# ADD_BE_TEST(disk_io_mgr_test)
-# ADD_BE_TEST(mem_limit_test)
-# ADD_BE_TEST(buffered_block_mgr2_test)
-# ADD_BE_TEST(buffered_tuple_stream2_test)
+ADD_BE_TEST(mem_limit_test)
+ADD_BE_TEST(buffered_block_mgr2_test)
+ADD_BE_TEST(buffered_tuple_stream2_test)
 ADD_BE_TEST(stream_load_pipe_test)
 ADD_BE_TEST(load_channel_mgr_test)
 #ADD_BE_TEST(export_task_mgr_test)
diff --git a/be/test/runtime/buffered_block_mgr2_test.cpp b/be/test/runtime/buffered_block_mgr2_test.cpp
index 5304b8e44df9a1..dc0e8b068ffb85 100644
--- a/be/test/runtime/buffered_block_mgr2_test.cpp
+++ b/be/test/runtime/buffered_block_mgr2_test.cpp
@@ -148,7 +148,7 @@ class BufferedBlockMgrTest : public ::testing::Test {
     }
 
     BufferedBlockMgr2* CreateMgrAndClient(int64_t query_id, int max_buffers, int block_size,
-                                          int reserved_blocks, MemTracker* tracker,
+                                          int reserved_blocks, const std::shared_ptr<MemTracker>& tracker,
                                           BufferedBlockMgr2::Client** client) {
         RuntimeState* state = NULL;
         BufferedBlockMgr2* mgr = CreateMgr(query_id, max_buffers, block_size, &state);
@@ -158,7 +158,7 @@ class BufferedBlockMgrTest : public ::testing::Test {
     }
 
     void CreateMgrsAndClients(int64_t start_query_id, int num_mgrs, int buffers_per_mgr,
-                              int block_size, int reserved_blocks_per_client, MemTracker* tracker,
+                              int block_size, int reserved_blocks_per_client, const std::shared_ptr<MemTracker>& tracker,
                               vector* mgrs,
                               vector* clients) {
         for (int i = 0; i < num_mgrs; ++i) {
@@ -269,7 +269,7 @@ class BufferedBlockMgrTest : public ::testing::Test {
         int max_num_blocks = 5;
         BufferedBlockMgr2* block_mgr = NULL;
         BufferedBlockMgr2::Client* client;
-        block_mgr = CreateMgrAndClient(0, max_num_blocks, block_size, 0, _client_tracker.get(),
+        block_mgr = CreateMgrAndClient(0, max_num_blocks, block_size, 0, _client_tracker,
                                        &client);
         EXPECT_EQ(_test_env->block_mgr_parent_tracker()->consumption(), 0);
 
@@ -314,7 +314,7 @@ class BufferedBlockMgrTest : public ::testing::Test {
         int max_num_buffers = 5;
         BufferedBlockMgr2* block_mgr = NULL;
         BufferedBlockMgr2::Client* client = NULL;
-        block_mgr = CreateMgrAndClient(0, max_num_buffers, block_size, 0, _client_tracker.get(),
+        block_mgr = CreateMgrAndClient(0, max_num_buffers, block_size, 0, _client_tracker,
                                        &client);
 
         // Check counters.
@@ -393,7 +393,7 @@ class BufferedBlockMgrTest : public ::testing::Test {
         ApiFunction api_function;
 
         BufferedBlockMgr2::Client* client;
-        Status status = block_mgr->register_client(0, _client_tracker.get(), state, &client);
+        Status status = block_mgr->register_client(0, _client_tracker, state, &client);
         EXPECT_TRUE(status.ok());
         EXPECT_TRUE(client != NULL);
 
@@ -566,8 +566,7 @@ class BufferedBlockMgrTest : public ::testing::Test {
     }
 
     scoped_ptr _test_env;
-    // scoped_ptr _client_tracker;
-    scoped_ptr<MemTracker> _client_tracker;
+    std::shared_ptr<MemTracker> _client_tracker;
     vector _created_tmp_dirs;
 };
 
@@ -584,7 +583,7 @@ TEST_F(BufferedBlockMgrTest, GetNewBlockSmallBlocks) {
     BufferedBlockMgr2* block_mgr;
     BufferedBlockMgr2::Client* client;
     block_mgr =
-            CreateMgrAndClient(0, max_num_blocks, block_size, 0, _client_tracker.get(), &client);
+            CreateMgrAndClient(0, max_num_blocks, block_size, 0, _client_tracker, &client);
     EXPECT_EQ(0, _test_env->block_mgr_parent_tracker()->consumption());
 
     vector blocks;
@@ -645,7 +644,7 @@ TEST_F(BufferedBlockMgrTest, Pin) {
     BufferedBlockMgr2* block_mgr;
     BufferedBlockMgr2::Client* client;
     block_mgr =
-            CreateMgrAndClient(0, max_num_blocks, block_size, 0, _client_tracker.get(), &client);
+            CreateMgrAndClient(0, max_num_blocks, block_size, 0, _client_tracker, &client);
 
     vector blocks;
     AllocateBlocks(block_mgr, client, max_num_blocks, &blocks);
@@ -700,7 +699,7 @@ TEST_F(BufferedBlockMgrTest, Deletion) {
     BufferedBlockMgr2* block_mgr;
     BufferedBlockMgr2::Client* client;
     block_mgr =
-            CreateMgrAndClient(0, max_num_buffers, block_size, 0, _client_tracker.get(), &client);
+            CreateMgrAndClient(0, max_num_buffers, block_size, 0, _client_tracker, &client);
 
     // Check counters.
     RuntimeProfile* profile = block_mgr->profile();
@@ -725,7 +724,7 @@ TEST_F(BufferedBlockMgrTest, DeleteSingleBlocks) {
     int max_num_buffers = 16;
     BufferedBlockMgr2::Client* client;
     BufferedBlockMgr2* block_mgr =
-            CreateMgrAndClient(0, max_num_buffers, _block_size, 0, _client_tracker.get(), &client);
+            CreateMgrAndClient(0, max_num_buffers, _block_size, 0, _client_tracker, &client);
 
     // Pinned I/O block.
     BufferedBlockMgr2::Block* new_block;
@@ -777,7 +776,7 @@ TEST_F(BufferedBlockMgrTest, Close) {
     BufferedBlockMgr2* block_mgr;
     BufferedBlockMgr2::Client* client;
     block_mgr =
-            CreateMgrAndClient(0, max_num_buffers, block_size, 0, _client_tracker.get(), &client);
+            CreateMgrAndClient(0, max_num_buffers, block_size, 0, _client_tracker, &client);
 
     vector blocks;
     AllocateBlocks(block_mgr, client, max_num_buffers, &blocks);
@@ -819,7 +818,7 @@ TEST_F(BufferedBlockMgrTest, WriteError) {
     BufferedBlockMgr2* block_mgr;
     BufferedBlockMgr2::Client* client;
     block_mgr =
-            CreateMgrAndClient(0, max_num_buffers, block_size, 0, _client_tracker.get(), &client);
+            CreateMgrAndClient(0, max_num_buffers, block_size, 0, _client_tracker, &client);
 
     vector blocks;
     AllocateBlocks(block_mgr, client, max_num_buffers, &blocks);
@@ -863,7 +862,7 @@ TEST_F(BufferedBlockMgrTest, TmpFileAllocateError) {
     int max_num_buffers = 2;
     BufferedBlockMgr2::Client* client;
     BufferedBlockMgr2* block_mgr =
-            CreateMgrAndClient(0, max_num_buffers, _block_size, 0, _client_tracker.get(), &client);
+            CreateMgrAndClient(0, max_num_buffers, _block_size, 0, _client_tracker, &client);
 
     vector blocks;
     AllocateBlocks(block_mgr, client, max_num_buffers, &blocks);
@@ -897,7 +896,7 @@ TEST_F(BufferedBlockMgrTest, DISABLED_WriteErrorBlacklist) {
     int blocks_per_mgr = MAX_NUM_BLOCKS / NUM_BLOCK_MGRS;
     vector block_mgrs;
     vector clients;
-    CreateMgrsAndClients(0, NUM_BLOCK_MGRS, blocks_per_mgr, _block_size, 0, _client_tracker.get(),
+    CreateMgrsAndClients(0, NUM_BLOCK_MGRS, blocks_per_mgr, _block_size, 0, _client_tracker,
                          &block_mgrs, &clients);
 
     // Allocate files for all 2x2 combinations by unpinning blocks.
@@ -957,7 +956,7 @@ TEST_F(BufferedBlockMgrTest, DISABLED_WriteErrorBlacklist) {
     // A new block manager should only use the good dir for backing storage.
     BufferedBlockMgr2::Client* new_client;
     BufferedBlockMgr2* new_block_mgr = CreateMgrAndClient(9999, blocks_per_mgr, _block_size, 0,
-                                                          _client_tracker.get(), &new_client);
+                                                          _client_tracker, &new_client);
     vector new_mgr_blocks;
     AllocateBlocks(new_block_mgr, new_client, blocks_per_mgr, &new_mgr_blocks);
     UnpinBlocks(new_mgr_blocks);
@@ -980,7 +979,7 @@ TEST_F(BufferedBlockMgrTest, AllocationErrorHandling) {
     // vector runtime_states;
     vector block_mgrs;
     vector clients;
-    CreateMgrsAndClients(0, num_block_mgrs, blocks_per_mgr, _block_size, 0, _client_tracker.get(),
+    CreateMgrsAndClients(0, num_block_mgrs, blocks_per_mgr, _block_size, 0, _client_tracker,
                          &block_mgrs, &clients);
 
     // Allocate files for all 2x2 combinations by unpinning blocks.
@@ -1019,7 +1018,7 @@ TEST_F(BufferedBlockMgrTest, NoDirsAllocationError) {
     int max_num_buffers = 2;
     BufferedBlockMgr2::Client* client;
     BufferedBlockMgr2* block_mgr =
-            CreateMgrAndClient(0, max_num_buffers, _block_size, 0, _client_tracker.get(), &client);
+            CreateMgrAndClient(0, max_num_buffers, _block_size, 0, _client_tracker, &client);
     vector blocks;
     AllocateBlocks(block_mgr, client, max_num_buffers, &blocks);
     for (int i = 0; i < tmp_dirs.size(); ++i) {
@@ -1043,11 +1042,11 @@ TEST_F(BufferedBlockMgrTest, MultipleClients) {
 
     BufferedBlockMgr2::Client* client1 = NULL;
     BufferedBlockMgr2::Client* client2 = NULL;
-    status = block_mgr->register_client(client1_buffers, _client_tracker.get(), runtime_state,
+    status = block_mgr->register_client(client1_buffers, _client_tracker, runtime_state,
                                         &client1);
     EXPECT_TRUE(status.ok());
     EXPECT_TRUE(client1 != NULL);
-    status = block_mgr->register_client(client2_buffers, _client_tracker.get(), runtime_state,
+    status = block_mgr->register_client(client2_buffers, _client_tracker, runtime_state,
                                         &client2);
     EXPECT_TRUE(status.ok());
     EXPECT_TRUE(client2 != NULL);
@@ -1156,11 +1155,11 @@ TEST_F(BufferedBlockMgrTest, MultipleClientsExtraBuffers) {
     BufferedBlockMgr2::Client* client1 = NULL;
     BufferedBlockMgr2::Client* client2 = NULL;
     BufferedBlockMgr2::Block* block = NULL;
-    status = block_mgr->register_client(client1_buffers, _client_tracker.get(), runtime_state,
+    status = block_mgr->register_client(client1_buffers, _client_tracker, runtime_state,
                                         &client1);
     EXPECT_TRUE(status.ok());
     EXPECT_TRUE(client1 != NULL);
-    status = block_mgr->register_client(client2_buffers, _client_tracker.get(), runtime_state,
+    status = block_mgr->register_client(client2_buffers, _client_tracker, runtime_state,
                                         &client2);
     EXPECT_TRUE(status.ok());
     EXPECT_TRUE(client2 != NULL);
@@ -1205,11 +1204,11 @@ TEST_F(BufferedBlockMgrTest, ClientOversubscription) {
     BufferedBlockMgr2::Client* client1 = NULL;
     BufferedBlockMgr2::Client* client2 = NULL;
     BufferedBlockMgr2::Block* block = NULL;
-    status = block_mgr->register_client(client1_buffers, _client_tracker.get(), runtime_state,
+    status = block_mgr->register_client(client1_buffers, _client_tracker, runtime_state,
                                         &client1);
     EXPECT_TRUE(status.ok());
     EXPECT_TRUE(client1 != NULL);
-    status = block_mgr->register_client(client2_buffers, _client_tracker.get(), runtime_state,
+    status = block_mgr->register_client(client2_buffers, _client_tracker, runtime_state,
                                         &client2);
     EXPECT_TRUE(status.ok());
     EXPECT_TRUE(client2 != NULL);
diff --git a/be/test/runtime/buffered_tuple_stream2_test.cpp b/be/test/runtime/buffered_tuple_stream2_test.cpp
index 8630ee1267c120..251e537624ec5e 100644
--- a/be/test/runtime/buffered_tuple_stream2_test.cpp
+++ b/be/test/runtime/buffered_tuple_stream2_test.cpp
@@ -64,14 +64,14 @@ static const int NUM_STRINGS = sizeof(STRINGS) / sizeof(StringValue);
 
 class SimpleTupleStreamTest : public testing::Test {
 public:
-    SimpleTupleStreamTest() : _tracker(-1) {}
+    SimpleTupleStreamTest() : _tracker(new MemTracker(-1)) {}
      // A null dtor to pass codestyle check
     ~SimpleTupleStreamTest() {}
 protected:
     virtual void SetUp() {
         _test_env.reset(new TestEnv());
         create_descriptors();
-        _mem_pool.reset(new MemPool(&_tracker));
+        _mem_pool.reset(new MemPool(_tracker.get()));
     }
 
     virtual void create_descriptors() {
@@ -102,7 +102,7 @@ class SimpleTupleStreamTest : public testing::Test {
     void InitBlockMgr(int64_t limit, int block_size) {
         Status status = _test_env->create_query_state(0, limit, block_size, &_runtime_state);
         ASSERT_TRUE(status.ok());
-        status = _runtime_state->block_mgr2()->register_client(0, &_tracker, _runtime_state,
+        status = _runtime_state->block_mgr2()->register_client(0, _tracker, _runtime_state,
                 &_client);
         ASSERT_TRUE(status.ok());
     }
@@ -120,7 +120,7 @@ class SimpleTupleStreamTest : public testing::Test {
     }
 
     virtual RowBatch* CreateIntBatch(int offset, int num_rows, bool gen_null) {
-        RowBatch* batch = _pool.add(new RowBatch(*_int_desc, num_rows, &_tracker));
+        RowBatch* batch = _pool.add(new RowBatch(*_int_desc, num_rows, _tracker.get()));
         int tuple_size = _int_desc->tuple_descriptors()[0]->byte_size();
         uint8_t* tuple_mem = reinterpret_cast<uint8_t*>(
                 batch->tuple_data_pool()->allocate(tuple_size * num_rows));
@@ -149,7 +149,7 @@ class SimpleTupleStreamTest : public testing::Test {
 
     virtual RowBatch* CreateStringBatch(int offset, int num_rows, bool gen_null) {
         int tuple_size = sizeof(StringValue) + 1;
-        RowBatch* batch = _pool.add(new RowBatch(*_string_desc, num_rows, &_tracker));
+        RowBatch* batch = _pool.add(new RowBatch(*_string_desc, num_rows, _tracker.get()));
         uint8_t* tuple_mem = batch->tuple_data_pool()->allocate(tuple_size * num_rows);
         memset(tuple_mem, 0, tuple_size * num_rows);
         const int string_tuples = _string_desc->tuple_descriptors().size();
@@ -212,7 +212,7 @@ class SimpleTupleStreamTest : public testing::Test {
     void ReadValues(BufferedTupleStream2* stream, RowDescriptor* desc, vector* results,
             int num_batches = -1) {
         bool eos = false;
-        RowBatch batch(*desc, BATCH_SIZE, &_tracker);
+        RowBatch batch(*desc, BATCH_SIZE, _tracker.get());
         int batches_read = 0;
         do {
             batch.reset();
@@ -357,7 +357,7 @@ class SimpleTupleStreamTest : public testing::Test {
     RuntimeState* _runtime_state;
     BufferedBlockMgr2::Client* _client;
 
-    MemTracker _tracker;
+    std::shared_ptr<MemTracker> _tracker;
     ObjectPool _pool;
     RowDescriptor* _int_desc;
     RowDescriptor* _string_desc;
@@ -791,7 +791,7 @@ TEST_F(ArrayTupleStreamTest, TestArrayDeepCopy) {
     array_len_index = 0;
     bool eos = false;
     int rows_read = 0;
-    RowBatch batch(*_array_desc, BATCH_SIZE, &_tracker);
+    RowBatch batch(*_array_desc, BATCH_SIZE, _tracker.get());
     do {
         batch.reset();
         ASSERT_TRUE(stream.get_next(&batch, &eos).ok());
diff --git a/be/test/runtime/data_stream_test.cpp b/be/test/runtime/data_stream_test.cpp
index e2558f82380928..527d2fde354b86 100644
--- a/be/test/runtime/data_stream_test.cpp
+++ b/be/test/runtime/data_stream_test.cpp
@@ -123,8 +123,7 @@ class DorisTestBackend : public BackendServiceIf {
 class DataStreamTest : public testing::Test {
 protected:
     DataStreamTest()
-            : _limit(-1),
-              _dummy_mem_limit(-1),
+            : _limit(new MemTracker(-1)),
               _runtime_state(TUniqueId(), TQueryOptions(), "", &_exec_env),
               _next_val(0) {
         _exec_env.init_for_tests();
@@ -210,13 +209,11 @@ class DataStreamTest : public testing::Test {
     static const int NUM_BATCHES = TOTAL_DATA_SIZE / BATCH_CAPACITY / PER_ROW_DATA;
 
     ObjectPool _obj_pool;
-    MemTracker _limit;
-    MemTracker _tracker;
+    std::shared_ptr _limit;
+    std::shared_ptr _tracker;
     DescriptorTbl* _desc_tbl;
     const RowDescriptor* _row_desc;
     TupleRowComparator* _less_than;
-    MemTracker _dummy_mem_limit;
-    MemTracker _dummy_mem_tracker;
     ExecEnv _exec_env;
     RuntimeState _runtime_state;
     TUniqueId _next_instance_id;
@@ -336,8 +333,8 @@ class DataStreamTest : public testing::Test {
         SlotRef* rhs_slot = _obj_pool.add(new SlotRef(expr_node));
         _rhs_slot_ctx = _obj_pool.add(new ExprContext(rhs_slot));
 
-        _lhs_slot_ctx->prepare(&_runtime_state, *_row_desc, &_tracker);
-        _rhs_slot_ctx->prepare(&_runtime_state, *_row_desc, &_tracker);
+        _lhs_slot_ctx->prepare(&_runtime_state, *_row_desc, _tracker.get());
+        _rhs_slot_ctx->prepare(&_runtime_state, *_row_desc, _tracker.get());
         _lhs_slot_ctx->open(NULL);
         _rhs_slot_ctx->open(NULL);
         SortExecExprs* sort_exprs = _obj_pool.add(new SortExecExprs());
@@ -349,7 +346,7 @@ class DataStreamTest : public testing::Test {
 
     // Create _batch, but don't fill it with data yet. Assumes we created _row_desc.
     RowBatch* create_row_batch() {
-        RowBatch* batch = new RowBatch(*_row_desc, BATCH_CAPACITY, &_limit);
+        RowBatch* batch = new RowBatch(*_row_desc, BATCH_CAPACITY, _limit.get());
         int64_t* tuple_mem =
                 reinterpret_cast(batch->tuple_data_pool()->allocate(BATCH_CAPACITY * 8));
         bzero(tuple_mem, BATCH_CAPACITY * 8);
@@ -436,8 +433,7 @@ class DataStreamTest : public testing::Test {
         if (info->status.is_cancelled()) {
             return;
         }
-        // RowBatch batch(*_row_desc, 1024, &_tracker);
-        RowBatch batch(*_row_desc, 1024, &_limit);
+        RowBatch batch(*_row_desc, 1024, _limit.get());
         VLOG_QUERY << "start reading merging";
         bool eos = false;
         while (!(info->status = info->stream_recvr->get_next(&batch, &eos)).is_cancelled()) {
diff --git a/be/test/runtime/disk_io_mgr_test.cpp b/be/test/runtime/disk_io_mgr_test.cpp
index e23b18c484098e..b6f99eff2031d6 100644
--- a/be/test/runtime/disk_io_mgr_test.cpp
+++ b/be/test/runtime/disk_io_mgr_test.cpp
@@ -187,8 +187,7 @@ class DiskIoMgrTest : public testing::Test {
 // by reading the data back via a separate IoMgr instance. All writes are expected to
 // complete successfully.
 TEST_F(DiskIoMgrTest, SingleWriter) {
-    // MemTracker mem_tracker(LARGE_MEM_LIMIT);
-    MemTracker mem_tracker(LARGE_MEM_LIMIT);
+    std::shared_ptr mem_tracker(new MemTracker(LARGE_MEM_LIMIT));
     _num_ranges_written = 0;
     string tmp_file = "/tmp/disk_io_mgr_test.txt";
     int num_ranges = 100;
@@ -202,21 +201,20 @@ TEST_F(DiskIoMgrTest, SingleWriter) {
     }
 
     scoped_ptr read_io_mgr(new DiskIoMgr(1, 1, 1, 10));
-    // MemTracker reader_mem_tracker(LARGE_MEM_LIMIT);
-    MemTracker reader_mem_tracker(LARGE_MEM_LIMIT);
-    Status status = read_io_mgr->init(&reader_mem_tracker);
+    std::shared_ptr reader_mem_tracker(new MemTracker(LARGE_MEM_LIMIT));
+    Status status = read_io_mgr->init(reader_mem_tracker);
     ASSERT_TRUE(status.ok());
     DiskIoMgr::RequestContext* reader;
-    status = read_io_mgr->register_context(&reader, &reader_mem_tracker);
+    status = read_io_mgr->register_context(&reader, reader_mem_tracker);
     ASSERT_TRUE(status.ok());
     for (int num_threads_per_disk = 1; num_threads_per_disk <= 5; ++num_threads_per_disk) {
         for (int num_disks = 1; num_disks <= 5; num_disks += 2) {
             _pool.reset(new ObjectPool);
             DiskIoMgr io_mgr(num_disks, num_threads_per_disk, 1, 10);
-            status = io_mgr.init(&mem_tracker);
+            status = io_mgr.init(mem_tracker);
             ASSERT_TRUE(status.ok());
             DiskIoMgr::RequestContext* writer;
-            io_mgr.register_context(&writer, &mem_tracker);
+            io_mgr.register_context(&writer, mem_tracker);
             for (int i = 0; i < num_ranges; ++i) {
                 int32_t* data = _pool->add(new int32_t);
                 *data = rand();
@@ -250,11 +248,11 @@ TEST_F(DiskIoMgrTest, SingleWriter) {
 // Perform invalid writes (e.g. non-existent file, negative offset) and validate
 // that an error status is returned via the write callback.
 TEST_F(DiskIoMgrTest, InvalidWrite) {
-    MemTracker mem_tracker(LARGE_MEM_LIMIT);
+    std::shared_ptr mem_tracker(new MemTracker(LARGE_MEM_LIMIT));
     _num_ranges_written = 0;
     string tmp_file = "/tmp/non-existent.txt";
     DiskIoMgr io_mgr(1, 1, 1, 10);
-    Status status = io_mgr.init(&mem_tracker);
+    Status status = io_mgr.init(mem_tracker);
     ASSERT_TRUE(status.ok());
     DiskIoMgr::RequestContext* writer;
     status = io_mgr.register_context(&writer);
@@ -306,7 +304,7 @@ TEST_F(DiskIoMgrTest, InvalidWrite) {
 // add_write_range() is expected to succeed before the cancel and fail after it.
 // The writes themselves may finish with status cancelled or ok.
 TEST_F(DiskIoMgrTest, SingleWriterCancel) {
-    MemTracker mem_tracker(LARGE_MEM_LIMIT);
+    std::shared_ptr mem_tracker(new MemTracker(LARGE_MEM_LIMIT));
     _num_ranges_written = 0;
     string tmp_file = "/tmp/disk_io_mgr_test.txt";
     int num_ranges = 100;
@@ -321,19 +319,19 @@ TEST_F(DiskIoMgrTest, SingleWriterCancel) {
     }
 
     scoped_ptr read_io_mgr(new DiskIoMgr(1, 1, 1, 10));
-    MemTracker reader_mem_tracker(LARGE_MEM_LIMIT);
-    Status status = read_io_mgr->init(&reader_mem_tracker);
+    std::shared_ptr reader_mem_tracker(new MemTracker(LARGE_MEM_LIMIT));
+    Status status = read_io_mgr->init(reader_mem_tracker);
     ASSERT_TRUE(status.ok());
     DiskIoMgr::RequestContext* reader;
-    status = read_io_mgr->register_context(&reader, &reader_mem_tracker);
+    status = read_io_mgr->register_context(&reader, reader_mem_tracker);
     ASSERT_TRUE(status.ok());
     for (int num_threads_per_disk = 1; num_threads_per_disk <= 5; ++num_threads_per_disk) {
         for (int num_disks = 1; num_disks <= 5; num_disks += 2) {
             _pool.reset(new ObjectPool);
             DiskIoMgr io_mgr(num_disks, num_threads_per_disk, 1, 10);
-            status = io_mgr.init(&mem_tracker);
+            status = io_mgr.init(mem_tracker);
             DiskIoMgr::RequestContext* writer;
-            io_mgr.register_context(&writer, &mem_tracker);
+            io_mgr.register_context(&writer, mem_tracker);
             Status validate_status = Status::OK();
             for (int i = 0; i < num_ranges; ++i) {
                 if (i == num_ranges_before_cancel) {
@@ -373,7 +371,7 @@ TEST_F(DiskIoMgrTest, SingleWriterCancel) {
 // Basic test with a single reader, testing multiple threads, disks and a different
 // number of buffers.
 TEST_F(DiskIoMgrTest, SingleReader) {
-    MemTracker mem_tracker(LARGE_MEM_LIMIT);
+    std::shared_ptr mem_tracker(new MemTracker(LARGE_MEM_LIMIT));
     const char* tmp_file = "/tmp/disk_io_mgr_test.txt";
     const char* data = "abcdefghijklm";
     int len = strlen(data);
@@ -398,11 +396,11 @@ TEST_F(DiskIoMgrTest, SingleReader) {
                     }
                     DiskIoMgr io_mgr(num_disks, num_threads_per_disk, 1, 1);
 
-                    Status status = io_mgr.init(&mem_tracker);
+                    Status status = io_mgr.init(mem_tracker);
                     ASSERT_TRUE(status.ok());
-                    MemTracker reader_mem_tracker(LARGE_MEM_LIMIT);
+                    std::shared_ptr reader_mem_tracker(new MemTracker(LARGE_MEM_LIMIT));
                     DiskIoMgr::RequestContext* reader;
-                    status = io_mgr.register_context(&reader, &reader_mem_tracker);
+                    status = io_mgr.register_context(&reader, reader_mem_tracker);
                     ASSERT_TRUE(status.ok());
 
                     vector ranges;
@@ -424,17 +422,17 @@ TEST_F(DiskIoMgrTest, SingleReader) {
 
                     EXPECT_EQ(num_ranges_processed, ranges.size());
                     io_mgr.unregister_context(reader);
-                    EXPECT_EQ(reader_mem_tracker.consumption(), 0);
+                    EXPECT_EQ(reader_mem_tracker->consumption(), 0);
                 }
             }
         }
     }
-    EXPECT_EQ(mem_tracker.consumption(), 0);
+    EXPECT_EQ(mem_tracker->consumption(), 0);
 }
 
 // This test issues adding additional scan ranges while there are some still in flight.
 TEST_F(DiskIoMgrTest, AddScanRangeTest) {
-    MemTracker mem_tracker(LARGE_MEM_LIMIT);
+    std::shared_ptr mem_tracker(new MemTracker(LARGE_MEM_LIMIT));
     const char* tmp_file = "/tmp/disk_io_mgr_test.txt";
     const char* data = "abcdefghijklm";
     int len = strlen(data);
@@ -455,11 +453,11 @@ TEST_F(DiskIoMgrTest, AddScanRangeTest) {
                 if (++iters % 5000 == 0) LOG(ERROR) << "Starting iteration " << iters;
                 DiskIoMgr io_mgr(num_disks, num_threads_per_disk, 1, 1);
 
-                Status status = io_mgr.init(&mem_tracker);
+                Status status = io_mgr.init(mem_tracker);
                 ASSERT_TRUE(status.ok());
-                MemTracker reader_mem_tracker(LARGE_MEM_LIMIT);
+                std::shared_ptr reader_mem_tracker(new MemTracker(LARGE_MEM_LIMIT));
                 DiskIoMgr::RequestContext* reader;
-                status = io_mgr.register_context(&reader, &reader_mem_tracker);
+                status = io_mgr.register_context(&reader, reader_mem_tracker);
                 ASSERT_TRUE(status.ok());
 
                 vector ranges_first_half;
@@ -499,18 +497,18 @@ TEST_F(DiskIoMgrTest, AddScanRangeTest) {
                 threads.join_all();
                 EXPECT_EQ(num_ranges_processed, len);
                 io_mgr.unregister_context(reader);
-                EXPECT_EQ(reader_mem_tracker.consumption(), 0);
+                EXPECT_EQ(reader_mem_tracker->consumption(), 0);
             }
         }
     }
-    EXPECT_EQ(mem_tracker.consumption(), 0);
+    EXPECT_EQ(mem_tracker->consumption(), 0);
 }
 
 // Test to make sure that sync reads and async reads work together
 // Note: this test is constructed so the number of buffers is greater than the
 // number of scan ranges.
 TEST_F(DiskIoMgrTest, SyncReadTest) {
-    MemTracker mem_tracker(LARGE_MEM_LIMIT);
+    std::shared_ptr mem_tracker(new MemTracker(LARGE_MEM_LIMIT));
     const char* tmp_file = "/tmp/disk_io_mgr_test.txt";
     const char* data = "abcdefghijklm";
     int len = strlen(data);
@@ -534,11 +532,11 @@ TEST_F(DiskIoMgrTest, SyncReadTest) {
                 DiskIoMgr io_mgr(
                         num_disks, num_threads_per_disk, MIN_BUFFER_SIZE, MAX_BUFFER_SIZE);
 
-                Status status = io_mgr.init(&mem_tracker);
+                Status status = io_mgr.init(mem_tracker);
                 ASSERT_TRUE(status.ok());
-                MemTracker reader_mem_tracker(LARGE_MEM_LIMIT);
+                std::shared_ptr reader_mem_tracker(new MemTracker(LARGE_MEM_LIMIT));
                 DiskIoMgr::RequestContext* reader;
-                status = io_mgr.register_context(&reader, &reader_mem_tracker);
+                status = io_mgr.register_context(&reader, reader_mem_tracker);
                 ASSERT_TRUE(status.ok());
 
                 DiskIoMgr::ScanRange* complete_range = init_range(1, tmp_file, 0, strlen(data), 0,
@@ -577,16 +575,16 @@ TEST_F(DiskIoMgrTest, SyncReadTest) {
 
                 EXPECT_EQ(num_ranges_processed, ranges.size());
                 io_mgr.unregister_context(reader);
-                EXPECT_EQ(reader_mem_tracker.consumption(), 0);
+                EXPECT_EQ(reader_mem_tracker->consumption(), 0);
             }
         }
     }
-    EXPECT_EQ(mem_tracker.consumption(), 0);
+    EXPECT_EQ(mem_tracker->consumption(), 0);
 }
 
 // Tests a single reader cancelling half way through scan ranges.
 TEST_F(DiskIoMgrTest, SingleReaderCancel) {
-    MemTracker mem_tracker(LARGE_MEM_LIMIT);
+    std::shared_ptr mem_tracker(new MemTracker(LARGE_MEM_LIMIT));
     const char* tmp_file = "/tmp/disk_io_mgr_test.txt";
     const char* data = "abcdefghijklm";
     int len = strlen(data);
@@ -607,11 +605,11 @@ TEST_F(DiskIoMgrTest, SingleReaderCancel) {
                 if (++iters % 5000 == 0) LOG(ERROR) << "Starting iteration " << iters;
                 DiskIoMgr io_mgr(num_disks, num_threads_per_disk, 1, 1);
 
-                Status status = io_mgr.init(&mem_tracker);
+                Status status = io_mgr.init(mem_tracker);
                 ASSERT_TRUE(status.ok());
-                MemTracker reader_mem_tracker(LARGE_MEM_LIMIT);
+                std::shared_ptr reader_mem_tracker(new MemTracker(LARGE_MEM_LIMIT));
                 DiskIoMgr::RequestContext* reader;
-                status = io_mgr.register_context(&reader, &reader_mem_tracker);
+                status = io_mgr.register_context(&reader, reader_mem_tracker);
                 ASSERT_TRUE(status.ok());
 
                 vector ranges;
@@ -645,11 +643,11 @@ TEST_F(DiskIoMgrTest, SingleReaderCancel) {
                 threads.join_all();
                 EXPECT_TRUE(io_mgr.context_status(reader).is_cancelled());
                 io_mgr.unregister_context(reader);
-                EXPECT_EQ(reader_mem_tracker.consumption(), 0);
+                EXPECT_EQ(reader_mem_tracker->consumption(), 0);
             }
         }
     }
-    EXPECT_EQ(mem_tracker.consumption(), 0);
+    EXPECT_EQ(mem_tracker->consumption(), 0);
 }
 
 // Test when the reader goes over the mem limit
@@ -674,14 +672,14 @@ TEST_F(DiskIoMgrTest, MemTrackers) {
             LOG(ERROR) << "Starting iteration " << iters;
         }
 
-        MemTracker mem_tracker(mem_limit_num_buffers * MAX_BUFFER_SIZE);
+        std::shared_ptr mem_tracker(new MemTracker(mem_limit_num_buffers * MAX_BUFFER_SIZE));
         DiskIoMgr io_mgr(1, 1, MIN_BUFFER_SIZE, MAX_BUFFER_SIZE);
 
-        Status status = io_mgr.init(&mem_tracker);
+        Status status = io_mgr.init(mem_tracker);
         ASSERT_TRUE(status.ok());
-        MemTracker reader_mem_tracker(LARGE_MEM_LIMIT);
+        std::shared_ptr reader_mem_tracker(new MemTracker(LARGE_MEM_LIMIT));
         DiskIoMgr::RequestContext* reader;
-        status = io_mgr.register_context(&reader, &reader_mem_tracker);
+        status = io_mgr.register_context(&reader, reader_mem_tracker);
         ASSERT_TRUE(status.ok());
 
         vector ranges;
@@ -727,7 +725,7 @@ TEST_F(DiskIoMgrTest, MemTrackers) {
 
         EXPECT_TRUE(io_mgr.context_status(reader).is_mem_limit_exceeded());
         io_mgr.unregister_context(reader);
-        EXPECT_EQ(reader_mem_tracker.consumption(), 0);
+        EXPECT_EQ(reader_mem_tracker->consumption(), 0);
     }
 }
 #if 0
@@ -736,7 +734,7 @@ TEST_F(DiskIoMgrTest, MemTrackers) {
 // only tests the fallback mechanism.
 // TODO: we can fake the cached read path without HDFS
 TEST_F(DiskIoMgrTest, CachedReads) {
-    MemTracker mem_tracker(LARGE_MEM_LIMIT);
+    std::shared_ptr mem_tracker(new MemTracker(LARGE_MEM_LIMIT));
     const char* tmp_file = "/tmp/disk_io_mgr_test.txt";
     const char* data = "abcdefghijklm";
     int len = strlen(data);
@@ -755,11 +753,11 @@ TEST_F(DiskIoMgrTest, CachedReads) {
         if (++iters % 5000 == 0) LOG(ERROR) << "Starting iteration " << iters;
         DiskIoMgr io_mgr(num_disks, 1, MIN_BUFFER_SIZE, MAX_BUFFER_SIZE);
 
-        Status status = io_mgr.init(&mem_tracker);
+        Status status = io_mgr.init(mem_tracker);
         ASSERT_TRUE(status.ok());
-        MemTracker reader_mem_tracker;
+        std::shared_ptr reader_mem_tracker(new MemTracker());
         DiskIoMgr::RequestContext* reader;
-        status = io_mgr.register_context(&reader, &reader_mem_tracker);
+        status = io_mgr.register_context(&reader, reader_mem_tracker);
         ASSERT_TRUE(status.ok());
 
         DiskIoMgr::ScanRange* complete_range =
@@ -798,14 +796,14 @@ TEST_F(DiskIoMgrTest, CachedReads) {
 
         EXPECT_EQ(num_ranges_processed, ranges.size());
         io_mgr.unregister_context(reader);
-        EXPECT_EQ(reader_mem_tracker.consumption(), 0);
+        EXPECT_EQ(reader_mem_tracker->consumption(), 0);
     }
-    EXPECT_EQ(mem_tracker.consumption(), 0);
+    EXPECT_EQ(mem_tracker->consumption(), 0);
 }
 #endif // end #if 0
 
 TEST_F(DiskIoMgrTest, MultipleReaderWriter) {
-    MemTracker mem_tracker(LARGE_MEM_LIMIT);
+    std::shared_ptr mem_tracker(new MemTracker(LARGE_MEM_LIMIT));
     const int ITERATIONS = 1;
     const char* data = "abcdefghijklmnopqrstuvwxyz";
     const int num_contexts = 5;
@@ -832,7 +830,7 @@ TEST_F(DiskIoMgrTest, MultipleReaderWriter) {
         for (int threads_per_disk = 1; threads_per_disk <= 5; ++threads_per_disk) {
             for (int num_disks = 1; num_disks <= 5; num_disks += 2) {
                 DiskIoMgr io_mgr(num_disks, threads_per_disk, MIN_BUFFER_SIZE, MAX_BUFFER_SIZE);
-                io_mgr.init(&mem_tracker);
+                io_mgr.init(mem_tracker);
                 for (int file_index = 0; file_index < num_contexts; ++file_index) {
                     status = io_mgr.register_context(&contexts[file_index]);
                     ASSERT_TRUE(status.ok());
@@ -896,7 +894,7 @@ TEST_F(DiskIoMgrTest, MultipleReaderWriter) {
 
 // This test will test multiple concurrent reads each reading a different file.
 TEST_F(DiskIoMgrTest, MultipleReader) {
-    MemTracker mem_tracker(LARGE_MEM_LIMIT);
+    std::shared_ptr mem_tracker(new MemTracker(LARGE_MEM_LIMIT));
     const int NUM_READERS = 5;
     const int DATA_LEN = 50;
     const int ITERATIONS = 25;
@@ -950,7 +948,7 @@ TEST_F(DiskIoMgrTest, MultipleReader) {
                     if (++iters % 2500 == 0) LOG(ERROR) << "Starting iteration " << iters;
 
                     DiskIoMgr io_mgr(num_disks, threads_per_disk, MIN_BUFFER_SIZE, MAX_BUFFER_SIZE);
-                    Status status = io_mgr.init(&mem_tracker);
+                    Status status = io_mgr.init(mem_tracker);
                     ASSERT_TRUE(status.ok());
 
                     for (int i = 0; i < NUM_READERS; ++i) {
@@ -986,7 +984,7 @@ TEST_F(DiskIoMgrTest, MultipleReader) {
             }
         }
     }
-    EXPECT_EQ(mem_tracker.consumption(), 0);
+    EXPECT_EQ(mem_tracker->consumption(), 0);
 }
 
 #if 0
@@ -1004,12 +1002,12 @@ TEST_F(DiskIoMgrTest, Buffers) {
     // Test default min/max buffer size
     int min_buffer_size = 1024;
     int max_buffer_size = 8 * 1024 * 1024; // 8 MB
-    MemTracker mem_tracker(max_buffer_size * 2);
+    std::shared_ptr mem_tracker(new MemTracker(max_buffer_size * 2));
 
     DiskIoMgr io_mgr(1, 1, min_buffer_size, max_buffer_size);
-    Status status = io_mgr.init(&mem_tracker);
+    Status status = io_mgr.init(mem_tracker);
     ASSERT_TRUE(status.ok());
-    ASSERT_EQ(mem_tracker.consumption(), 0);
+    ASSERT_EQ(mem_tracker->consumption(), 0);
 
     // buffer length should be rounded up to min buffer size
     int64_t buffer_len = 1;
@@ -1017,7 +1015,7 @@ TEST_F(DiskIoMgrTest, Buffers) {
     EXPECT_EQ(buffer_len, min_buffer_size);
     EXPECT_EQ(io_mgr._num_allocated_buffers, 1);
     io_mgr.return_free_buffer(buf, buffer_len);
-    EXPECT_EQ(mem_tracker.consumption(), min_buffer_size);
+    EXPECT_EQ(mem_tracker->consumption(), min_buffer_size);
 
     // reuse buffer
     buffer_len = min_buffer_size;
@@ -1025,19 +1023,19 @@ TEST_F(DiskIoMgrTest, Buffers) {
     EXPECT_EQ(buffer_len, min_buffer_size);
     EXPECT_EQ(io_mgr._num_allocated_buffers, 1);
     io_mgr.return_free_buffer(buf, buffer_len);
-    EXPECT_EQ(mem_tracker.consumption(), min_buffer_size);
+    EXPECT_EQ(mem_tracker->consumption(), min_buffer_size);
 
     // bump up to next buffer size
     buffer_len = min_buffer_size + 1;
     buf = io_mgr.get_free_buffer(&buffer_len);
     EXPECT_EQ(buffer_len, min_buffer_size * 2);
     EXPECT_EQ(io_mgr._num_allocated_buffers, 2);
-    EXPECT_EQ(mem_tracker.consumption(), min_buffer_size * 3);
+    EXPECT_EQ(mem_tracker->consumption(), min_buffer_size * 3);
 
     // gc unused buffer
     io_mgr.gc_io_buffers();
     EXPECT_EQ(io_mgr._num_allocated_buffers, 1);
-    EXPECT_EQ(mem_tracker.consumption(), min_buffer_size * 2);
+    EXPECT_EQ(mem_tracker->consumption(), min_buffer_size * 2);
 
     io_mgr.return_free_buffer(buf, buffer_len);
 
@@ -1047,17 +1045,17 @@ TEST_F(DiskIoMgrTest, Buffers) {
     EXPECT_EQ(buffer_len, max_buffer_size);
     EXPECT_EQ(io_mgr._num_allocated_buffers, 2);
     io_mgr.return_free_buffer(buf, buffer_len);
-    EXPECT_EQ(mem_tracker.consumption(), min_buffer_size * 2 + max_buffer_size);
+    EXPECT_EQ(mem_tracker->consumption(), min_buffer_size * 2 + max_buffer_size);
 
     // gc buffers
     io_mgr.gc_io_buffers();
     EXPECT_EQ(io_mgr._num_allocated_buffers, 0);
-    EXPECT_EQ(mem_tracker.consumption(), 0);
+    EXPECT_EQ(mem_tracker->consumption(), 0);
 }
 
 // IMPALA-2366: handle partial read where range goes past end of file.
 TEST_F(DiskIoMgrTest, PartialRead) {
-    MemTracker mem_tracker(LARGE_MEM_LIMIT);
+    std::shared_ptr mem_tracker(new MemTracker(LARGE_MEM_LIMIT));
     const char* tmp_file = "/tmp/disk_io_mgr_test.txt";
     const char* data = "the quick brown fox jumped over the lazy dog";
     int len = strlen(data);
@@ -1071,11 +1069,11 @@ TEST_F(DiskIoMgrTest, PartialRead) {
     _pool.reset(new ObjectPool);
     scoped_ptr io_mgr(new DiskIoMgr(1, 1, read_len, read_len));
 
-    Status status = io_mgr->init(&mem_tracker);
+    Status status = io_mgr->init(mem_tracker);
     ASSERT_TRUE(status.ok());
-    MemTracker reader_mem_tracker(LARGE_MEM_LIMIT);
+    std::shared_ptr reader_mem_tracker(new MemTracker(LARGE_MEM_LIMIT));
     DiskIoMgr::RequestContext* reader;
-    status = io_mgr->register_context(&reader, &reader_mem_tracker);
+    status = io_mgr->register_context(&reader, reader_mem_tracker);
     ASSERT_TRUE(status.ok());
 
     // We should not read past the end of file.
@@ -1091,8 +1089,8 @@ TEST_F(DiskIoMgrTest, PartialRead) {
     io_mgr->unregister_context(reader);
     _pool.reset();
     io_mgr.reset();
-    EXPECT_EQ(reader_mem_tracker.consumption(), 0);
-    EXPECT_EQ(mem_tracker.consumption(), 0);
+    EXPECT_EQ(reader_mem_tracker->consumption(), 0);
+    EXPECT_EQ(mem_tracker->consumption(), 0);
 }
 
 } // end namespace doris
diff --git a/be/test/runtime/load_channel_mgr_test.cpp b/be/test/runtime/load_channel_mgr_test.cpp
index bda51c49cf1803..386f44a7931c5c 100644
--- a/be/test/runtime/load_channel_mgr_test.cpp
+++ b/be/test/runtime/load_channel_mgr_test.cpp
@@ -45,7 +45,7 @@ OLAPStatus close_status;
 int64_t wait_lock_time_ns;
 
 // mock
-DeltaWriter::DeltaWriter(WriteRequest* req, MemTracker* mem_tracker,
+DeltaWriter::DeltaWriter(WriteRequest* req, std::shared_ptr mem_tracker,
                          StorageEngine* storage_engine) :
         _req(*req) {
 }
@@ -57,7 +57,7 @@ OLAPStatus DeltaWriter::init() {
     return OLAP_SUCCESS;
 }
 
-OLAPStatus DeltaWriter::open(WriteRequest* req, MemTracker* mem_tracker, DeltaWriter** writer) {
+OLAPStatus DeltaWriter::open(WriteRequest* req, std::shared_ptr mem_tracker, DeltaWriter** writer) {
     if (open_status != OLAP_SUCCESS) {
         return open_status;
     }
@@ -173,7 +173,7 @@ TEST_F(LoadChannelMgrTest, normal) {
     DescriptorTbl::create(&obj_pool, tdesc_tbl, &desc_tbl);
     auto tuple_desc = desc_tbl->get_tuple_descriptor(0);
     RowDescriptor row_desc(*desc_tbl, {0}, {false});
-    MemTracker tracker;
+    auto tracker = std::make_shared();
     PUniqueId load_id;
     load_id.set_hi(2);
     load_id.set_lo(3);
@@ -208,7 +208,7 @@ TEST_F(LoadChannelMgrTest, normal) {
         request.add_tablet_ids(21);
         request.add_tablet_ids(20);
 
-        RowBatch row_batch(row_desc, 1024, &tracker);
+        RowBatch row_batch(row_desc, 1024, tracker.get());
 
         // row1
         {
@@ -261,7 +261,7 @@ TEST_F(LoadChannelMgrTest, cancel) {
     DescriptorTbl* desc_tbl = nullptr;
     DescriptorTbl::create(&obj_pool, tdesc_tbl, &desc_tbl);
     RowDescriptor row_desc(*desc_tbl, {0}, {false});
-    MemTracker tracker;
+
     PUniqueId load_id;
     load_id.set_hi(2);
     load_id.set_lo(3);
@@ -304,7 +304,7 @@ TEST_F(LoadChannelMgrTest, open_failed) {
     DescriptorTbl* desc_tbl = nullptr;
     DescriptorTbl::create(&obj_pool, tdesc_tbl, &desc_tbl);
     RowDescriptor row_desc(*desc_tbl, {0}, {false});
-    MemTracker tracker;
+
     PUniqueId load_id;
     load_id.set_hi(2);
     load_id.set_lo(3);
@@ -339,7 +339,7 @@ TEST_F(LoadChannelMgrTest, add_failed) {
     DescriptorTbl::create(&obj_pool, tdesc_tbl, &desc_tbl);
     auto tuple_desc = desc_tbl->get_tuple_descriptor(0);
     RowDescriptor row_desc(*desc_tbl, {0}, {false});
-    MemTracker tracker;
+    auto tracker = std::make_shared();
     PUniqueId load_id;
     load_id.set_hi(2);
     load_id.set_lo(3);
@@ -374,7 +374,7 @@ TEST_F(LoadChannelMgrTest, add_failed) {
         request.add_tablet_ids(21);
         request.add_tablet_ids(20);
 
-        RowBatch row_batch(row_desc, 1024, &tracker);
+        RowBatch row_batch(row_desc, 1024, tracker.get());
 
         // row1
         {
@@ -426,7 +426,7 @@ TEST_F(LoadChannelMgrTest, close_failed) {
     DescriptorTbl::create(&obj_pool, tdesc_tbl, &desc_tbl);
     auto tuple_desc = desc_tbl->get_tuple_descriptor(0);
     RowDescriptor row_desc(*desc_tbl, {0}, {false});
-    MemTracker tracker;
+    auto tracker = std::make_shared();
     PUniqueId load_id;
     load_id.set_hi(2);
     load_id.set_lo(3);
@@ -464,7 +464,7 @@ TEST_F(LoadChannelMgrTest, close_failed) {
         request.add_partition_ids(10);
         request.add_partition_ids(11);
 
-        RowBatch row_batch(row_desc, 1024, &tracker);
+        RowBatch row_batch(row_desc, 1024, tracker.get());
 
         // row1
         {
@@ -518,7 +518,7 @@ TEST_F(LoadChannelMgrTest, unknown_tablet) {
     DescriptorTbl::create(&obj_pool, tdesc_tbl, &desc_tbl);
     auto tuple_desc = desc_tbl->get_tuple_descriptor(0);
     RowDescriptor row_desc(*desc_tbl, {0}, {false});
-    MemTracker tracker;
+    auto tracker = std::make_shared();
     PUniqueId load_id;
     load_id.set_hi(2);
     load_id.set_lo(3);
@@ -553,7 +553,7 @@ TEST_F(LoadChannelMgrTest, unknown_tablet) {
         request.add_tablet_ids(22);
         request.add_tablet_ids(20);
 
-        RowBatch row_batch(row_desc, 1024, &tracker);
+        RowBatch row_batch(row_desc, 1024, tracker.get());
 
         // row1
         {
@@ -604,7 +604,7 @@ TEST_F(LoadChannelMgrTest, duplicate_packet) {
     DescriptorTbl::create(&obj_pool, tdesc_tbl, &desc_tbl);
     auto tuple_desc = desc_tbl->get_tuple_descriptor(0);
     RowDescriptor row_desc(*desc_tbl, {0}, {false});
-    MemTracker tracker;
+    auto tracker = std::make_shared();
     PUniqueId load_id;
     load_id.set_hi(2);
     load_id.set_lo(3);
@@ -639,7 +639,7 @@ TEST_F(LoadChannelMgrTest, duplicate_packet) {
         request.add_tablet_ids(21);
         request.add_tablet_ids(20);
 
-        RowBatch row_batch(row_desc, 1024, &tracker);
+        RowBatch row_batch(row_desc, 1024, tracker.get());
 
         // row1
         {
diff --git a/be/test/runtime/mem_limit_test.cpp b/be/test/runtime/mem_limit_test.cpp
index bd1a9a4b260723..0af067e5ea377a 100644
--- a/be/test/runtime/mem_limit_test.cpp
+++ b/be/test/runtime/mem_limit_test.cpp
@@ -27,27 +27,27 @@ namespace doris {
 TEST(MemTestTest, SingleTrackerNoLimit) {
     MemTracker t(-1);
     EXPECT_FALSE(t.has_limit());
-    t.consume(10);
+    t.Consume(10);
     EXPECT_EQ(t.consumption(), 10);
-    t.consume(10);
+    t.Consume(10);
     EXPECT_EQ(t.consumption(), 20);
-    t.release(15);
+    t.Release(15);
     EXPECT_EQ(t.consumption(), 5);
-    EXPECT_FALSE(t.limit_exceeded());
+    EXPECT_FALSE(t.LimitExceeded(MemLimit::HARD));
 }
 
 TEST(MemTestTest, SingleTrackerWithLimit) {
     MemTracker t(11);
     EXPECT_TRUE(t.has_limit());
-    t.consume(10);
+    t.Consume(10);
     EXPECT_EQ(t.consumption(), 10);
-    EXPECT_FALSE(t.limit_exceeded());
-    t.consume(10);
+    EXPECT_FALSE(t.LimitExceeded(MemLimit::HARD));
+    t.Consume(10);
     EXPECT_EQ(t.consumption(), 20);
-    EXPECT_TRUE(t.limit_exceeded());
-    t.release(15);
+    EXPECT_TRUE(t.LimitExceeded(MemLimit::HARD));
+    t.Release(15);
     EXPECT_EQ(t.consumption(), 5);
-    EXPECT_FALSE(t.limit_exceeded());
+    EXPECT_FALSE(t.LimitExceeded(MemLimit::HARD));
 }
 
 #if 0
@@ -63,129 +63,129 @@ TEST(MemTestTest, ConsumptionMetric) {
     EXPECT_TRUE(t.has_limit());
     EXPECT_EQ(t.consumption(), 0);
 
-    // consume()/release() arguments have no effect
-    t.consume(150);
+    // Consume()/Release() arguments have no effect
+    t.Consume(150);
     EXPECT_EQ(t.consumption(), 0);
     EXPECT_EQ(t.peak_consumption(), 0);
-    EXPECT_FALSE(t.limit_exceeded());
-    t.release(5);
+    EXPECT_FALSE(t.LimitExceeded(MemLimit::HARD));
+    t.Release(5);
     EXPECT_EQ(t.consumption(), 0);
     EXPECT_EQ(t.peak_consumption(), 0);
-    EXPECT_FALSE(t.limit_exceeded());
+    EXPECT_FALSE(t.LimitExceeded(MemLimit::HARD));
 
     metric.Increment(10);
     // _consumption is only updated with _consumption_metric after calls to
-    // consume()/release() with a non-zero value
-    t.consume(1);
+    // Consume()/Release() with a non-zero value
+    t.Consume(1);
     EXPECT_EQ(t.consumption(), 10);
     EXPECT_EQ(t.peak_consumption(), 10);
     metric.Increment(-5);
-    t.consume(-1);
+    t.Consume(-1);
     EXPECT_EQ(t.consumption(), 5);
     EXPECT_EQ(t.peak_consumption(), 10);
-    EXPECT_FALSE(t.limit_exceeded());
+    EXPECT_FALSE(t.LimitExceeded(MemLimit::HARD));
     metric.Increment(150);
-    t.consume(1);
+    t.Consume(1);
     EXPECT_EQ(t.consumption(), 155);
     EXPECT_EQ(t.peak_consumption(), 155);
-    EXPECT_TRUE(t.limit_exceeded());
+    EXPECT_TRUE(t.LimitExceeded(MemLimit::HARD));
     metric.Increment(-150);
-    t.consume(-1);
+    t.Consume(-1);
     EXPECT_EQ(t.consumption(), 5);
     EXPECT_EQ(t.peak_consumption(), 155);
-    EXPECT_FALSE(t.limit_exceeded());
-    // _consumption is not updated when consume()/release() is called with a zero value
+    EXPECT_FALSE(t.LimitExceeded(MemLimit::HARD));
+    // _consumption is not updated when Consume()/Release() is called with a zero value
     metric.Increment(10);
-    t.consume(0);
+    t.Consume(0);
     EXPECT_EQ(t.consumption(), 5);
     EXPECT_EQ(t.peak_consumption(), 155);
-    EXPECT_FALSE(t.limit_exceeded());
+    EXPECT_FALSE(t.LimitExceeded(MemLimit::HARD));
 }
 #endif // #end #if 0
 
 TEST(MemTestTest, TrackerHierarchy) {
-    MemTracker p(100);
-    MemTracker c1(80, "", &p);
-    MemTracker c2(50, "", &p);
+    auto p = std::make_shared<MemTracker>(100);
+    auto c1 = std::make_shared<MemTracker>(80, "", p);
+    auto c2 = std::make_shared<MemTracker>(50, "", p);
 
     // everything below limits
-    c1.consume(60);
-    EXPECT_EQ(c1.consumption(), 60);
-    EXPECT_FALSE(c1.limit_exceeded());
-    EXPECT_FALSE(c1.any_limit_exceeded());
-    EXPECT_EQ(c2.consumption(), 0);
-    EXPECT_FALSE(c2.limit_exceeded());
-    EXPECT_FALSE(c2.any_limit_exceeded());
-    EXPECT_EQ(p.consumption(), 60);
-    EXPECT_FALSE(p.limit_exceeded());
-    EXPECT_FALSE(p.any_limit_exceeded());
+    c1->Consume(60);
+    EXPECT_EQ(c1->consumption(), 60);
+    EXPECT_FALSE(c1->LimitExceeded(MemLimit::HARD));
+    EXPECT_FALSE(c1->AnyLimitExceeded(MemLimit::HARD));
+    EXPECT_EQ(c2->consumption(), 0);
+    EXPECT_FALSE(c2->LimitExceeded(MemLimit::HARD));
+    EXPECT_FALSE(c2->AnyLimitExceeded(MemLimit::HARD));
+    EXPECT_EQ(p->consumption(), 60);
+    EXPECT_FALSE(p->LimitExceeded(MemLimit::HARD));
+    EXPECT_FALSE(p->AnyLimitExceeded(MemLimit::HARD));
 
     // p goes over limit
-    c2.consume(50);
-    EXPECT_EQ(c1.consumption(), 60);
-    EXPECT_FALSE(c1.limit_exceeded());
-    EXPECT_TRUE(c1.any_limit_exceeded());
-    EXPECT_EQ(c2.consumption(), 50);
-    EXPECT_FALSE(c2.limit_exceeded());
-    EXPECT_TRUE(c2.any_limit_exceeded());
-    EXPECT_EQ(p.consumption(), 110);
-    EXPECT_TRUE(p.limit_exceeded());
+    c2->Consume(50);
+    EXPECT_EQ(c1->consumption(), 60);
+    EXPECT_FALSE(c1->LimitExceeded(MemLimit::HARD));
+    EXPECT_TRUE(c1->AnyLimitExceeded(MemLimit::HARD));
+    EXPECT_EQ(c2->consumption(), 50);
+    EXPECT_FALSE(c2->LimitExceeded(MemLimit::HARD));
+    EXPECT_TRUE(c2->AnyLimitExceeded(MemLimit::HARD));
+    EXPECT_EQ(p->consumption(), 110);
+    EXPECT_TRUE(p->LimitExceeded(MemLimit::HARD));
 
     // c2 goes over limit, p drops below limit
-    c1.release(20);
-    c2.consume(10);
-    EXPECT_EQ(c1.consumption(), 40);
-    EXPECT_FALSE(c1.limit_exceeded());
-    EXPECT_FALSE(c1.any_limit_exceeded());
-    EXPECT_EQ(c2.consumption(), 60);
-    EXPECT_TRUE(c2.limit_exceeded());
-    EXPECT_TRUE(c2.any_limit_exceeded());
-    EXPECT_EQ(p.consumption(), 100);
-    EXPECT_FALSE(p.limit_exceeded());
+    c1->Release(20);
+    c2->Consume(10);
+    EXPECT_EQ(c1->consumption(), 40);
+    EXPECT_FALSE(c1->LimitExceeded(MemLimit::HARD));
+    EXPECT_FALSE(c1->AnyLimitExceeded(MemLimit::HARD));
+    EXPECT_EQ(c2->consumption(), 60);
+    EXPECT_TRUE(c2->LimitExceeded(MemLimit::HARD));
+    EXPECT_TRUE(c2->AnyLimitExceeded(MemLimit::HARD));
+    EXPECT_EQ(p->consumption(), 100);
+    EXPECT_FALSE(p->LimitExceeded(MemLimit::HARD));
 }
 
 TEST(MemTestTest, TrackerHierarchyTryConsume) {
-    MemTracker p(100);
-    MemTracker c1(80, "", &p);
-    MemTracker c2(50, "", &p);
+    auto p = std::make_shared<MemTracker>(100);
+    auto c1 = std::make_shared<MemTracker>(80, "", p);
+    auto c2 = std::make_shared<MemTracker>(50, "", p);
 
     // everything below limits
-    bool consumption = c1.try_consume(60);
+    bool consumption = c1->TryConsume(60);
     EXPECT_EQ(consumption, true);
-    EXPECT_EQ(c1.consumption(), 60);
-    EXPECT_FALSE(c1.limit_exceeded());
-    EXPECT_FALSE(c1.any_limit_exceeded());
-    EXPECT_EQ(c2.consumption(), 0);
-    EXPECT_FALSE(c2.limit_exceeded());
-    EXPECT_FALSE(c2.any_limit_exceeded());
-    EXPECT_EQ(p.consumption(), 60);
-    EXPECT_FALSE(p.limit_exceeded());
-    EXPECT_FALSE(p.any_limit_exceeded());
+    EXPECT_EQ(c1->consumption(), 60);
+    EXPECT_FALSE(c1->LimitExceeded(MemLimit::HARD));
+    EXPECT_FALSE(c1->AnyLimitExceeded(MemLimit::HARD));
+    EXPECT_EQ(c2->consumption(), 0);
+    EXPECT_FALSE(c2->LimitExceeded(MemLimit::HARD));
+    EXPECT_FALSE(c2->AnyLimitExceeded(MemLimit::HARD));
+    EXPECT_EQ(p->consumption(), 60);
+    EXPECT_FALSE(p->LimitExceeded(MemLimit::HARD));
+    EXPECT_FALSE(p->AnyLimitExceeded(MemLimit::HARD));
 
     // p goes over limit
-    consumption = c2.try_consume(50);
+    consumption = c2->TryConsume(50);
     EXPECT_EQ(consumption, true);
-    EXPECT_EQ(c1.consumption(), 60);
-    EXPECT_FALSE(c1.limit_exceeded());
-    EXPECT_FALSE(c1.any_limit_exceeded());
-    EXPECT_EQ(c2.consumption(), 0);
-    EXPECT_FALSE(c2.limit_exceeded());
-    EXPECT_FALSE(c2.any_limit_exceeded());
-    EXPECT_EQ(p.consumption(), 60);
-    EXPECT_FALSE(p.limit_exceeded());
-    EXPECT_FALSE(p.any_limit_exceeded());
+    EXPECT_EQ(c1->consumption(), 60);
+    EXPECT_FALSE(c1->LimitExceeded(MemLimit::HARD));
+    EXPECT_FALSE(c1->AnyLimitExceeded(MemLimit::HARD));
+    EXPECT_EQ(c2->consumption(), 0);
+    EXPECT_FALSE(c2->LimitExceeded(MemLimit::HARD));
+    EXPECT_FALSE(c2->AnyLimitExceeded(MemLimit::HARD));
+    EXPECT_EQ(p->consumption(), 60);
+    EXPECT_FALSE(p->LimitExceeded(MemLimit::HARD));
+    EXPECT_FALSE(p->AnyLimitExceeded(MemLimit::HARD));
 
     // c2 goes over limit, p drops below limit
-    c1.release(20);
-    c2.consume(10);
-    EXPECT_EQ(c1.consumption(), 40);
-    EXPECT_FALSE(c1.limit_exceeded());
-    EXPECT_FALSE(c1.any_limit_exceeded());
-    EXPECT_EQ(c2.consumption(), 10);
-    EXPECT_FALSE(c2.limit_exceeded());
-    EXPECT_FALSE(c2.any_limit_exceeded());
-    EXPECT_EQ(p.consumption(), 50);
-    EXPECT_FALSE(p.limit_exceeded());
+    c1->Release(20);
+    c2->Consume(10);
+    EXPECT_EQ(c1->consumption(), 40);
+    EXPECT_FALSE(c1->LimitExceeded(MemLimit::HARD));
+    EXPECT_FALSE(c1->AnyLimitExceeded(MemLimit::HARD));
+    EXPECT_EQ(c2->consumption(), 10);
+    EXPECT_FALSE(c2->LimitExceeded(MemLimit::HARD));
+    EXPECT_FALSE(c2->AnyLimitExceeded(MemLimit::HARD));
+    EXPECT_EQ(p->consumption(), 50);
+    EXPECT_FALSE(p->LimitExceeded(MemLimit::HARD));
 }
 
 #if 0
@@ -197,7 +197,7 @@ class GcFunctionHelper {
 
         ~GcFunctionHelper() {}
 
-        void gc_func() { _tracker->release(NUM_RELEASE_BYTES); }
+        void gc_func() { _tracker->Release(NUM_RELEASE_BYTES); }
 
     private:
         MemTracker* _tracker;
@@ -207,35 +207,35 @@ TEST(MemTestTest, GcFunctions) {
     MemTracker t(10);
     ASSERT_TRUE(t.has_limit());
 
-    t.consume(9);
-    EXPECT_FALSE(t.limit_exceeded());
+    t.Consume(9);
+    EXPECT_FALSE(t.LimitExceeded(MemLimit::HARD));
 
     // Test TryConsume()
     EXPECT_FALSE(t.TryConsume(2));
     EXPECT_EQ(t.consumption(), 9);
-    EXPECT_FALSE(t.limit_exceeded());
+    EXPECT_FALSE(t.LimitExceeded(MemLimit::HARD));
 
     // Attach GcFunction that releases 1 byte
     GcFunctionHelper gc_func_helper(&t);
     t.AddGcFunction(boost::bind(&GcFunctionHelper::gc_func, &gc_func_helper));
     EXPECT_TRUE(t.TryConsume(2));
     EXPECT_EQ(t.consumption(), 10);
-    EXPECT_FALSE(t.limit_exceeded());
+    EXPECT_FALSE(t.LimitExceeded(MemLimit::HARD));
 
     // GcFunction will be called even though TryConsume() fails
     EXPECT_FALSE(t.TryConsume(2));
     EXPECT_EQ(t.consumption(), 9);
-    EXPECT_FALSE(t.limit_exceeded());
+    EXPECT_FALSE(t.LimitExceeded(MemLimit::HARD));
 
     // GcFunction won't be called
     EXPECT_TRUE(t.TryConsume(1));
     EXPECT_EQ(t.consumption(), 10);
-    EXPECT_FALSE(t.limit_exceeded());
+    EXPECT_FALSE(t.LimitExceeded(MemLimit::HARD));
 
-    // Test limit_exceeded()
-    t.consume(1);
+    // Test LimitExceeded(MemLimit::HARD)
+    t.Consume(1);
     EXPECT_EQ(t.consumption(), 11);
-    EXPECT_FALSE(t.limit_exceeded());
+    EXPECT_FALSE(t.LimitExceeded(MemLimit::HARD));
     EXPECT_EQ(t.consumption(), 10);
 
     // Add more GcFunctions, test that we only call them until the limit is no longer
@@ -244,9 +244,9 @@ TEST(MemTestTest, GcFunctions) {
     t.AddGcFunction(boost::bind(&GcFunctionHelper::gc_func, &gc_func_helper2));
     GcFunctionHelper gc_func_helper3(&t);
     t.AddGcFunction(boost::bind(&GcFunctionHelper::gc_func, &gc_func_helper3));
-    t.consume(1);
+    t.Consume(1);
     EXPECT_EQ(t.consumption(), 11);
-    EXPECT_FALSE(t.limit_exceeded());
+    EXPECT_FALSE(t.LimitExceeded(MemLimit::HARD));
     EXPECT_EQ(t.consumption(), 10);
 }
 #endif // enf #if 0
diff --git a/be/test/runtime/memory_scratch_sink_test.cpp b/be/test/runtime/memory_scratch_sink_test.cpp
index eaf602bfc4958f..d02666bd323365 100644
--- a/be/test/runtime/memory_scratch_sink_test.cpp
+++ b/be/test/runtime/memory_scratch_sink_test.cpp
@@ -67,7 +67,6 @@ class MemoryScratchSinkTest : public testing::Test {
 
     ~MemoryScratchSinkTest() {
         delete _state;
-        delete _mem_tracker;
         delete _exec_env->_result_queue_mgr;
         delete _exec_env->_thread_mgr;
         delete _exec_env->_buffer_reservation;
@@ -102,7 +101,7 @@ class MemoryScratchSinkTest : public testing::Test {
     TPlanNode _tnode;
     RowDescriptor* _row_desc = nullptr;
     TMemoryScratchSink _tsink;
-    MemTracker *_mem_tracker = nullptr;
+    std::shared_ptr<MemTracker> _mem_tracker = nullptr;
     DescriptorTbl* _desc_tbl = nullptr;
     std::vector<TExpr> _exprs;
 };
diff --git a/be/test/util/arrow/arrow_row_batch_test.cpp b/be/test/util/arrow/arrow_row_batch_test.cpp
index 0c8daa4452bc1f..29707eb6c02dd7 100644
--- a/be/test/util/arrow/arrow_row_batch_test.cpp
+++ b/be/test/util/arrow/arrow_row_batch_test.cpp
@@ -19,15 +19,15 @@
 
 #include 
 
-#include 
 #include 
+#include 
 
 #include "common/logging.h"
 
 #define ARROW_UTIL_LOGGING_H
+#include 
 #include 
 #include 
-#include 
 #include 
 
 #include "common/object_pool.h"
@@ -37,8 +37,7 @@
 
 namespace doris {
 
-class ArrowRowBatchTest : public testing::Test {
-};
+class ArrowRowBatchTest : public testing::Test {};
 
 std::string test_str() {
     return R"(
@@ -58,10 +57,9 @@ TEST_F(ArrowRowBatchTest, PrettyPrint) {
     std::shared_ptr<arrow::Buffer> buffer;
     MakeBuffer(test_str(), &buffer);
     arrow::json::ParseOptions parse_opts = arrow::json::ParseOptions::Defaults();
-    parse_opts.explicit_schema = arrow::schema(
-        {
-        arrow::field("c1", arrow::int64()),
-        });
+    parse_opts.explicit_schema = arrow::schema({
+            arrow::field("c1", arrow::int64()),
+    });
 
     std::shared_ptr<arrow::RecordBatch> record_batch;
     auto arrow_st = arrow::json::ParseOne(parse_opts, buffer, &record_batch);
@@ -71,9 +69,9 @@ TEST_F(ArrowRowBatchTest, PrettyPrint) {
     RowDescriptor* row_desc;
     auto doris_st = convert_to_row_desc(&obj_pool, *record_batch->schema(), &row_desc);
     ASSERT_TRUE(doris_st.ok());
-    MemTracker tracker;
+    auto tracker = std::make_shared<MemTracker>(-1, "PrettyPrintTest");
     std::shared_ptr<RowBatch> row_batch;
-    doris_st = convert_to_row_batch(*record_batch, *row_desc, &tracker, &row_batch);
+    doris_st = convert_to_row_batch(*record_batch, *row_desc, tracker, &row_batch);
     ASSERT_TRUE(doris_st.ok());
 
     {
@@ -90,7 +88,7 @@ TEST_F(ArrowRowBatchTest, PrettyPrint) {
     }
 }
 
-}
+} // namespace doris
 
 int main(int argc, char** argv) {
     ::testing::InitGoogleTest(&argc, argv);
diff --git a/be/test/util/arrow/arrow_row_block_test.cpp b/be/test/util/arrow/arrow_row_block_test.cpp
index 111a4778241f96..6efd5ac57bb118 100644
--- a/be/test/util/arrow/arrow_row_block_test.cpp
+++ b/be/test/util/arrow/arrow_row_block_test.cpp
@@ -23,24 +23,23 @@
 #include "common/logging.h"
 
 #define ARROW_UTIL_LOGGING_H
+#include 
 #include 
 #include 
-#include 
-#include 
 #include 
+#include 
 #include 
 
-#include "olap/tablet_schema_helper.h"
-#include "olap/schema.h"
 #include "olap/row_block2.h"
+#include "olap/schema.h"
+#include "olap/tablet_schema_helper.h"
 
 namespace doris {
 
 class ArrowRowBlockTest : public testing::Test {
 public:
-    ArrowRowBlockTest() { }
-    virtual ~ArrowRowBlockTest() {
-    }
+    ArrowRowBlockTest() {}
+    virtual ~ArrowRowBlockTest() {}
 };
 
 std::string test_str() {
@@ -61,10 +60,9 @@ TEST_F(ArrowRowBlockTest, Normal) {
     std::shared_ptr<arrow::Buffer> buffer;
     MakeBuffer(test_str(), &buffer);
     arrow::json::ParseOptions parse_opts = arrow::json::ParseOptions::Defaults();
-    parse_opts.explicit_schema = arrow::schema(
-        {
-        arrow::field("c1", arrow::int64()),
-        });
+    parse_opts.explicit_schema = arrow::schema({
+            arrow::field("c1", arrow::int64()),
+    });
 
     std::shared_ptr<arrow::RecordBatch> record_batch;
     auto arrow_st = arrow::json::ParseOne(parse_opts, buffer, &record_batch);
@@ -82,7 +80,6 @@ TEST_F(ArrowRowBlockTest, Normal) {
         std::shared_ptr<arrow::Schema> check_schema;
         doris_st = convert_to_arrow_schema(*schema, &check_schema);
         ASSERT_TRUE(doris_st.ok());
-
         arrow::MemoryPool* pool = arrow::default_memory_pool();
         std::shared_ptr<arrow::RecordBatch> check_batch;
         doris_st = convert_to_arrow_batch(*row_block, check_schema, pool, &check_batch);
@@ -92,10 +89,9 @@ TEST_F(ArrowRowBlockTest, Normal) {
     }
 }
 
-}
+} // namespace doris
 
 int main(int argc, char** argv) {
     ::testing::InitGoogleTest(&argc, argv);
     return RUN_ALL_TESTS();
 }
-
diff --git a/be/test/util/arrow/arrow_work_flow_test.cpp b/be/test/util/arrow/arrow_work_flow_test.cpp
index fb6f814c8ad272..ea63cd0fd263cb 100644
--- a/be/test/util/arrow/arrow_work_flow_test.cpp
+++ b/be/test/util/arrow/arrow_work_flow_test.cpp
@@ -17,13 +17,13 @@
 
 #include "exec/csv_scan_node.h"
 
-#include 
-#include 
 #include 
 #include 
 #include 
-#include 
+#include 
 #include 
+#include 
+#include 
 
 #include "common/logging.h"
 #include "gen_cpp/PlanNodes_types.h"
@@ -33,22 +33,22 @@
 #include "runtime/exec_env.h"
 #include "runtime/mem_tracker.h"
 #include "runtime/result_queue_mgr.h"
-#include "runtime/thread_resource_mgr.h"
 #include "runtime/row_batch.h"
 #include "runtime/runtime_state.h"
+#include "runtime/thread_resource_mgr.h"
 #include "runtime/tuple_row.h"
 #include "util/arrow/row_batch.h"
+#include "util/cpu_info.h"
 #include "util/debug_util.h"
 #include "util/disk_info.h"
-#include "util/cpu_info.h"
 #include "util/logging.h"
 
 namespace doris {
 
 class ArrowWorkFlowTest : public testing::Test {
 public:
-    ArrowWorkFlowTest(){}
-    ~ArrowWorkFlowTest(){}
+    ArrowWorkFlowTest() {}
+    ~ArrowWorkFlowTest() {}
 
 protected:
     virtual void SetUp() {
@@ -66,7 +66,6 @@ class ArrowWorkFlowTest : public testing::Test {
         system("rm -rf ./test_run");
 
         delete _state;
-        delete _mem_tracker;
     }
 
     void init();
@@ -80,7 +79,7 @@ class ArrowWorkFlowTest : public testing::Test {
     TPlanNode _tnode;
     ExecEnv* _exec_env = nullptr;
     RuntimeState* _state = nullptr;
-    MemTracker *_mem_tracker = nullptr;
+    std::shared_ptr<MemTracker> _mem_tracker;
 }; // end class ArrowWorkFlowTest
 
 void ArrowWorkFlowTest::init() {
@@ -100,7 +99,7 @@ void ArrowWorkFlowTest::init_runtime_state() {
     query_id.hi = 100;
     _state = new RuntimeState(query_id, query_options, TQueryGlobals(), _exec_env);
     _state->init_instance_mem_tracker();
-    _mem_tracker = new MemTracker(-1, "ArrowWorkFlowTest", _state->instance_mem_tracker());
+    _mem_tracker.reset(new MemTracker(-1, "ArrowWorkFlowTest", _state->instance_mem_tracker()));
     _state->set_desc_tbl(_desc_tbl);
     _state->_load_dir = "./test_run/output/";
     _state->init_mem_trackers(TUniqueId());
@@ -318,7 +317,6 @@ void ArrowWorkFlowTest::init_desc_tbl() {
     _tnode.csv_scan_node.__isset.default_values = true;
     _tnode.csv_scan_node.max_filter_ratio = 0.5;
     _tnode.__isset.csv_scan_node = true;
-
 }
 
 TEST_F(ArrowWorkFlowTest, NormalUse) {
@@ -333,7 +331,7 @@ TEST_F(ArrowWorkFlowTest, NormalUse) {
     status = scan_node.open(_state);
     ASSERT_TRUE(status.ok());
 
-    std::unique_ptr<MemTracker> mem_tracker(new MemTracker(-1));
+    auto mem_tracker = std::make_shared<MemTracker>(-1);
     RowBatch row_batch(scan_node._row_descriptor, _state->batch_size(), mem_tracker.get());
     bool eos = false;
 
@@ -347,7 +345,8 @@ TEST_F(ArrowWorkFlowTest, NormalUse) {
         status = convert_to_arrow_schema(scan_node._row_descriptor, &schema);
         ASSERT_TRUE(status.ok());
         std::shared_ptr<arrow::RecordBatch> record_batch;
-        status = convert_to_arrow_batch(row_batch, schema, arrow::default_memory_pool(), &record_batch);
+        status = convert_to_arrow_batch(row_batch, schema, arrow::default_memory_pool(),
+                                        &record_batch);
         ASSERT_TRUE(status.ok());
         ASSERT_EQ(6, record_batch->num_rows());
         ASSERT_EQ(6, record_batch->num_columns());

From 360deb36b9ed6fb570ca797fa8d6995061ea34ee Mon Sep 17 00:00:00 2001
From: huangwei 
Date: Mon, 20 Jul 2020 16:16:21 +0800
Subject: [PATCH 02/12] [Need revert] temp fix ut

---
 be/test/olap/schema_change_test.cpp | 2 +-
 run-ut.sh                           | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/be/test/olap/schema_change_test.cpp b/be/test/olap/schema_change_test.cpp
index 3329b89b06a310..8367fa8bbaf653 100644
--- a/be/test/olap/schema_change_test.cpp
+++ b/be/test/olap/schema_change_test.cpp
@@ -875,7 +875,7 @@ TEST_F(TestColumn, ConvertCharToHLL) {
     mutable_block.get_row(0, &mv_row_cursor);
 
     auto dst_slice = reinterpret_cast<Slice*>(mv_row_cursor.cell_ptr(1));
-    HyperLogLog hll(dst_slice->data);
+    HyperLogLog hll(*dst_slice);
     ASSERT_EQ(hll.estimate_cardinality(), 1);
 }
 }
diff --git a/run-ut.sh b/run-ut.sh
index f657058d8b1f2c..f889b7907d1f0f 100755
--- a/run-ut.sh
+++ b/run-ut.sh
@@ -25,7 +25,7 @@ export DORIS_HOME=${ROOT}
 
 . ${DORIS_HOME}/env.sh
 
-PARALLEL=$[$(nproc)/4+1]
+PARALLEL=32 #$[$(nproc)/4+1]
 
 # Check args
 usage() {

From 6baf8cbbca10c22c98ca7a3fa8b17c559f54ec6c Mon Sep 17 00:00:00 2001
From: Yingchun Lai <405403881@qq.com>
Date: Wed, 24 Jun 2020 11:56:54 +0800
Subject: [PATCH 03/12] [can't run] rm register module of mem tracker

---
 be/src/runtime/buffered_block_mgr2.cc     |  1 -
 be/src/runtime/mem_tracker.cpp            | 62 ++++++++++++++++-------
 be/src/runtime/mem_tracker.h              | 20 ++------
 be/src/runtime/mysql_table_sink.cpp       |  3 +-
 be/src/runtime/mysql_table_sink.h         |  2 +-
 be/src/runtime/plan_fragment_executor.cpp |  5 --
 6 files changed, 50 insertions(+), 43 deletions(-)

diff --git a/be/src/runtime/buffered_block_mgr2.cc b/be/src/runtime/buffered_block_mgr2.cc
index 3d634a23817f6c..3edd4bab015c58 100644
--- a/be/src/runtime/buffered_block_mgr2.cc
+++ b/be/src/runtime/buffered_block_mgr2.cc
@@ -590,7 +590,6 @@ BufferedBlockMgr2::~BufferedBlockMgr2() {
         delete[] buffer->buffer;
     }
     DCHECK_EQ(_mem_tracker->consumption(), 0);
-    _mem_tracker->unregister_from_parent();
     _mem_tracker.reset();
 }
 
diff --git a/be/src/runtime/mem_tracker.cpp b/be/src/runtime/mem_tracker.cpp
index 8b340287c13f50..6dab5104ad3fc8 100644
--- a/be/src/runtime/mem_tracker.cpp
+++ b/be/src/runtime/mem_tracker.cpp
@@ -37,11 +37,14 @@
 #include "util/uid_util.h"
 #include "util/stack_util.h"
 
+using std::deque;
 using std::endl;
 using std::greater;
 using std::pair;
 using std::priority_queue;
+using std::shared_ptr;
 using std::string;
+using std::vector;
 using strings::Substitute;
 
 namespace doris {
@@ -67,22 +70,21 @@ void MemTracker::CreateRootTracker() {
   root_tracker->Init();
 }
 
-MemTracker::MemTracker(
-    int64_t byte_limit, const string& label, const std::shared_ptr<MemTracker>& parent, bool auto_unregister, bool log_usage_if_zero)
-  : limit_(byte_limit),
-    soft_limit_(CalcSoftLimit(byte_limit)),
-    label_(label),
-    parent_(parent),
-    consumption_(&local_counter_),
-    local_counter_(TUnit::BYTES),
-    consumption_metric_(nullptr),
-    log_usage_if_zero_(log_usage_if_zero),
-    num_gcs_metric_(nullptr),
-    bytes_freed_by_last_gc_metric_(nullptr),
-    bytes_over_limit_metric_(nullptr),
-    limit_metric_(nullptr),
-    auto_unregister_(auto_unregister) {
-  Init();
+MemTracker::MemTracker(int64_t byte_limit, const string& label,
+                       const std::shared_ptr<MemTracker>& parent, bool log_usage_if_zero)
+        : limit_(byte_limit),
+          soft_limit_(CalcSoftLimit(byte_limit)),
+          label_(label),
+          parent_(parent),
+          consumption_(&local_counter_),
+          local_counter_(TUnit::BYTES),
+          consumption_metric_(nullptr),
+          log_usage_if_zero_(log_usage_if_zero),
+          num_gcs_metric_(nullptr),
+          bytes_freed_by_last_gc_metric_(nullptr),
+          bytes_over_limit_metric_(nullptr),
+          limit_metric_(nullptr) {
+    Init();
 }
 
 MemTracker::MemTracker(RuntimeProfile* profile, int64_t byte_limit,
@@ -219,8 +221,32 @@ MemTracker::~MemTracker() {
     DCHECK(consumption() == 0) << "Memory tracker " << debug_string()
                                << " has unreleased consumption " << consumption();
     parent_->Release(consumption());
-    if (auto_unregister_) {  // TODO(yingchun): when auto_unregister_ is false, and can it be false?
-      unregister_from_parent();
+
+    lock_guard<SpinLock> l(parent_->child_trackers_lock_);
+    if (child_tracker_it_ != parent_->child_trackers_.end()) {
+      parent_->child_trackers_.erase(child_tracker_it_);
+      child_tracker_it_ = parent_->child_trackers_.end();
+    }
+  }
+}
+
+void MemTracker::ListTrackers(vector<shared_ptr<MemTracker>>* trackers) {
+  trackers->clear();
+  deque<shared_ptr<MemTracker>> to_process;
+  to_process.push_front(GetRootTracker());
+  while (!to_process.empty()) {
+    shared_ptr<MemTracker> t = to_process.back();
+    to_process.pop_back();
+
+    trackers->push_back(t);
+    {
+      lock_guard<SpinLock> l(t->child_trackers_lock_);
+      for (const auto& child_weak : t->child_trackers_) {
+        shared_ptr<MemTracker> child = child_weak.lock();
+        if (child) {
+          to_process.emplace_back(std::move(child));
+        }
+      }
     }
   }
 }
diff --git a/be/src/runtime/mem_tracker.h b/be/src/runtime/mem_tracker.h
index 4709bd4ac0dd61..8f2eff7bc03741 100644
--- a/be/src/runtime/mem_tracker.h
+++ b/be/src/runtime/mem_tracker.h
@@ -83,16 +83,14 @@ class TQueryOptions;
 /// This class is thread-safe.
 class MemTracker : public std::enable_shared_from_this<MemTracker> {
  public:
-  // TODO(yingchun): change to std::shared_ptr parent
   /// 'byte_limit' < 0 means no limit
   /// 'label' is the label used in the usage string (LogUsage())
-  /// If 'auto_unregister' is true, never call unregister_from_parent().
   /// If 'log_usage_if_zero' is false, this tracker (and its children) will not be
   /// included
   /// in LogUsage() output if consumption is 0.
   MemTracker(int64_t byte_limit = -1, const std::string& label = std::string(),
              const std::shared_ptr<MemTracker>& parent = std::shared_ptr<MemTracker>(),
-             bool auto_unregister = false, bool log_usage_if_zero = true);
+             bool log_usage_if_zero = true);
 
   /// C'tor for tracker for which consumption counter is created as part of a profile.
   /// The counter is created with name COUNTER_NAME.
@@ -108,13 +106,8 @@ class MemTracker : public std::enable_shared_from_this {
 
   ~MemTracker();
 
-  // Removes this tracker from parent_->child_trackers_.
-  void unregister_from_parent() {
-      DCHECK(parent_ != nullptr);
-      std::lock_guard<SpinLock> l(parent_->child_trackers_lock_);
-      parent_->child_trackers_.erase(child_tracker_it_);
-      child_tracker_it_ = parent_->child_trackers_.end();
-  }
+  // Returns a list of all the valid trackers.
+  static void ListTrackers(std::vector<std::shared_ptr<MemTracker>>* trackers);
 
   /// Include counters from a ReservationTracker in logs and other diagnostics.
   /// The counters should be owned by the fragment's RuntimeProfile.
@@ -572,13 +565,6 @@ class MemTracker : public std::enable_shared_from_this {
 
   /// Metric for limit_.
   IntGauge* limit_metric_;
-
-  // If true, calls unregister_from_parent() in the dtor. This is only used for
-  // the query wide trackers to remove it from the process mem tracker. The
-  // process tracker never gets deleted so it is safe to reference it in the dtor.
-  // The query tracker has lifetime shared by multiple plan fragments so it's hard
-  // to do cleanup another way.
-  bool auto_unregister_ = false;
 };
 
 /// Global registry for query and pool MemTrackers. Owned by ExecEnv.
diff --git a/be/src/runtime/mysql_table_sink.cpp b/be/src/runtime/mysql_table_sink.cpp
index 035b89031dacc2..4adb8ba57e8c52 100644
--- a/be/src/runtime/mysql_table_sink.cpp
+++ b/be/src/runtime/mysql_table_sink.cpp
@@ -32,7 +32,8 @@ MysqlTableSink::MysqlTableSink(ObjectPool* pool, const RowDescriptor& row_desc,
                                const std::vector& t_exprs) :
         _pool(pool),
         _row_desc(row_desc),
-        _t_output_expr(t_exprs) {
+        _t_output_expr(t_exprs),
+        _mem_tracker(new MemTracker(-1, "MysqlTableSink")) {
 }
 
 MysqlTableSink::~MysqlTableSink() {
diff --git a/be/src/runtime/mysql_table_sink.h b/be/src/runtime/mysql_table_sink.h
index b6ac5ab76612a5..de63edb4ceed12 100644
--- a/be/src/runtime/mysql_table_sink.h
+++ b/be/src/runtime/mysql_table_sink.h
@@ -73,7 +73,7 @@ class MysqlTableSink : public DataSink {
     MysqlTableWriter* _writer;
 
     RuntimeProfile* _profile;
-    std::unique_ptr<MemTracker> _mem_tracker;
+    std::shared_ptr<MemTracker> _mem_tracker;
 };
 
 }
diff --git a/be/src/runtime/plan_fragment_executor.cpp b/be/src/runtime/plan_fragment_executor.cpp
index 2aeef9a52b8548..2cbff2b26b0af5 100644
--- a/be/src/runtime/plan_fragment_executor.cpp
+++ b/be/src/runtime/plan_fragment_executor.cpp
@@ -65,11 +65,6 @@ PlanFragmentExecutor::~PlanFragmentExecutor() {
     // }
     // at this point, the report thread should have been stopped
     DCHECK(!_report_thread_active);
-
-    // fragment mem tracker needs unregister
-    if (_mem_tracker.get() != nullptr) {
-        _mem_tracker->unregister_from_parent();
-    }
 }
 
 Status PlanFragmentExecutor::prepare(const TExecPlanFragmentParams& request) {

From 04e9e2ea3994f13f889c29617ec62c73280b92d9 Mon Sep 17 00:00:00 2001
From: huangwei 
Date: Tue, 21 Jul 2020 10:36:00 +0800
Subject: [PATCH 04/12] use CreateTracker() to create shared tracker

---
 be/src/exec/base_scanner.cpp                 |  2 +-
 be/src/exec/data_sink.cpp                    |  5 +-
 be/src/exec/es_http_scanner.cpp              | 23 +++----
 be/src/exec/exec_node.cpp                    |  4 +-
 be/src/exec/tablet_sink.cpp                  |  2 +-
 be/src/olap/delta_writer.cpp                 |  2 +-
 be/src/olap/fs/file_block_manager.cpp        |  2 +-
 be/src/olap/memtable.cpp                     |  2 +-
 be/src/runtime/buffered_block_mgr2.cc        |  4 +-
 be/src/runtime/data_stream_recvr.cc          |  5 +-
 be/src/runtime/data_stream_sender.cpp        |  3 +-
 be/src/runtime/exec_env_init.cpp             |  3 +-
 be/src/runtime/export_sink.cpp               |  2 +-
 be/src/runtime/load_channel.cpp              |  2 +-
 be/src/runtime/load_channel_mgr.cpp          |  2 +-
 be/src/runtime/mem_tracker.cpp               | 68 +++++++++++++++-----
 be/src/runtime/mem_tracker.h                 | 14 ++++
 be/src/runtime/plan_fragment_executor.cpp    |  3 +-
 be/src/runtime/runtime_state.cpp             | 14 ++--
 be/src/runtime/tablets_channel.cpp           |  2 +-
 be/test/runtime/memory_scratch_sink_test.cpp |  2 +-
 be/test/util/arrow/arrow_work_flow_test.cpp  |  2 +-
 22 files changed, 107 insertions(+), 61 deletions(-)

diff --git a/be/src/exec/base_scanner.cpp b/be/src/exec/base_scanner.cpp
index f0edbaae1a6891..73e6d5bed7363e 100644
--- a/be/src/exec/base_scanner.cpp
+++ b/be/src/exec/base_scanner.cpp
@@ -37,7 +37,7 @@ BaseScanner::BaseScanner(RuntimeState* state, RuntimeProfile* profile,
 #if BE_TEST
           _mem_tracker(new MemTracker()),
 #else
-          _mem_tracker(new MemTracker(-1, "Broker Scanner", state->instance_mem_tracker())),
+          _mem_tracker(MemTracker::CreateTracker(-1, "Broker Scanner", state->instance_mem_tracker())),
 #endif
           _mem_pool(_mem_tracker.get()),
           _dest_tuple_desc(nullptr),
diff --git a/be/src/exec/data_sink.cpp b/be/src/exec/data_sink.cpp
index 9fd258e5eb593c..50a27cbb9ef15a 100644
--- a/be/src/exec/data_sink.cpp
+++ b/be/src/exec/data_sink.cpp
@@ -152,9 +152,8 @@ Status DataSink::init(const TDataSink& thrift_sink) {
 }
 
 Status DataSink::prepare(RuntimeState* state) {
-    _expr_mem_tracker.reset(
-            new MemTracker(-1, std::string("DataSink:") + std::to_string(state->load_job_id()),
-                           state->instance_mem_tracker()));
+    _expr_mem_tracker = MemTracker::CreateTracker(-1, std::string("DataSink:") + std::to_string(state->load_job_id()),
+                           state->instance_mem_tracker());
     return Status::OK();
 }
 
diff --git a/be/src/exec/es_http_scanner.cpp b/be/src/exec/es_http_scanner.cpp
index 53bd2e96e11d7d..6890010ef8bda1 100644
--- a/be/src/exec/es_http_scanner.cpp
+++ b/be/src/exec/es_http_scanner.cpp
@@ -43,19 +43,20 @@ EsHttpScanner::EsHttpScanner(RuntimeState* state, RuntimeProfile* profile, Tuple
           _line_eof(false),
           _batch_eof(false),
 #if BE_TEST
-        _mem_tracker(new MemTracker()),
+          _mem_tracker(new MemTracker()),
 #else
-        _mem_tracker(new MemTracker(-1, "EsHttp Scanner", state->instance_mem_tracker())),
+          _mem_tracker(
+                  MemTracker::CreateTracker(-1, "EsHttp Scanner", state->instance_mem_tracker())),
 #endif
-        _mem_pool(_mem_tracker.get()),
-        _tuple_desc(nullptr),
-        _counter(counter),
-        _es_reader(nullptr),
-        _es_scroll_parser(nullptr),
-        _doc_value_mode(doc_value_mode),
-        _rows_read_counter(nullptr),
-        _read_timer(nullptr),
-        _materialize_timer(nullptr) {
+          _mem_pool(_mem_tracker.get()),
+          _tuple_desc(nullptr),
+          _counter(counter),
+          _es_reader(nullptr),
+          _es_scroll_parser(nullptr),
+          _doc_value_mode(doc_value_mode),
+          _rows_read_counter(nullptr),
+          _read_timer(nullptr),
+          _materialize_timer(nullptr) {
 }
 
 EsHttpScanner::~EsHttpScanner() {
diff --git a/be/src/exec/exec_node.cpp b/be/src/exec/exec_node.cpp
index 1a8b71a37de586..5d94c8cbce03ad 100644
--- a/be/src/exec/exec_node.cpp
+++ b/be/src/exec/exec_node.cpp
@@ -176,8 +176,8 @@ Status ExecNode::prepare(RuntimeState* state) {
                                                    _rows_returned_counter,
                                                    runtime_profile()->total_time_counter()),
                               "");
-    _mem_tracker.reset(new MemTracker(_runtime_profile.get(), -1, "ExecNode "+ _runtime_profile->name(), state->instance_mem_tracker()));
-    _expr_mem_tracker.reset(new MemTracker(-1, "ExecNode Exprs", _mem_tracker));
+    _mem_tracker = MemTracker::CreateTracker(_runtime_profile.get(), -1, "ExecNode "+ _runtime_profile->name(), state->instance_mem_tracker());
+    _expr_mem_tracker = MemTracker::CreateTracker(-1, "ExecNode Exprs", _mem_tracker);
     _expr_mem_pool.reset(new MemPool(_expr_mem_tracker.get()));
     // TODO chenhao
     RETURN_IF_ERROR(Expr::prepare(_conjunct_ctxs, state, row_desc(), expr_mem_tracker()));
diff --git a/be/src/exec/tablet_sink.cpp b/be/src/exec/tablet_sink.cpp
index 5d8ee35622a4da..b43829e45970ba 100644
--- a/be/src/exec/tablet_sink.cpp
+++ b/be/src/exec/tablet_sink.cpp
@@ -463,7 +463,7 @@ Status OlapTableSink::prepare(RuntimeState* state) {
 
     // profile must add to state's object pool
     _profile = state->obj_pool()->add(new RuntimeProfile("OlapTableSink"));
-    _mem_tracker.reset(new MemTracker(-1, "OlapTableSink", state->instance_mem_tracker()));
+    _mem_tracker = MemTracker::CreateTracker(-1, "OlapTableSink", state->instance_mem_tracker());
 
     SCOPED_TIMER(_profile->total_time_counter());
 
diff --git a/be/src/olap/delta_writer.cpp b/be/src/olap/delta_writer.cpp
index a549321de9434f..e75380945299d7 100644
--- a/be/src/olap/delta_writer.cpp
+++ b/be/src/olap/delta_writer.cpp
@@ -42,7 +42,7 @@ DeltaWriter::DeltaWriter(WriteRequest* req, std::shared_ptr parent,
           _tablet_schema(nullptr),
           _delta_written_success(false),
           _storage_engine(storage_engine),
-          _mem_tracker(new MemTracker(-1, "DeltaWriter", parent)) {}
+          _mem_tracker(MemTracker::CreateTracker(-1, "DeltaWriter", parent)) {}
 
 DeltaWriter::~DeltaWriter() {
     if (_is_init && !_delta_written_success) {
diff --git a/be/src/olap/fs/file_block_manager.cpp b/be/src/olap/fs/file_block_manager.cpp
index 0d0cbc9702421f..291dc0ed61f122 100644
--- a/be/src/olap/fs/file_block_manager.cpp
+++ b/be/src/olap/fs/file_block_manager.cpp
@@ -381,7 +381,7 @@ Status FileReadableBlock::readv(uint64_t offset, const Slice* results, size_t re
 FileBlockManager::FileBlockManager(Env* env, BlockManagerOptions opts) :
         _env(DCHECK_NOTNULL(env)),
         _opts(std::move(opts)),
-        _mem_tracker(new MemTracker(-1, "file_block_manager", _opts.parent_mem_tracker)) {
+        _mem_tracker(MemTracker::CreateTracker(-1, "file_block_manager", _opts.parent_mem_tracker)) {
     if (_opts.enable_metric) {
         _metrics.reset(new internal::BlockManagerMetrics());
     }
diff --git a/be/src/olap/memtable.cpp b/be/src/olap/memtable.cpp
index 88eaf162c73be0..fc69ff09270a89 100644
--- a/be/src/olap/memtable.cpp
+++ b/be/src/olap/memtable.cpp
@@ -39,7 +39,7 @@ MemTable::MemTable(int64_t tablet_id, Schema* schema, const TabletSchema* tablet
           _slot_descs(slot_descs),
           _keys_type(keys_type),
           _row_comparator(_schema),
-          _mem_tracker(new MemTracker(-1, "MemTable", parent_tracker)),
+          _mem_tracker(MemTracker::CreateTracker(-1, "MemTable", parent_tracker)),
           _buffer_mem_pool(new MemPool(_mem_tracker.get())),
           _table_mem_pool(new MemPool(_mem_tracker.get())),
           _schema_size(_schema->schema_size()),
diff --git a/be/src/runtime/buffered_block_mgr2.cc b/be/src/runtime/buffered_block_mgr2.cc
index 3edd4bab015c58..091342c4fa4efa 100644
--- a/be/src/runtime/buffered_block_mgr2.cc
+++ b/be/src/runtime/buffered_block_mgr2.cc
@@ -58,7 +58,7 @@ class BufferedBlockMgr2::Client {
             : _mgr(mgr),
               _state(state),
               _tracker(tracker),
-              _query_tracker(new MemTracker(-1, "BufferedBlockMgr2", _mgr->_mem_tracker->parent())),
+              _query_tracker(MemTracker::CreateTracker(-1, "BufferedBlockMgr2", _mgr->_mem_tracker->parent())),
               _num_reserved_buffers(num_reserved_buffers),
               _num_tmp_reserved_buffers(0),
               _num_pinned_buffers(0) {
@@ -1277,7 +1277,7 @@ void BufferedBlockMgr2::init(
     // Create a new mem_tracker and allocate buffers.
     // _mem_tracker.reset(new MemTracker(
     //             profile(), mem_limit, -1, "Block Manager", parent_tracker));
-    _mem_tracker.reset(new MemTracker(mem_limit, "Block Manager2", parent_tracker));
+    _mem_tracker = MemTracker::CreateTracker(mem_limit, "Block Manager2", parent_tracker);
 
     _initialized = true;
 }
diff --git a/be/src/runtime/data_stream_recvr.cc b/be/src/runtime/data_stream_recvr.cc
index f5df75242e4895..78dd50c9edf3ed 100644
--- a/be/src/runtime/data_stream_recvr.cc
+++ b/be/src/runtime/data_stream_recvr.cc
@@ -366,10 +366,7 @@ DataStreamRecvr::DataStreamRecvr(
             _num_buffered_bytes(0),
             _profile(profile),
             _sub_plan_query_statistics_recvr(sub_plan_query_statistics_recvr) {
-    // TODO: Now the parent tracker may cause problem when we need spill to disk, so we
-    // replace parent_tracker with nullptr, fix future
-    _mem_tracker.reset(new MemTracker(_profile, -1, "DataStreamRecvr", nullptr));
-    // _mem_tracker.reset(new MemTracker(_profile.get(), -1, "DataStreamRecvr", parent_tracker));
+    _mem_tracker = MemTracker::CreateTracker(_profile, -1, "DataStreamRecvr", parent_tracker);
 
     // Create one queue per sender if is_merging is true.
     int num_queues = is_merging ? num_senders : 1;
diff --git a/be/src/runtime/data_stream_sender.cpp b/be/src/runtime/data_stream_sender.cpp
index 5d61dc534c01df..5454c1abae0b4d 100644
--- a/be/src/runtime/data_stream_sender.cpp
+++ b/be/src/runtime/data_stream_sender.cpp
@@ -383,8 +383,7 @@ Status DataStreamSender::prepare(RuntimeState* state) {
     title << "DataStreamSender (dst_id=" << _dest_node_id << ")";
     _profile = _pool->add(new RuntimeProfile(title.str()));
     SCOPED_TIMER(_profile->total_time_counter());
-    _mem_tracker.reset(
-            new MemTracker(_profile, -1, "DataStreamSender", state->instance_mem_tracker()));
+    _mem_tracker = MemTracker::CreateTracker(_profile, -1, "DataStreamSender", state->instance_mem_tracker());
 
     if (_part_type == TPartitionType::UNPARTITIONED || _part_type == TPartitionType::RANDOM) {
         // Randomize the order we open/transmit to channels to avoid thundering herd problems.
diff --git a/be/src/runtime/exec_env_init.cpp b/be/src/runtime/exec_env_init.cpp
index b7fb080e53a5c7..7325a8a4286472 100644
--- a/be/src/runtime/exec_env_init.cpp
+++ b/be/src/runtime/exec_env_init.cpp
@@ -177,7 +177,8 @@ Status ExecEnv::_init_mem_tracker() {
         return Status::InternalError(ss.str());
     }
 
-    _mem_tracker.reset(new MemTracker(bytes_limit, "ExecEnv root", MemTracker::GetRootTracker()));
+    _mem_tracker =
+            MemTracker::CreateTracker(bytes_limit, "ExecEnv root", MemTracker::GetRootTracker());
 
     LOG(INFO) << "Using global memory limit: " << PrettyPrinter::print(bytes_limit, TUnit::BYTES);
     RETURN_IF_ERROR(_disk_io_mgr->init(_mem_tracker));
diff --git a/be/src/runtime/export_sink.cpp b/be/src/runtime/export_sink.cpp
index 3abb05c7827aa9..85616fc88d13ef 100644
--- a/be/src/runtime/export_sink.cpp
+++ b/be/src/runtime/export_sink.cpp
@@ -67,7 +67,7 @@ Status ExportSink::prepare(RuntimeState* state) {
     _profile = state->obj_pool()->add(new RuntimeProfile(title.str()));
     SCOPED_TIMER(_profile->total_time_counter());
 
-    _mem_tracker.reset(new MemTracker(-1, "ExportSink", state->instance_mem_tracker()));
+    _mem_tracker = MemTracker::CreateTracker(-1, "ExportSink", state->instance_mem_tracker());
 
     // Prepare the exprs to run.
     RETURN_IF_ERROR(Expr::prepare(_output_expr_ctxs, state, _row_desc, _mem_tracker));
diff --git a/be/src/runtime/load_channel.cpp b/be/src/runtime/load_channel.cpp
index 575bb9ceb5eaa1..72ace2357783f5 100644
--- a/be/src/runtime/load_channel.cpp
+++ b/be/src/runtime/load_channel.cpp
@@ -26,7 +26,7 @@ namespace doris {
 LoadChannel::LoadChannel(const UniqueId& load_id, int64_t mem_limit,
                          int64_t timeout_s, const std::shared_ptr& mem_tracker) :
         _load_id(load_id), _timeout_s(timeout_s) {
-    _mem_tracker.reset(new MemTracker(mem_limit, _load_id.to_string(), mem_tracker));
+    _mem_tracker = MemTracker::CreateTracker(mem_limit, _load_id.to_string(), mem_tracker);
     // _last_updated_time should be set before being inserted to
     // _load_channels in load_channel_mgr, or it may be erased
     // immediately by gc thread.
diff --git a/be/src/runtime/load_channel_mgr.cpp b/be/src/runtime/load_channel_mgr.cpp
index ad238313d905ab..f0237ad7776f98 100644
--- a/be/src/runtime/load_channel_mgr.cpp
+++ b/be/src/runtime/load_channel_mgr.cpp
@@ -79,7 +79,7 @@ LoadChannelMgr::~LoadChannelMgr() {
 
 Status LoadChannelMgr::init(int64_t process_mem_limit) {
     int64_t load_mem_limit = calc_process_max_load_memory(process_mem_limit);
-    _mem_tracker.reset(new MemTracker(load_mem_limit, "load channel mgr"));
+    _mem_tracker = MemTracker::CreateTracker(load_mem_limit, "load channel mgr");
     RETURN_IF_ERROR(_start_bg_worker());
     return Status::OK();
 }
diff --git a/be/src/runtime/mem_tracker.cpp b/be/src/runtime/mem_tracker.cpp
index 6dab5104ad3fc8..1689bcd67eb13b 100644
--- a/be/src/runtime/mem_tracker.cpp
+++ b/be/src/runtime/mem_tracker.cpp
@@ -66,25 +66,60 @@ static std::shared_ptr root_tracker;
 static GoogleOnceType root_tracker_once = GOOGLE_ONCE_INIT;
 
 void MemTracker::CreateRootTracker() {
-  root_tracker.reset(new MemTracker(-1, "root", std::shared_ptr()));
+  root_tracker = MemTracker::CreateTracker(-1, "root", std::shared_ptr());
   root_tracker->Init();
 }
 
-MemTracker::MemTracker(int64_t byte_limit, const string& label,
-                       const std::shared_ptr& parent, bool log_usage_if_zero)
-        : limit_(byte_limit),
-          soft_limit_(CalcSoftLimit(byte_limit)),
-          label_(label),
-          parent_(parent),
-          consumption_(&local_counter_),
-          local_counter_(TUnit::BYTES),
-          consumption_metric_(nullptr),
-          log_usage_if_zero_(log_usage_if_zero),
-          num_gcs_metric_(nullptr),
-          bytes_freed_by_last_gc_metric_(nullptr),
-          bytes_over_limit_metric_(nullptr),
-          limit_metric_(nullptr) {
-    Init();
+std::shared_ptr MemTracker::CreateTracker(
+    int64_t byte_limit,
+    const std::string& label,
+    std::shared_ptr parent,
+    bool log_usage_if_zero) {
+  shared_ptr real_parent;
+  if (parent) {
+      real_parent = std::move(parent);
+  } else {
+      real_parent = GetRootTracker();
+  }
+  shared_ptr tracker = std:move(MemTracker::CreateTracker(byte_limit, label, real_parent, log_usage_if_zero));
+  real_parent->AddChildTracker(tracker);
+  tracker->Init();
+
+  return tracker;
+}
+
+std::shared_ptr MemTracker::CreateTracker(
+    RuntimeProfile* profile, int64_t byte_limit,
+    const std::string& label,
+    const std::shared_ptr& parent) {
+  shared_ptr real_parent;
+  if (parent) {
+      real_parent = std::move(parent);
+  } else {
+      real_parent = GetRootTracker();
+  }
+  shared_ptr tracker = std::move(MemTracker::CreateTracker(profile, byte_limit, label, real_parent));
+  real_parent->AddChildTracker(tracker);
+  tracker->Init();
+
+  return tracker;
+}
+
+MemTracker::MemTracker(
+    int64_t byte_limit, const string& label, const std::shared_ptr& parent, bool auto_unregister, bool log_usage_if_zero)
+  : limit_(byte_limit),
+    soft_limit_(CalcSoftLimit(byte_limit)),
+    label_(label),
+    parent_(parent),
+    consumption_(&local_counter_),
+    local_counter_(TUnit::BYTES),
+    consumption_metric_(nullptr),
+    log_usage_if_zero_(log_usage_if_zero),
+    num_gcs_metric_(nullptr),
+    bytes_freed_by_last_gc_metric_(nullptr),
+    bytes_over_limit_metric_(nullptr),
+    limit_metric_(nullptr) {
+  Init();
 }
 
 MemTracker::MemTracker(RuntimeProfile* profile, int64_t byte_limit,
@@ -124,7 +159,6 @@ MemTracker::MemTracker(IntGauge* consumption_metric,
 void MemTracker::Init() {
   DCHECK_GE(limit_, -1);
   DCHECK_LE(soft_limit_, limit_);
-  // if (parent_ != nullptr) parent_->AddChildTracker(std::shared_ptr(this));
   // populate all_trackers_ and limit_trackers_
   MemTracker* tracker = this;
   while (tracker != nullptr) {
diff --git a/be/src/runtime/mem_tracker.h b/be/src/runtime/mem_tracker.h
index 8f2eff7bc03741..bb84bf345a5598 100644
--- a/be/src/runtime/mem_tracker.h
+++ b/be/src/runtime/mem_tracker.h
@@ -83,6 +83,19 @@ class TQueryOptions;
 /// This class is thread-safe.
 class MemTracker : public std::enable_shared_from_this {
  public:
+  // Creates and adds the tracker to the tree so that it can be retrieved with
+  // FindTracker/FindOrCreateTracker.
+  static std::shared_ptr CreateTracker(
+      int64_t byte_limit = -1,
+      const std::string& label = std::string(),
+      std::shared_ptr parent = std::shared_ptr(),
+      bool log_usage_if_zero = true);
+
+  static std::shared_ptr CreateTracker(
+      RuntimeProfile* profile, int64_t byte_limit,
+      const std::string& label = std::string(),
+      const std::shared_ptr& parent = std::shared_ptr());
+
   /// 'byte_limit' < 0 means no limit
   /// 'label' is the label used in the usage string (LogUsage())
   /// If 'log_usage_if_zero' is false, this tracker (and its children) will not be
@@ -97,6 +110,7 @@ class MemTracker : public std::enable_shared_from_this {
   MemTracker(RuntimeProfile* profile, int64_t byte_limit,
       const std::string& label = std::string(), const std::shared_ptr& parent = std::shared_ptr());
 
+  // TODO(yingchun): not used, remove it later
   /// C'tor for tracker that uses consumption_metric as the consumption value.
   /// Consume()/Release() can still be called. This is used for the root process tracker
   /// (if 'parent' is NULL). It is also to report on other categories of memory under the
diff --git a/be/src/runtime/plan_fragment_executor.cpp b/be/src/runtime/plan_fragment_executor.cpp
index 2cbff2b26b0af5..264a6685a8a98a 100644
--- a/be/src/runtime/plan_fragment_executor.cpp
+++ b/be/src/runtime/plan_fragment_executor.cpp
@@ -129,8 +129,7 @@ Status PlanFragmentExecutor::prepare(const TExecPlanFragmentParams& request) {
         bytes_limit = _exec_env->process_mem_tracker()->limit();
     }
     // NOTE: this MemTracker only for olap
-    _mem_tracker.reset(
-            new MemTracker(bytes_limit, "fragment mem-limit", _exec_env->process_mem_tracker()));
+    _mem_tracker = MemTracker::CreateTracker(bytes_limit, "fragment mem-limit", _exec_env->process_mem_tracker());
     _runtime_state->set_fragment_mem_tracker(_mem_tracker);
 
     LOG(INFO) << "Using query memory limit: "
diff --git a/be/src/runtime/runtime_state.cpp b/be/src/runtime/runtime_state.cpp
index 2192ed3ddad7f3..adc8674337444e 100644
--- a/be/src/runtime/runtime_state.cpp
+++ b/be/src/runtime/runtime_state.cpp
@@ -55,7 +55,7 @@ RuntimeState::RuntimeState(
             _obj_pool(new ObjectPool()),
             _data_stream_recvrs_pool(new ObjectPool()),
             _unreported_error_idx(0),
-            _fragment_mem_tracker(NULL),
+            _fragment_mem_tracker(nullptr),
             _is_cancelled(false),
             _per_fragment_instance_idx(0),
             _root_node_id(-1),
@@ -81,7 +81,7 @@ RuntimeState::RuntimeState(
             _data_stream_recvrs_pool(new ObjectPool()),
             _unreported_error_idx(0),
             _query_id(fragment_params.params.query_id),
-            _fragment_mem_tracker(NULL),
+            _fragment_mem_tracker(nullptr),
             _is_cancelled(false),
             _per_fragment_instance_idx(0),
             _root_node_id(-1),
@@ -226,10 +226,12 @@ Status RuntimeState::init_mem_trackers(const TUniqueId& query_id) {
     auto mem_tracker_counter = ADD_COUNTER(&_profile, "MemoryLimit", TUnit::BYTES);
     mem_tracker_counter->set(bytes_limit);
 
-    _query_mem_tracker.reset(
-            new MemTracker(bytes_limit, std::string("RuntimeState: query ") + runtime_profile()->name(), _exec_env->process_mem_tracker()));
-    _instance_mem_tracker.reset(
-            new MemTracker(&_profile, -1, std::string("RuntimeState: instance ") + runtime_profile()->name(), _query_mem_tracker));
+    _query_mem_tracker = MemTracker::CreateTracker(
+            bytes_limit, std::string("RuntimeState: query ") + runtime_profile()->name(),
+            _exec_env->process_mem_tracker());
+    _instance_mem_tracker = MemTracker::CreateTracker(
+            &_profile, -1, std::string("RuntimeState: instance ") + runtime_profile()->name(),
+            _query_mem_tracker);
 
     /*
     // TODO: this is a stopgap until we implement ExprContext
diff --git a/be/src/runtime/tablets_channel.cpp b/be/src/runtime/tablets_channel.cpp
index e0b15dcf2162b8..57c33a55317bc2 100644
--- a/be/src/runtime/tablets_channel.cpp
+++ b/be/src/runtime/tablets_channel.cpp
@@ -31,7 +31,7 @@ std::atomic TabletsChannel::_s_tablet_writer_count;
 
 TabletsChannel::TabletsChannel(const TabletsChannelKey& key, const std::shared_ptr& mem_tracker):
         _key(key), _state(kInitialized), _closed_senders(64) {
-    _mem_tracker.reset(new MemTracker(-1, "tablets channel", mem_tracker));
+    _mem_tracker = MemTracker::CreateTracker(-1, "tablets channel", mem_tracker);
     static std::once_flag once_flag;
     std::call_once(once_flag, [] {
         REGISTER_GAUGE_DORIS_METRIC(tablet_writer_count, [&]() {
diff --git a/be/test/runtime/memory_scratch_sink_test.cpp b/be/test/runtime/memory_scratch_sink_test.cpp
index d02666bd323365..a7b2321a158126 100644
--- a/be/test/runtime/memory_scratch_sink_test.cpp
+++ b/be/test/runtime/memory_scratch_sink_test.cpp
@@ -124,7 +124,7 @@ void MemoryScratchSinkTest::init_runtime_state() {
     query_id.hi = 100;
     _state = new RuntimeState(query_id, query_options, TQueryGlobals(), _exec_env);
     _state->init_instance_mem_tracker();
-    _mem_tracker = new MemTracker(-1, "MemoryScratchSinkTest", _state->instance_mem_tracker());
+    _mem_tracker = MemTracker::CreateTracker(-1, "MemoryScratchSinkTest", _state->instance_mem_tracker());
     _state->set_desc_tbl(_desc_tbl);
     _state->_load_dir = "./test_run/output/";
     _state->init_mem_trackers(TUniqueId());
diff --git a/be/test/util/arrow/arrow_work_flow_test.cpp b/be/test/util/arrow/arrow_work_flow_test.cpp
index ea63cd0fd263cb..05806f819a4d30 100644
--- a/be/test/util/arrow/arrow_work_flow_test.cpp
+++ b/be/test/util/arrow/arrow_work_flow_test.cpp
@@ -99,7 +99,7 @@ void ArrowWorkFlowTest::init_runtime_state() {
     query_id.hi = 100;
     _state = new RuntimeState(query_id, query_options, TQueryGlobals(), _exec_env);
     _state->init_instance_mem_tracker();
-    _mem_tracker.reset(new MemTracker(-1, "ArrowWorkFlowTest", _state->instance_mem_tracker()));
+    _mem_tracker = MemTracker::CreateTracker(-1, "ArrowWorkFlowTest", _state->instance_mem_tracker());
     _state->set_desc_tbl(_desc_tbl);
     _state->_load_dir = "./test_run/output/";
     _state->init_mem_trackers(TUniqueId());

From c02fe94e337151e6fb712cb3048600b4d0d64d64 Mon Sep 17 00:00:00 2001
From: Yingchun Lai <405403881@qq.com>
Date: Sat, 4 Jul 2020 00:48:36 +0800
Subject: [PATCH 05/12] fix deadlock in MemTracker child-tracker traversal (copy child list before iterating instead of holding child_trackers_lock_)

---
 be/src/runtime/mem_tracker.cpp | 153 ++++++++++++++++++---------------
 1 file changed, 85 insertions(+), 68 deletions(-)

diff --git a/be/src/runtime/mem_tracker.cpp b/be/src/runtime/mem_tracker.cpp
index 1689bcd67eb13b..bb48730df4f1cd 100644
--- a/be/src/runtime/mem_tracker.cpp
+++ b/be/src/runtime/mem_tracker.cpp
@@ -37,14 +37,18 @@
 #include "util/uid_util.h"
 #include "util/stack_util.h"
 
+using boost::join;
 using std::deque;
 using std::endl;
 using std::greater;
+using std::list;
 using std::pair;
 using std::priority_queue;
 using std::shared_ptr;
 using std::string;
+using std::unique_ptr;
 using std::vector;
+using std::weak_ptr;
 using strings::Substitute;
 
 namespace doris {
@@ -66,7 +70,7 @@ static std::shared_ptr root_tracker;
 static GoogleOnceType root_tracker_once = GOOGLE_ONCE_INIT;
 
 void MemTracker::CreateRootTracker() {
-  root_tracker = MemTracker::CreateTracker(-1, "root", std::shared_ptr());
+  root_tracker.reset(new MemTracker(-1, "root", std::shared_ptr()));
   root_tracker->Init();
 }
 
@@ -81,7 +85,7 @@ std::shared_ptr MemTracker::CreateTracker(
   } else {
       real_parent = GetRootTracker();
   }
-  shared_ptr tracker = std:move(MemTracker::CreateTracker(byte_limit, label, real_parent, log_usage_if_zero));
+  shared_ptr tracker(new MemTracker(byte_limit, label, real_parent, log_usage_if_zero));
   real_parent->AddChildTracker(tracker);
   tracker->Init();
 
@@ -98,62 +102,62 @@ std::shared_ptr MemTracker::CreateTracker(
   } else {
       real_parent = GetRootTracker();
   }
-  shared_ptr tracker = std::move(MemTracker::CreateTracker(profile, byte_limit, label, real_parent));
+  shared_ptr tracker(new MemTracker(profile, byte_limit, label, real_parent));
   real_parent->AddChildTracker(tracker);
   tracker->Init();
 
   return tracker;
 }
 
-MemTracker::MemTracker(
-    int64_t byte_limit, const string& label, const std::shared_ptr& parent, bool auto_unregister, bool log_usage_if_zero)
-  : limit_(byte_limit),
-    soft_limit_(CalcSoftLimit(byte_limit)),
-    label_(label),
-    parent_(parent),
-    consumption_(&local_counter_),
-    local_counter_(TUnit::BYTES),
-    consumption_metric_(nullptr),
-    log_usage_if_zero_(log_usage_if_zero),
-    num_gcs_metric_(nullptr),
-    bytes_freed_by_last_gc_metric_(nullptr),
-    bytes_over_limit_metric_(nullptr),
-    limit_metric_(nullptr) {
-  Init();
+MemTracker::MemTracker(int64_t byte_limit, const string& label,
+                       const std::shared_ptr& parent, bool log_usage_if_zero)
+        : limit_(byte_limit),
+          soft_limit_(CalcSoftLimit(byte_limit)),
+          label_(label),
+          parent_(parent),
+          consumption_(&local_counter_),
+          local_counter_(TUnit::BYTES),
+          consumption_metric_(nullptr),
+          log_usage_if_zero_(log_usage_if_zero),
+          num_gcs_metric_(nullptr),
+          bytes_freed_by_last_gc_metric_(nullptr),
+          bytes_over_limit_metric_(nullptr),
+          limit_metric_(nullptr) {
+    Init();
 }
 
-MemTracker::MemTracker(RuntimeProfile* profile, int64_t byte_limit,
-    const std::string& label, const std::shared_ptr& parent)
-  : limit_(byte_limit),
-    soft_limit_(CalcSoftLimit(byte_limit)),
-    label_(label),
-    parent_(parent),
-    consumption_(profile->AddHighWaterMarkCounter(COUNTER_NAME, TUnit::BYTES)),
-    local_counter_(TUnit::BYTES),
-    consumption_metric_(nullptr),
-    log_usage_if_zero_(true),
-    num_gcs_metric_(nullptr),
-    bytes_freed_by_last_gc_metric_(nullptr),
-    bytes_over_limit_metric_(nullptr),
-    limit_metric_(nullptr) {
-  Init();
+MemTracker::MemTracker(RuntimeProfile* profile, int64_t byte_limit, const std::string& label,
+                       const std::shared_ptr& parent)
+        : limit_(byte_limit),
+          soft_limit_(CalcSoftLimit(byte_limit)),
+          label_(label),
+          parent_(parent),
+          consumption_(profile->AddHighWaterMarkCounter(COUNTER_NAME, TUnit::BYTES)),
+          local_counter_(TUnit::BYTES),
+          consumption_metric_(nullptr),
+          log_usage_if_zero_(true),
+          num_gcs_metric_(nullptr),
+          bytes_freed_by_last_gc_metric_(nullptr),
+          bytes_over_limit_metric_(nullptr),
+          limit_metric_(nullptr) {
+    Init();
 }
 
-MemTracker::MemTracker(IntGauge* consumption_metric,
-    int64_t byte_limit, const string& label, const std::shared_ptr& parent)
-  : limit_(byte_limit),
-    soft_limit_(CalcSoftLimit(byte_limit)),
-    label_(label),
-    parent_(parent),
-    consumption_(&local_counter_),
-    local_counter_(TUnit::BYTES),
-    consumption_metric_(consumption_metric),
-    log_usage_if_zero_(true),
-    num_gcs_metric_(nullptr),
-    bytes_freed_by_last_gc_metric_(nullptr),
-    bytes_over_limit_metric_(nullptr),
-    limit_metric_(nullptr) {
-  Init();
+MemTracker::MemTracker(IntGauge* consumption_metric, int64_t byte_limit, const string& label,
+                       const std::shared_ptr& parent)
+        : limit_(byte_limit),
+          soft_limit_(CalcSoftLimit(byte_limit)),
+          label_(label),
+          parent_(parent),
+          consumption_(&local_counter_),
+          local_counter_(TUnit::BYTES),
+          consumption_metric_(consumption_metric),
+          log_usage_if_zero_(true),
+          num_gcs_metric_(nullptr),
+          bytes_freed_by_last_gc_metric_(nullptr),
+          bytes_over_limit_metric_(nullptr),
+          limit_metric_(nullptr) {
+    Init();
 }
 
 void MemTracker::Init() {
@@ -208,9 +212,15 @@ int64_t MemTracker::GetPoolMemReserved() {
   DCHECK(!pool_name_.empty());
   DCHECK_EQ(limit_, -1) << LogUsage(UNLIMITED_DEPTH);
 
+  // Use cache to avoid holding child_trackers_lock_
+  list> children;
+  {
+    lock_guard l(child_trackers_lock_);
+    children = child_trackers_;
+  }
+
   int64_t mem_reserved = 0L;
-  lock_guard l(child_trackers_lock_);
-  for (const auto& child_weak : child_trackers_) {
+  for (const auto& child_weak : children) {
     std::shared_ptr child = child_weak.lock();
     if (child) {
       int64_t child_limit = child->limit();
@@ -220,7 +230,7 @@ int64_t MemTracker::GetPoolMemReserved() {
         mem_reserved += std::min(child_limit, MemInfo::physical_mem());
       } else {
         DCHECK(query_exec_finished || child_limit == -1)
-                    << child->LogUsage(UNLIMITED_DEPTH);
+            << child->LogUsage(UNLIMITED_DEPTH);
         mem_reserved += child->consumption();
       }
     }
@@ -244,7 +254,7 @@ MemTracker* PoolMemTrackerRegistry::GetRequestPoolMemTracker(
       new MemTracker(-1, Substitute(REQUEST_POOL_MEM_TRACKER_LABEL_FORMAT, pool_name),
           ExecEnv::GetInstance()->process_mem_tracker());
   tracker->pool_name_ = pool_name;
-  pool_to_mem_trackers_.emplace(pool_name, std::unique_ptr(tracker));
+  pool_to_mem_trackers_.emplace(pool_name, unique_ptr(tracker));
   return tracker;
 }
 
@@ -273,13 +283,15 @@ void MemTracker::ListTrackers(vector>* trackers) {
     to_process.pop_back();
 
     trackers->push_back(t);
+    list> children;
     {
       lock_guard l(t->child_trackers_lock_);
-      for (const auto& child_weak : t->child_trackers_) {
-        shared_ptr child = child_weak.lock();
-        if (child) {
-          to_process.emplace_back(std::move(child));
-        }
+      children = t->child_trackers_;
+    }
+    for (const auto& child_weak : children) {
+      shared_ptr child = child_weak.lock();
+      if (child) {
+        to_process.emplace_back(std::move(child));
       }
     }
   }
@@ -380,11 +392,12 @@ string MemTracker::LogUsage(int max_recursive_depth, const string& prefix,
   string new_prefix = Substitute("  $0", prefix);
   int64_t child_consumption;
   string child_trackers_usage;
+  list> children;
   {
     lock_guard l(child_trackers_lock_);
-    child_trackers_usage = LogUsage(max_recursive_depth - 1, new_prefix,
-        child_trackers_, &child_consumption);
+    children = child_trackers_;
   }
+  child_trackers_usage = LogUsage(max_recursive_depth - 1, new_prefix, children, &child_consumption);
   if (!child_trackers_usage.empty()) ss << "\n" << child_trackers_usage;
 
   if (parent_ == nullptr) {
@@ -401,20 +414,20 @@ string MemTracker::LogUsage(int max_recursive_depth, const string& prefix,
 }
 
 string MemTracker::LogUsage(int max_recursive_depth, const string& prefix,
-    const std::list>& trackers, int64_t* logged_consumption) {
+    const list>& trackers, int64_t* logged_consumption) {
   *logged_consumption = 0;
   vector usage_strings;
   for (const auto& tracker_weak : trackers) {
-    std::shared_ptr tracker = tracker_weak.lock();
+    shared_ptr tracker = tracker_weak.lock();
     if (tracker) {
       int64_t tracker_consumption;
       string usage_string = tracker->LogUsage(max_recursive_depth, prefix,
-                                              &tracker_consumption);
+          &tracker_consumption);
       if (!usage_string.empty()) usage_strings.push_back(usage_string);
       *logged_consumption += tracker_consumption;
     }
   }
-  return boost::join(usage_strings, "\n");
+  return join(usage_strings, "\n");
 }
 
 string MemTracker::LogTopNQueries(int limit) {
@@ -430,16 +443,20 @@ string MemTracker::LogTopNQueries(int limit) {
     min_pq.pop();
   }
   std::reverse(usage_strings.begin(), usage_strings.end());
-  return boost::join(usage_strings, "\n");
+  return join(usage_strings, "\n");
 }
 
 void MemTracker::GetTopNQueries(
     priority_queue, vector>,
         greater>>& min_pq,
     int limit) {
-  lock_guard l(child_trackers_lock_);
-  for (const auto& child_weak : child_trackers_) {
-    std::shared_ptr child = child_weak.lock();
+  list> children;
+  {
+    lock_guard l(child_trackers_lock_);
+    children = child_trackers_;
+  }
+  for (const auto& child_weak : children) {
+    shared_ptr child = child_weak.lock();
     if (child) {
       if (!child->is_query_mem_tracker_) {
         child->GetTopNQueries(min_pq, limit);
@@ -463,7 +480,7 @@ Status MemTracker::MemLimitExceeded(MemTracker* mtracker, RuntimeState* state,
     const std::string& details, int64_t failed_allocation_size) {
   DCHECK_GE(failed_allocation_size, 0);
   stringstream ss;
-  if (details.size() != 0) ss << details << endl;
+  if (!details.empty()) ss << details << endl;
   if (failed_allocation_size != 0) {
     if (mtracker != nullptr) ss << mtracker->label();
     ss << " could not allocate "

From a7bdd5ad8ab97a6ee77ddfa24981d3fd3e41d42e Mon Sep 17 00:00:00 2001
From: Yingchun Lai <405403881@qq.com>
Date: Sun, 5 Jul 2020 13:38:38 +0800
Subject: [PATCH 06/12] shared: HighWaterMarkCounter

---
 be/src/runtime/mem_tracker.cpp  | 84 +++++++++++++++------------------
 be/src/runtime/mem_tracker.h    |  7 +--
 be/src/util/runtime_profile.cpp | 17 +++++++
 be/src/util/runtime_profile.h   |  7 +++
 4 files changed, 65 insertions(+), 50 deletions(-)

diff --git a/be/src/runtime/mem_tracker.cpp b/be/src/runtime/mem_tracker.cpp
index bb48730df4f1cd..9d93162260d380 100644
--- a/be/src/runtime/mem_tracker.cpp
+++ b/be/src/runtime/mem_tracker.cpp
@@ -109,55 +109,49 @@ std::shared_ptr<MemTracker> MemTracker::CreateTracker(
   return tracker;
 }
 
-MemTracker::MemTracker(int64_t byte_limit, const string& label,
-                       const std::shared_ptr<MemTracker>& parent, bool log_usage_if_zero)
-        : limit_(byte_limit),
-          soft_limit_(CalcSoftLimit(byte_limit)),
-          label_(label),
-          parent_(parent),
-          consumption_(&local_counter_),
-          local_counter_(TUnit::BYTES),
-          consumption_metric_(nullptr),
-          log_usage_if_zero_(log_usage_if_zero),
-          num_gcs_metric_(nullptr),
-          bytes_freed_by_last_gc_metric_(nullptr),
-          bytes_over_limit_metric_(nullptr),
-          limit_metric_(nullptr) {
-    Init();
+MemTracker::MemTracker(
+    int64_t byte_limit, const string& label, const std::shared_ptr<MemTracker>& parent, bool log_usage_if_zero)
+  : limit_(byte_limit),
+    soft_limit_(CalcSoftLimit(byte_limit)),
+    label_(label),
+    parent_(parent),
+    consumption_(std::make_shared<RuntimeProfile::HighWaterMarkCounter>(TUnit::BYTES)),
+    consumption_metric_(nullptr),
+    log_usage_if_zero_(log_usage_if_zero),
+    num_gcs_metric_(nullptr),
+    bytes_freed_by_last_gc_metric_(nullptr),
+    bytes_over_limit_metric_(nullptr),
+    limit_metric_(nullptr) {
 }
 
-MemTracker::MemTracker(RuntimeProfile* profile, int64_t byte_limit, const std::string& label,
-                       const std::shared_ptr<MemTracker>& parent)
-        : limit_(byte_limit),
-          soft_limit_(CalcSoftLimit(byte_limit)),
-          label_(label),
-          parent_(parent),
-          consumption_(profile->AddHighWaterMarkCounter(COUNTER_NAME, TUnit::BYTES)),
-          local_counter_(TUnit::BYTES),
-          consumption_metric_(nullptr),
-          log_usage_if_zero_(true),
-          num_gcs_metric_(nullptr),
-          bytes_freed_by_last_gc_metric_(nullptr),
-          bytes_over_limit_metric_(nullptr),
-          limit_metric_(nullptr) {
-    Init();
+MemTracker::MemTracker(RuntimeProfile* profile, int64_t byte_limit,
+    const std::string& label, const std::shared_ptr<MemTracker>& parent)
+  : limit_(byte_limit),
+    soft_limit_(CalcSoftLimit(byte_limit)),
+    label_(label),
+    parent_(parent),
+    consumption_(profile->AddHighWaterMarkCounter(COUNTER_NAME, TUnit::BYTES)),
+    consumption_metric_(nullptr),
+    log_usage_if_zero_(true),
+    num_gcs_metric_(nullptr),
+    bytes_freed_by_last_gc_metric_(nullptr),
+    bytes_over_limit_metric_(nullptr),
+    limit_metric_(nullptr) {
 }
 
-MemTracker::MemTracker(IntGauge* consumption_metric, int64_t byte_limit, const string& label,
-                       const std::shared_ptr<MemTracker>& parent)
-        : limit_(byte_limit),
-          soft_limit_(CalcSoftLimit(byte_limit)),
-          label_(label),
-          parent_(parent),
-          consumption_(&local_counter_),
-          local_counter_(TUnit::BYTES),
-          consumption_metric_(consumption_metric),
-          log_usage_if_zero_(true),
-          num_gcs_metric_(nullptr),
-          bytes_freed_by_last_gc_metric_(nullptr),
-          bytes_over_limit_metric_(nullptr),
-          limit_metric_(nullptr) {
-    Init();
+MemTracker::MemTracker(IntGauge* consumption_metric,
+    int64_t byte_limit, const string& label, const std::shared_ptr<MemTracker>& parent)
+  : limit_(byte_limit),
+    soft_limit_(CalcSoftLimit(byte_limit)),
+    label_(label),
+    parent_(parent),
+    consumption_(std::make_shared<RuntimeProfile::HighWaterMarkCounter>(TUnit::BYTES)),
+    consumption_metric_(consumption_metric),
+    log_usage_if_zero_(true),
+    num_gcs_metric_(nullptr),
+    bytes_freed_by_last_gc_metric_(nullptr),
+    bytes_over_limit_metric_(nullptr),
+    limit_metric_(nullptr) {
 }
 
 void MemTracker::Init() {
diff --git a/be/src/runtime/mem_tracker.h b/be/src/runtime/mem_tracker.h
index bb84bf345a5598..b88d404db1397f 100644
--- a/be/src/runtime/mem_tracker.h
+++ b/be/src/runtime/mem_tracker.h
@@ -529,11 +529,8 @@ class MemTracker : public std::enable_shared_from_this<MemTracker> {
   /// is unregistered.
   std::shared_ptr<MemTracker> parent_;
 
-  /// in bytes; not owned
-  RuntimeProfile::HighWaterMarkCounter* consumption_;
-
-  /// holds consumption_ counter if not tied to a profile
-  RuntimeProfile::HighWaterMarkCounter local_counter_;
+  /// in bytes
+  std::shared_ptr<RuntimeProfile::HighWaterMarkCounter> consumption_;
 
   /// If non-NULL, used to measure consumption (in bytes) rather than the values provided
   /// to Consume()/Release(). Only used for the process tracker, thus parent_ should be
diff --git a/be/src/util/runtime_profile.cpp b/be/src/util/runtime_profile.cpp
index 53c5b3431d3113..74d1d176a7244e 100644
--- a/be/src/util/runtime_profile.cpp
+++ b/be/src/util/runtime_profile.cpp
@@ -382,10 +382,27 @@ const std::string* RuntimeProfile::get_info_string(const std::string& key) {
 ADD_COUNTER_IMPL(AddHighWaterMarkCounter, HighWaterMarkCounter);
 //ADD_COUNTER_IMPL(AddConcurrentTimerCounter, ConcurrentTimerCounter);
 
+std::shared_ptr<RuntimeProfile::HighWaterMarkCounter> RuntimeProfile::AddSharedHighWaterMarkCounter(
+    const std::string& name, TUnit::type unit, const std::string& parent_counter_name) {
+    DCHECK_EQ(_is_averaged_profile, false);
+    boost::lock_guard<boost::mutex> l(_counter_map_lock);
+    DCHECK(_counter_map.find(name) == _counter_map.end());
+    DCHECK(parent_counter_name == ROOT_COUNTER ||
+        _counter_map.find(parent_counter_name) != _counter_map.end());
+    std::shared_ptr<HighWaterMarkCounter> counter = std::make_shared<HighWaterMarkCounter>(unit);
+    _shared_counter_pool.push_back(counter);
+    _counter_map[name] = counter.get();
+    std::set<std::string>* child_counters =
+        find_or_insert(&_child_counter_map, parent_counter_name, std::set<std::string>());
+    child_counters->insert(name);
+    return counter;
+}
+
 RuntimeProfile::Counter* RuntimeProfile::add_counter(const std::string& name, TUnit::type type,
                                                      const std::string& parent_counter_name) {
     boost::lock_guard<boost::mutex> l(_counter_map_lock);
 
+    // TODO(yingchun): Can we ensure that 'name' is not exist in '_counter_map'? Use CHECK instead?
     if (_counter_map.find(name) != _counter_map.end()) {
         // TODO: should we make sure that we don't return existing derived counters?
         return _counter_map[name];
diff --git a/be/src/util/runtime_profile.h b/be/src/util/runtime_profile.h
index ec81559d03b41b..1406b5e1468096 100644
--- a/be/src/util/runtime_profile.h
+++ b/be/src/util/runtime_profile.h
@@ -454,6 +454,10 @@ class RuntimeProfile {
     HighWaterMarkCounter* AddHighWaterMarkCounter(const std::string& name,
             TUnit::type unit, const std::string& parent_counter_name = "");
 
+    std::shared_ptr<HighWaterMarkCounter> AddSharedHighWaterMarkCounter(
+        const std::string& name, TUnit::type unit,
+        const std::string& parent_counter_name = "");
+
     // stops updating the value of 'rate_counter'. Rate counters are updated
     // periodically so should be removed as soon as the underlying counter is
     // no longer going to change.
@@ -480,6 +484,9 @@ class RuntimeProfile {
     // object, but occasionally allocated in the constructor.
     std::unique_ptr _pool;
 
+    // Pool for allocated counters. These counters are shared with some other objects.
+    std::vector<std::shared_ptr<HighWaterMarkCounter>> _shared_counter_pool;
+
     // True if we have to delete the _pool on destruction.
     bool _own_pool;
 

From 05e91c2f3c9a73d95007b4a3e1c8aa1258d1ebe0 Mon Sep 17 00:00:00 2001
From: huangwei 
Date: Tue, 21 Jul 2020 17:09:18 +0800
Subject: [PATCH 07/12] [] fix add shared counter from runtime profile

---
 be/src/runtime/mem_tracker.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/be/src/runtime/mem_tracker.cpp b/be/src/runtime/mem_tracker.cpp
index 9d93162260d380..5e3c90b8bec803 100644
--- a/be/src/runtime/mem_tracker.cpp
+++ b/be/src/runtime/mem_tracker.cpp
@@ -130,7 +130,7 @@ MemTracker::MemTracker(RuntimeProfile* profile, int64_t byte_limit,
     soft_limit_(CalcSoftLimit(byte_limit)),
     label_(label),
     parent_(parent),
-    consumption_(profile->AddHighWaterMarkCounter(COUNTER_NAME, TUnit::BYTES)),
+    consumption_(profile->AddSharedHighWaterMarkCounter(COUNTER_NAME, TUnit::BYTES)),
     consumption_metric_(nullptr),
     log_usage_if_zero_(true),
     num_gcs_metric_(nullptr),

From 5d6e1199bd06630517a3384b7fea9880b861864b Mon Sep 17 00:00:00 2001
From: huangwei 
Date: Tue, 21 Jul 2020 18:09:47 +0800
Subject: [PATCH 08/12] [] add comment

---
 be/src/util/runtime_profile.h | 1 +
 1 file changed, 1 insertion(+)

diff --git a/be/src/util/runtime_profile.h b/be/src/util/runtime_profile.h
index 1406b5e1468096..d96dc60b704119 100644
--- a/be/src/util/runtime_profile.h
+++ b/be/src/util/runtime_profile.h
@@ -454,6 +454,7 @@ class RuntimeProfile {
     HighWaterMarkCounter* AddHighWaterMarkCounter(const std::string& name,
             TUnit::type unit, const std::string& parent_counter_name = "");
 
+    // Only used when creating a MemTracker (the tracker uses this profile counter to calculate consumption)
     std::shared_ptr<HighWaterMarkCounter> AddSharedHighWaterMarkCounter(
         const std::string& name, TUnit::type unit,
         const std::string& parent_counter_name = "");

From 64df4769992c9555e0ae35c1e5383a8d22a109b2 Mon Sep 17 00:00:00 2001
From: huangwei 
Date: Tue, 21 Jul 2020 18:10:04 +0800
Subject: [PATCH 09/12] Revert "[Need revert] temp fix ut"

This reverts commit 360deb36
---
 be/test/olap/schema_change_test.cpp | 2 +-
 run-ut.sh                           | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/be/test/olap/schema_change_test.cpp b/be/test/olap/schema_change_test.cpp
index 8367fa8bbaf653..3329b89b06a310 100644
--- a/be/test/olap/schema_change_test.cpp
+++ b/be/test/olap/schema_change_test.cpp
@@ -875,7 +875,7 @@ TEST_F(TestColumn, ConvertCharToHLL) {
     mutable_block.get_row(0, &mv_row_cursor);
 
     auto dst_slice = reinterpret_cast<Slice*>(mv_row_cursor.cell_ptr(1));
-    HyperLogLog hll(*dst_slice);
+    HyperLogLog hll(dst_slice->data);
     ASSERT_EQ(hll.estimate_cardinality(), 1);
 }
 }
diff --git a/run-ut.sh b/run-ut.sh
index f889b7907d1f0f..f657058d8b1f2c 100755
--- a/run-ut.sh
+++ b/run-ut.sh
@@ -25,7 +25,7 @@ export DORIS_HOME=${ROOT}
 
 . ${DORIS_HOME}/env.sh
 
-PARALLEL=32 #$[$(nproc)/4+1]
+PARALLEL=$[$(nproc)/4+1]
 
 # Check args
 usage() {

From 4ba559100590f9910e5aab5fd638cd314e03cb4b Mon Sep 17 00:00:00 2001
From: huangwei 
Date: Thu, 30 Jul 2020 14:28:26 +0800
Subject: [PATCH 10/12] fix merge

---
 be/src/olap/base_compaction.cpp            |  2 +-
 be/src/olap/base_compaction.h              |  3 ++-
 be/src/olap/compaction.cpp                 |  8 +++----
 be/src/olap/compaction.h                   |  6 ++---
 be/src/olap/cumulative_compaction.cpp      |  2 +-
 be/src/olap/cumulative_compaction.h        |  2 +-
 be/src/olap/row_block.cpp                  |  2 +-
 be/src/olap/row_block.h                    |  3 ++-
 be/src/olap/rowset/alpha_rowset.cpp        |  2 +-
 be/src/olap/rowset/alpha_rowset.h          |  2 +-
 be/src/olap/rowset/alpha_rowset_reader.cpp |  2 +-
 be/src/olap/rowset/alpha_rowset_reader.h   |  4 ++--
 be/src/olap/rowset/beta_rowset.cpp         |  2 +-
 be/src/olap/rowset/beta_rowset.h           |  2 +-
 be/src/olap/rowset/beta_rowset_reader.cpp  |  3 ++-
 be/src/olap/rowset/beta_rowset_reader.h    |  5 ++--
 be/src/olap/rowset/column_data.cpp         | 28 ++++++++++++----------
 be/src/olap/rowset/column_data.h           |  8 ++++---
 be/src/olap/rowset/rowset.h                |  2 +-
 be/src/olap/rowset/segment_reader.cpp      |  2 +-
 be/src/olap/rowset/segment_reader.h        |  2 +-
 be/src/olap/storage_engine.cpp             | 10 ++++----
 be/src/olap/storage_engine.h               |  2 +-
 be/src/runtime/runtime_state.cpp           | 18 +++-----------
 be/src/runtime/runtime_state.h             | 26 ++++++++++----------
 be/src/runtime/vectorized_row_batch.cpp    |  4 ++--
 be/src/runtime/vectorized_row_batch.h      |  4 ++--
 27 files changed, 76 insertions(+), 80 deletions(-)

diff --git a/be/src/olap/base_compaction.cpp b/be/src/olap/base_compaction.cpp
index 0ccb4cae7d47ef..2b6343fbf6fcbd 100644
--- a/be/src/olap/base_compaction.cpp
+++ b/be/src/olap/base_compaction.cpp
@@ -22,7 +22,7 @@
 namespace doris {
 
 BaseCompaction::BaseCompaction(TabletSharedPtr tablet, const std::string& label,
-                               MemTracker* parent_tracker)
+                               const std::shared_ptr<MemTracker>& parent_tracker)
         : Compaction(tablet, label, parent_tracker) {}
 
 BaseCompaction::~BaseCompaction() { }
diff --git a/be/src/olap/base_compaction.h b/be/src/olap/base_compaction.h
index 9ea54a9275dfa7..58d662def2e8f2 100644
--- a/be/src/olap/base_compaction.h
+++ b/be/src/olap/base_compaction.h
@@ -29,7 +29,8 @@ namespace doris {
 
 class BaseCompaction : public Compaction {
 public:
-    BaseCompaction(TabletSharedPtr tablet, const std::string& label, MemTracker* parent_tracker);
+    BaseCompaction(TabletSharedPtr tablet, const std::string& label,
+                   const std::shared_ptr<MemTracker>& parent_tracker);
     ~BaseCompaction() override;
 
     OLAPStatus compact() override;
diff --git a/be/src/olap/compaction.cpp b/be/src/olap/compaction.cpp
index 5a74671fb20776..67b56f6e763172 100644
--- a/be/src/olap/compaction.cpp
+++ b/be/src/olap/compaction.cpp
@@ -27,9 +27,9 @@ namespace doris {
 
 Semaphore Compaction::_concurrency_sem;
 
-Compaction::Compaction(TabletSharedPtr tablet, const std::string& label, MemTracker* parent_tracker)
-        : _mem_tracker(-1, label, parent_tracker, true),
-          _readers_tracker(-1, "readers tracker", &_mem_tracker, true),
+Compaction::Compaction(TabletSharedPtr tablet, const std::string& label, const std::shared_ptr<MemTracker>& parent_tracker)
+        : _mem_tracker(MemTracker::CreateTracker(-1, label, parent_tracker)),
+          _readers_tracker(MemTracker::CreateTracker(-1, "readers tracker", _mem_tracker)),
           _tablet(tablet),
           _input_rowsets_size(0),
           _input_row_num(0),
@@ -153,7 +153,7 @@ OLAPStatus Compaction::construct_output_rowset_writer() {
 OLAPStatus Compaction::construct_input_rowset_readers() {
     for (auto& rowset : _input_rowsets) {
         RowsetReaderSharedPtr rs_reader;
-        RETURN_NOT_OK(rowset->create_reader(&_readers_tracker, &rs_reader));
+        RETURN_NOT_OK(rowset->create_reader(_readers_tracker, &rs_reader));
         _input_rs_readers.push_back(std::move(rs_reader));
     }
     return OLAP_SUCCESS;
diff --git a/be/src/olap/compaction.h b/be/src/olap/compaction.h
index 9507cc2026ec5d..6c4b438aaeb260 100644
--- a/be/src/olap/compaction.h
+++ b/be/src/olap/compaction.h
@@ -44,7 +44,7 @@ class Merger;
 //  4. gc unused rowstes
 class Compaction {
 public:
-    Compaction(TabletSharedPtr tablet, const std::string& label, MemTracker* parent_tracker);
+    Compaction(TabletSharedPtr tablet, const std::string& label, const std::shared_ptr<MemTracker>& parent_tracker);
     virtual ~Compaction();
 
     virtual OLAPStatus compact() = 0;
@@ -78,10 +78,10 @@ class Compaction {
 
 protected:
     // the root tracker for this compaction
-    MemTracker _mem_tracker;
+    std::shared_ptr<MemTracker> _mem_tracker;
 
     // the child of root, only track rowset readers mem
-    MemTracker _readers_tracker;
+    std::shared_ptr<MemTracker> _readers_tracker;
     TabletSharedPtr _tablet;
 
     std::vector _input_rowsets;
diff --git a/be/src/olap/cumulative_compaction.cpp b/be/src/olap/cumulative_compaction.cpp
index 3494561e83c1d1..a5f135869b3072 100755
--- a/be/src/olap/cumulative_compaction.cpp
+++ b/be/src/olap/cumulative_compaction.cpp
@@ -23,7 +23,7 @@
 namespace doris {
 
 CumulativeCompaction::CumulativeCompaction(TabletSharedPtr tablet, const std::string& label,
-                                           MemTracker* parent_tracker)
+                                           const std::shared_ptr<MemTracker>& parent_tracker)
         : Compaction(tablet, label, parent_tracker),
           _cumulative_rowset_size_threshold(config::cumulative_compaction_budgeted_bytes) {}
 
diff --git a/be/src/olap/cumulative_compaction.h b/be/src/olap/cumulative_compaction.h
index 49e5f2c2b3aff9..f32268d6617c73 100755
--- a/be/src/olap/cumulative_compaction.h
+++ b/be/src/olap/cumulative_compaction.h
@@ -27,7 +27,7 @@ namespace doris {
 class CumulativeCompaction : public Compaction {
 public:
     CumulativeCompaction(TabletSharedPtr tablet, const std::string& label,
-                         MemTracker* parent_tracker);
+                         const std::shared_ptr<MemTracker>& parent_tracker);
     ~CumulativeCompaction() override;
 
     OLAPStatus compact() override;
diff --git a/be/src/olap/row_block.cpp b/be/src/olap/row_block.cpp
index ce701029868eb3..f50acf5d347fba 100644
--- a/be/src/olap/row_block.cpp
+++ b/be/src/olap/row_block.cpp
@@ -40,7 +40,7 @@ namespace doris {
 RowBlock::RowBlock(const TabletSchema* schema, const std::shared_ptr<MemTracker>& parent_tracker) :
         _capacity(0),
         _schema(schema) {
-    _tracker = MemTracker::CreateMemTracker(-1, "RowBlock", parent_tracker);
+    _tracker = MemTracker::CreateTracker(-1, "RowBlock", parent_tracker);
     _mem_pool.reset(new MemPool(_tracker.get()));
 }
 
diff --git a/be/src/olap/row_block.h b/be/src/olap/row_block.h
index 214c84936b0ec2..20e935444a535a 100644
--- a/be/src/olap/row_block.h
+++ b/be/src/olap/row_block.h
@@ -56,7 +56,8 @@ class RowBlock {
     friend class RowBlockChanger;
     friend class VectorizedRowBatch;
 public:
-    RowBlock(const TabletSchema* schema, MemTracker* parent_tracker = nullptr);
+    RowBlock(const TabletSchema* schema,
+             const std::shared_ptr<MemTracker>& parent_tracker = std::shared_ptr<MemTracker>());
 
     // 注意回收内部buffer
     ~RowBlock();
diff --git a/be/src/olap/rowset/alpha_rowset.cpp b/be/src/olap/rowset/alpha_rowset.cpp
index 337a049b62efe5..138b9dbd3a07a7 100644
--- a/be/src/olap/rowset/alpha_rowset.cpp
+++ b/be/src/olap/rowset/alpha_rowset.cpp
@@ -57,7 +57,7 @@ OLAPStatus AlphaRowset::create_reader(std::shared_ptr<RowsetReader>* result) {
     return OLAP_SUCCESS;
 }
 
-OLAPStatus AlphaRowset::create_reader(MemTracker* parent_tracker,
+OLAPStatus AlphaRowset::create_reader(const std::shared_ptr<MemTracker>& parent_tracker,
                                       std::shared_ptr<RowsetReader>* result) {
     result->reset(new AlphaRowsetReader(_schema->num_rows_per_row_block(),
                                         std::static_pointer_cast<AlphaRowset>(shared_from_this()),
diff --git a/be/src/olap/rowset/alpha_rowset.h b/be/src/olap/rowset/alpha_rowset.h
index bff9618dfa76fe..3c21e7dd21dbf6 100644
--- a/be/src/olap/rowset/alpha_rowset.h
+++ b/be/src/olap/rowset/alpha_rowset.h
@@ -42,7 +42,7 @@ class AlphaRowset : public Rowset {
 
     OLAPStatus create_reader(std::shared_ptr<RowsetReader>* result) override;
 
-    OLAPStatus create_reader(MemTracker* parent_tracker,
+    OLAPStatus create_reader(const std::shared_ptr<MemTracker>& parent_tracker,
                              std::shared_ptr<RowsetReader>* result) override;
 
     OLAPStatus split_range(const RowCursor& start_key, const RowCursor& end_key,
diff --git a/be/src/olap/rowset/alpha_rowset_reader.cpp b/be/src/olap/rowset/alpha_rowset_reader.cpp
index 906eeeba63b039..361b945e3b87db 100644
--- a/be/src/olap/rowset/alpha_rowset_reader.cpp
+++ b/be/src/olap/rowset/alpha_rowset_reader.cpp
@@ -22,7 +22,7 @@
 namespace doris {
 
 AlphaRowsetReader::AlphaRowsetReader(int num_rows_per_row_block, AlphaRowsetSharedPtr rowset,
-                                     MemTracker* parent_tracker)
+                                     const std::shared_ptr<MemTracker>& parent_tracker)
         : _num_rows_per_row_block(num_rows_per_row_block),
           _rowset(std::move(rowset)),
           _parent_tracker(parent_tracker),
diff --git a/be/src/olap/rowset/alpha_rowset_reader.h b/be/src/olap/rowset/alpha_rowset_reader.h
index 222523a4b3dec1..711f4c2a60c01b 100644
--- a/be/src/olap/rowset/alpha_rowset_reader.h
+++ b/be/src/olap/rowset/alpha_rowset_reader.h
@@ -53,7 +53,7 @@ struct AlphaMergeContextComparator {
 class AlphaRowsetReader : public RowsetReader {
 public:
     AlphaRowsetReader(int num_rows_per_row_block, AlphaRowsetSharedPtr rowset,
-                      MemTracker* parent_tracker = nullptr);
+                      const std::shared_ptr<MemTracker>& parent_tracker = nullptr);
 
     ~AlphaRowsetReader() override;
 
@@ -104,7 +104,7 @@ class AlphaRowsetReader : public RowsetReader {
 private:
     int _num_rows_per_row_block;
     AlphaRowsetSharedPtr _rowset;
-    MemTracker* _parent_tracker;
+    std::shared_ptr<MemTracker> _parent_tracker;
     std::string _rowset_path;
     AlphaRowsetMeta* _alpha_rowset_meta;
     const std::vector<std::shared_ptr<SegmentGroup>>& _segment_groups;
diff --git a/be/src/olap/rowset/beta_rowset.cpp b/be/src/olap/rowset/beta_rowset.cpp
index 784872a8fccb7c..cc531493dd677c 100644
--- a/be/src/olap/rowset/beta_rowset.cpp
+++ b/be/src/olap/rowset/beta_rowset.cpp
@@ -67,7 +67,7 @@ OLAPStatus BetaRowset::create_reader(RowsetReaderSharedPtr* result) {
     return OLAP_SUCCESS;
 }
 
-OLAPStatus BetaRowset::create_reader(MemTracker* parent_tracker, std::shared_ptr<RowsetReader>* result) {
+OLAPStatus BetaRowset::create_reader(const std::shared_ptr<MemTracker>& parent_tracker, std::shared_ptr<RowsetReader>* result) {
     // NOTE: We use std::static_pointer_cast for performance
     result->reset(new BetaRowsetReader(std::static_pointer_cast<BetaRowset>(shared_from_this()), parent_tracker));
     return OLAP_SUCCESS;
diff --git a/be/src/olap/rowset/beta_rowset.h b/be/src/olap/rowset/beta_rowset.h
index 6704fd5e8ec34d..b85e8750b7609f 100644
--- a/be/src/olap/rowset/beta_rowset.h
+++ b/be/src/olap/rowset/beta_rowset.h
@@ -39,7 +39,7 @@ class BetaRowset : public Rowset {
 
     OLAPStatus create_reader(RowsetReaderSharedPtr* result) override;
 
-    OLAPStatus create_reader(MemTracker* parent_tracker,
+    OLAPStatus create_reader(const std::shared_ptr<MemTracker>& parent_tracker,
                              std::shared_ptr<RowsetReader>* result) override;
 
     static std::string segment_file_path(const std::string& segment_dir, const RowsetId& rowset_id,
diff --git a/be/src/olap/rowset/beta_rowset_reader.cpp b/be/src/olap/rowset/beta_rowset_reader.cpp
index dbc2ea16b00c3f..2f6ce767643804 100644
--- a/be/src/olap/rowset/beta_rowset_reader.cpp
+++ b/be/src/olap/rowset/beta_rowset_reader.cpp
@@ -27,7 +27,8 @@
 
 namespace doris {
 
-BetaRowsetReader::BetaRowsetReader(BetaRowsetSharedPtr rowset, MemTracker* parent_tracker)
+BetaRowsetReader::BetaRowsetReader(BetaRowsetSharedPtr rowset,
+                                   const std::shared_ptr<MemTracker>& parent_tracker)
         : _rowset(std::move(rowset)), _stats(&_owned_stats), _parent_tracker(parent_tracker) {
     _rowset->aquire();
 }
diff --git a/be/src/olap/rowset/beta_rowset_reader.h b/be/src/olap/rowset/beta_rowset_reader.h
index 368354085bc69e..9ad2de4f81f86c 100644
--- a/be/src/olap/rowset/beta_rowset_reader.h
+++ b/be/src/olap/rowset/beta_rowset_reader.h
@@ -29,7 +29,8 @@ namespace doris {
 
 class BetaRowsetReader : public RowsetReader {
 public:
-    BetaRowsetReader(BetaRowsetSharedPtr rowset, MemTracker* parent_tracker = nullptr);
+    BetaRowsetReader(BetaRowsetSharedPtr rowset,
+                     const std::shared_ptr<MemTracker>& parent_tracker = nullptr);
 
     ~BetaRowsetReader() override { _rowset->release(); }
 
@@ -56,7 +57,7 @@ class BetaRowsetReader : public RowsetReader {
     OlapReaderStatistics _owned_stats;
     OlapReaderStatistics* _stats;
 
-    MemTracker* _parent_tracker;
+    std::shared_ptr<MemTracker> _parent_tracker;
 
     std::unique_ptr<RowwiseIterator> _iterator;
 
diff --git a/be/src/olap/rowset/column_data.cpp b/be/src/olap/rowset/column_data.cpp
index e33025559c11f7..b1b15cba5ae6f1 100644
--- a/be/src/olap/rowset/column_data.cpp
+++ b/be/src/olap/rowset/column_data.cpp
@@ -24,23 +24,25 @@
 
 namespace doris {
 
-ColumnData* ColumnData::create(SegmentGroup* segment_group, MemTracker* parent_tracker) {
+ColumnData* ColumnData::create(SegmentGroup* segment_group,
+                               const std::shared_ptr<MemTracker>& parent_tracker) {
     ColumnData* data = new (std::nothrow) ColumnData(segment_group, parent_tracker);
     return data;
 }
 
-ColumnData::ColumnData(SegmentGroup* segment_group, MemTracker* parent_tracker)
-      : _segment_group(segment_group),
-        _parent_tracker(parent_tracker),
-        _eof(false),
-        _conditions(nullptr),
-        _col_predicates(nullptr),
-        _delete_status(DEL_NOT_SATISFIED),
-        _runtime_state(nullptr),
-        _schema(segment_group->get_tablet_schema()),
-        _is_using_cache(false),
-        _segment_reader(nullptr),
-        _lru_cache(nullptr) {
+ColumnData::ColumnData(SegmentGroup* segment_group,
+                       const std::shared_ptr<MemTracker>& parent_tracker)
+        : _segment_group(segment_group),
+          _parent_tracker(parent_tracker),
+          _eof(false),
+          _conditions(nullptr),
+          _col_predicates(nullptr),
+          _delete_status(DEL_NOT_SATISFIED),
+          _runtime_state(nullptr),
+          _schema(segment_group->get_tablet_schema()),
+          _is_using_cache(false),
+          _segment_reader(nullptr),
+          _lru_cache(nullptr) {
     if (StorageEngine::instance() != nullptr) {
         _lru_cache = StorageEngine::instance()->index_stream_lru_cache();
     } else {
diff --git a/be/src/olap/rowset/column_data.h b/be/src/olap/rowset/column_data.h
index 3363a6cadbe55a..7124dda1408a8b 100644
--- a/be/src/olap/rowset/column_data.h
+++ b/be/src/olap/rowset/column_data.h
@@ -40,8 +40,10 @@ class SegmentReader;
 // This class is column data reader. this class will be used in two case.
 class ColumnData {
 public:
-    static ColumnData* create(SegmentGroup* segment_group, MemTracker* parent_tracker = nullptr);
-    ColumnData(SegmentGroup* segment_group, MemTracker* parent_tracker = nullptr);
+    static ColumnData* create(SegmentGroup* segment_group,
+                              const std::shared_ptr<MemTracker>& parent_tracker = nullptr);
+    ColumnData(SegmentGroup* segment_group,
+               const std::shared_ptr<MemTracker>& parent_tracker = nullptr);
     ~ColumnData();
 
     // 为了与之前兼容, 暴露部分index的接口
@@ -155,7 +157,7 @@ class ColumnData {
     }
 private:
     SegmentGroup* _segment_group;
-    MemTracker* _parent_tracker;
+    std::shared_ptr<MemTracker> _parent_tracker;
     // 当到达文件末尾或者到达end key时设置此标志
     bool _eof;
     const Conditions* _conditions;
diff --git a/be/src/olap/rowset/rowset.h b/be/src/olap/rowset/rowset.h
index 07a1b9d530512c..aac920c2f447c1 100644
--- a/be/src/olap/rowset/rowset.h
+++ b/be/src/olap/rowset/rowset.h
@@ -120,7 +120,7 @@ class Rowset : public std::enable_shared_from_this<Rowset> {
     virtual OLAPStatus create_reader(std::shared_ptr<RowsetReader>* result) = 0;
 
     // Support adding parent tracker, but should be careful about destruction sequence.
-    virtual OLAPStatus create_reader(MemTracker* parent_tracker,
+    virtual OLAPStatus create_reader(const std::shared_ptr<MemTracker>& parent_tracker,
                                      std::shared_ptr<RowsetReader>* result) = 0;
 
     // Split range denoted by `start_key` and `end_key` into sub-ranges, each contains roughly
diff --git a/be/src/olap/rowset/segment_reader.cpp b/be/src/olap/rowset/segment_reader.cpp
index 014b09b53deecf..b01f476c3145d7 100644
--- a/be/src/olap/rowset/segment_reader.cpp
+++ b/be/src/olap/rowset/segment_reader.cpp
@@ -58,7 +58,7 @@ SegmentReader::SegmentReader(const std::string file, SegmentGroup* segment_group
           _is_using_mmap(false),
           _is_data_loaded(false),
           _buffer_size(0),
-          _tracker(MemTracker::CreateMemTracker(-1, "SegmentReader", parent_tracker)),
+          _tracker(MemTracker::CreateTracker(-1, "SegmentReader", parent_tracker)),
           _mem_pool(new MemPool(_tracker.get())),
           _shared_buffer(NULL),
           _lru_cache(lru_cache),
diff --git a/be/src/olap/rowset/segment_reader.h b/be/src/olap/rowset/segment_reader.h
index 1a6ecb909cc388..f2b723792dcd31 100644
--- a/be/src/olap/rowset/segment_reader.h
+++ b/be/src/olap/rowset/segment_reader.h
@@ -53,7 +53,7 @@ class SegmentReader {
                   const std::set<uint32_t>& load_bf_columns, const Conditions* conditions,
                   const DeleteHandler* delete_handler, const DelCondSatisfied delete_status,
                   Cache* lru_cache, RuntimeState* runtime_state, OlapReaderStatistics* stats,
-                  MemTracker* parent_tracker = nullptr);
+                  const std::shared_ptr<MemTracker>& parent_tracker = nullptr);
 
     ~SegmentReader();
 
diff --git a/be/src/olap/storage_engine.cpp b/be/src/olap/storage_engine.cpp
index 09ddb098b993f8..3472e9e502d7be 100644
--- a/be/src/olap/storage_engine.cpp
+++ b/be/src/olap/storage_engine.cpp
@@ -109,7 +109,7 @@ StorageEngine::StorageEngine(const EngineOptions& options)
           _is_all_cluster_id_exist(true),
           _index_stream_lru_cache(NULL),
           _file_cache(nullptr),
-          _compaction_mem_tracker(-1, "compaction mem tracker(unlimited)"),
+          _compaction_mem_tracker(MemTracker::CreateTracker(-1, "compaction mem tracker(unlimited)")),
           _tablet_manager(new TabletManager(config::tablet_map_shard_size)),
           _txn_manager(new TxnManager(config::txn_map_shard_size, config::txn_shard_size)),
           _rowset_id_generator(new UniqueRowsetIdGenerator(options.backend_uid)),
@@ -125,9 +125,9 @@ StorageEngine::StorageEngine(const EngineOptions& options)
         return _unused_rowsets.size();
     });
     REGISTER_GAUGE_DORIS_METRIC(compaction_mem_current_consumption, [this]() {
-        return _compaction_mem_tracker.consumption();
+        return _compaction_mem_tracker->consumption();
         // We can get each compaction's detail usage
-        LOG(INFO) << _compaction_mem_tracker.LogUsage(2);
+        // LOG(INFO) << _compaction_mem_tracker->LogUsage(2);
     });
 }
 
@@ -539,7 +539,7 @@ void StorageEngine::_perform_cumulative_compaction(DataDir* data_dir) {
     DorisMetrics::instance()->cumulative_compaction_request_total.increment(1);
 
     std::string tracker_label = "cumulative compaction " + std::to_string(syscall(__NR_gettid));
-    CumulativeCompaction cumulative_compaction(best_tablet, tracker_label, &_compaction_mem_tracker);
+    CumulativeCompaction cumulative_compaction(best_tablet, tracker_label, _compaction_mem_tracker);
 
     OLAPStatus res = cumulative_compaction.compact();
     if (res != OLAP_SUCCESS) {
@@ -575,7 +575,7 @@ void StorageEngine::_perform_base_compaction(DataDir* data_dir) {
     DorisMetrics::instance()->base_compaction_request_total.increment(1);
 
     std::string tracker_label = "base compaction " + std::to_string(syscall(__NR_gettid));
-    BaseCompaction base_compaction(best_tablet, tracker_label, &_compaction_mem_tracker);
+    BaseCompaction base_compaction(best_tablet, tracker_label, _compaction_mem_tracker);
     OLAPStatus res = base_compaction.compact();
     if (res != OLAP_SUCCESS) {
         best_tablet->set_last_base_compaction_failure_time(UnixMillis());
diff --git a/be/src/olap/storage_engine.h b/be/src/olap/storage_engine.h
index d8d97dde92b5a7..56ac12fd528c10 100644
--- a/be/src/olap/storage_engine.h
+++ b/be/src/olap/storage_engine.h
@@ -316,7 +316,7 @@ class StorageEngine {
     // map, if we use RowsetId as the key, we need custom hash func
     std::unordered_map<int64_t, RowsetSharedPtr> _unused_rowsets;
 
-    MemTracker _compaction_mem_tracker;
+    std::shared_ptr<MemTracker> _compaction_mem_tracker;
 
     bool _stop_bg_worker = false;
     std::thread _unused_rowset_monitor_thread;
diff --git a/be/src/runtime/runtime_state.cpp b/be/src/runtime/runtime_state.cpp
index adc8674337444e..15781e7cb99717 100644
--- a/be/src/runtime/runtime_state.cpp
+++ b/be/src/runtime/runtime_state.cpp
@@ -51,11 +51,11 @@ RuntimeState::RuntimeState(
         const TUniqueId& fragment_instance_id,
         const TQueryOptions& query_options,
         const TQueryGlobals& query_globals, ExecEnv* exec_env) :
+            _fragment_mem_tracker(nullptr),
             _profile("Fragment " + print_id(fragment_instance_id)),
             _obj_pool(new ObjectPool()),
             _data_stream_recvrs_pool(new ObjectPool()),
             _unreported_error_idx(0),
-            _fragment_mem_tracker(nullptr),
             _is_cancelled(false),
             _per_fragment_instance_idx(0),
             _root_node_id(-1),
@@ -76,12 +76,12 @@ RuntimeState::RuntimeState(
         const TExecPlanFragmentParams& fragment_params,
         const TQueryOptions& query_options,
         const TQueryGlobals& query_globals, ExecEnv* exec_env) :
+            _fragment_mem_tracker(nullptr),
             _profile("Fragment " + print_id(fragment_params.params.fragment_instance_id)),
             _obj_pool(new ObjectPool()),
             _data_stream_recvrs_pool(new ObjectPool()),
             _unreported_error_idx(0),
             _query_id(fragment_params.params.query_id),
-            _fragment_mem_tracker(nullptr),
             _is_cancelled(false),
             _per_fragment_instance_idx(0),
             _root_node_id(-1),
@@ -153,18 +153,6 @@ RuntimeState::~RuntimeState() {
     if (_exec_env != nullptr && _exec_env->thread_mgr() != nullptr) {
         _exec_env->thread_mgr()->unregister_pool(_resource_pool);
     }
-
-#ifndef BE_TEST
-    // TODO: cleanup this comment
-    // _query_mem_tracker must be valid as long as _instance_mem_tracker is so
-    // delete _instance_mem_tracker first.
-    // LogUsage() walks the MemTracker tree top-down when the memory limit is exceeded.
-    // Break the link between the instance_mem_tracker and its parent (_query_mem_tracker)
-    // before the _instance_mem_tracker and its children are destroyed.
-
-    _instance_mem_tracker.reset();
-    _query_mem_tracker.reset();
-#endif
 }
 
 Status RuntimeState::init(
@@ -260,7 +248,7 @@ Status RuntimeState::init_mem_trackers(const TUniqueId& query_id) {
 }
 
 Status RuntimeState::init_instance_mem_tracker() {
-    _instance_mem_tracker.reset(new MemTracker(-1));
+    _instance_mem_tracker = MemTracker::CreateTracker(-1);
     return Status::OK();
 }
 
diff --git a/be/src/runtime/runtime_state.h b/be/src/runtime/runtime_state.h
index e0f7e89a5dce2d..8fc07e76ab0e47 100644
--- a/be/src/runtime/runtime_state.h
+++ b/be/src/runtime/runtime_state.h
@@ -506,6 +506,19 @@ class RuntimeState {
 
     static const int DEFAULT_BATCH_SIZE = 2048;
 
+    // All memory limits that apply to this query.
+    std::vector> _mem_trackers;
+
+    // Fragment memory limit. Also contained in _mem_trackers.
+    std::shared_ptr _fragment_mem_tracker;
+
+    // MemTracker that is shared by all fragment instances running on this host.
+    // The query mem tracker must be released after the _instance_mem_tracker.
+    std::shared_ptr _query_mem_tracker;
+
+    // Memory usage of this fragment instance
+    std::shared_ptr _instance_mem_tracker;
+
     // put runtime state before _obj_pool, so that it will be deconstructed after
     // _obj_pool. Because some of object in _obj_pool will use profile when deconstructing.
     RuntimeProfile _profile;
@@ -549,19 +562,6 @@ class RuntimeState {
     // state is responsible for returning this pool to the thread mgr.
     ThreadResourceMgr::ResourcePool* _resource_pool;
 
-    // all mem limits that apply to this query
-    std::vector> _mem_trackers;
-
-    // Fragment memory limit.  Also contained in _mem_trackers
-    std::shared_ptr _fragment_mem_tracker;
-
-    // MemTracker that is shared by all fragment instances running on this host.
-    // The query mem tracker must be released after the _instance_mem_tracker.
-    std::shared_ptr _query_mem_tracker;
-
-    // Memory usage of this fragment instance
-    std::shared_ptr _instance_mem_tracker;
-
     // if true, execution should stop with a CANCELLED status
     bool _is_cancelled;
 
diff --git a/be/src/runtime/vectorized_row_batch.cpp b/be/src/runtime/vectorized_row_batch.cpp
index 60b309c8bf0021..68ae2cc0980bdf 100644
--- a/be/src/runtime/vectorized_row_batch.cpp
+++ b/be/src/runtime/vectorized_row_batch.cpp
@@ -24,12 +24,12 @@ namespace doris {
 
 VectorizedRowBatch::VectorizedRowBatch(const TabletSchema* schema,
                                        const std::vector& cols, int capacity,
-                                       MemTracker* parent_tracker)
+                                       const std::shared_ptr& parent_tracker)
         : _schema(schema), _cols(cols), _capacity(capacity), _limit(capacity) {
     _selected_in_use = false;
     _size = 0;
 
-    _tracker.reset(new MemTracker(-1, "VectorizedRowBatch", parent_tracker, true));
+    _tracker = MemTracker::CreateTracker(-1, "VectorizedRowBatch", parent_tracker);
     _mem_pool.reset(new MemPool(_tracker.get()));
 
     _selected = reinterpret_cast(new char[sizeof(uint16_t) * _capacity]);
diff --git a/be/src/runtime/vectorized_row_batch.h b/be/src/runtime/vectorized_row_batch.h
index aef23ae701e358..47f5cbe553bf6b 100644
--- a/be/src/runtime/vectorized_row_batch.h
+++ b/be/src/runtime/vectorized_row_batch.h
@@ -73,7 +73,7 @@ class ColumnVector {
 class VectorizedRowBatch {
 public:
     VectorizedRowBatch(const TabletSchema* schema, const std::vector& cols, int capacity,
-                       MemTracker* parent_tracker = nullptr);
+                       const std::shared_ptr& parent_tracker = std::shared_ptr());
 
     ~VectorizedRowBatch() {
         for (auto vec: _col_vectors) {
@@ -147,7 +147,7 @@ class VectorizedRowBatch {
     bool _selected_in_use = false;
     uint8_t _block_status;
 
-    std::unique_ptr _tracker;
+    std::shared_ptr _tracker;
     std::unique_ptr _mem_pool;
     uint16_t _limit;
 };

From 2cb6999fad417560b306e90b210a03f4cf1db8fd Mon Sep 17 00:00:00 2001
From: huangwei 
Date: Thu, 30 Jul 2020 17:08:44 +0800
Subject: [PATCH 11/12] [] fix profile add high water mark counter

---
 be/src/util/runtime_profile.cpp | 17 ++++++++++++-----
 be/src/util/runtime_profile.h   |  2 +-
 2 files changed, 13 insertions(+), 6 deletions(-)

diff --git a/be/src/util/runtime_profile.cpp b/be/src/util/runtime_profile.cpp
index 74d1d176a7244e..2208ede1f79c29 100644
--- a/be/src/util/runtime_profile.cpp
+++ b/be/src/util/runtime_profile.cpp
@@ -383,17 +383,24 @@ ADD_COUNTER_IMPL(AddHighWaterMarkCounter, HighWaterMarkCounter);
 //ADD_COUNTER_IMPL(AddConcurrentTimerCounter, ConcurrentTimerCounter);
 
 std::shared_ptr RuntimeProfile::AddSharedHighWaterMarkCounter(
-    const std::string& name, TUnit::type unit, const std::string& parent_counter_name) {
+        const std::string& name, TUnit::type unit, const std::string& parent_counter_name) {
     DCHECK_EQ(_is_averaged_profile, false);
     boost::lock_guard l(_counter_map_lock);
-    DCHECK(_counter_map.find(name) == _counter_map.end());
+    if (_shared_counter_pool.find(name) != _shared_counter_pool.end()) {
+        return _shared_counter_pool[name];
+    }
     DCHECK(parent_counter_name == ROOT_COUNTER ||
-        _counter_map.find(parent_counter_name) != _counter_map.end());
+           _counter_map.find(parent_counter_name) != _counter_map.end());
     std::shared_ptr counter = std::make_shared(unit);
-    _shared_counter_pool.push_back(counter);
+    _shared_counter_pool[name] = counter;
+
+    DCHECK(_counter_map.find(name) == _counter_map.end())
+            << "already has a raw counter named " << name;
+
+    // It's OK to insert a shared counter into _counter_map, because _counter_map does not own its counters.
     _counter_map[name] = counter.get();
     std::set* child_counters =
-        find_or_insert(&_child_counter_map, parent_counter_name, std::set());
+            find_or_insert(&_child_counter_map, parent_counter_name, std::set());
     child_counters->insert(name);
     return counter;
 }
diff --git a/be/src/util/runtime_profile.h b/be/src/util/runtime_profile.h
index d96dc60b704119..ee126dcc5ad627 100644
--- a/be/src/util/runtime_profile.h
+++ b/be/src/util/runtime_profile.h
@@ -486,7 +486,7 @@ class RuntimeProfile {
     std::unique_ptr _pool;
 
     // Pool for allocated counters. These counters are shared with some other objects.
-    std::vector> _shared_counter_pool;
+    std::map> _shared_counter_pool;
 
     // True if we have to delete the _pool on destruction.
     bool _own_pool;

From 9f79bf096de36536af1dfb21f531191ecb5797f8 Mon Sep 17 00:00:00 2001
From: huangwei 
Date: Fri, 31 Jul 2020 14:15:06 +0800
Subject: [PATCH 12/12] [] use const&

---
 be/src/exec/hash_table.cpp            |  2 +-
 be/src/exec/hash_table.h              |  2 +-
 be/src/exec/merge_join_node.h         |  9 ++++---
 be/src/exec/olap_scanner.cpp          |  2 +-
 be/src/exec/partitioned_hash_table.cc | 39 ++++++++++++++-------------
 be/src/exec/partitioned_hash_table.h  | 28 +++++++++----------
 be/src/exec/sort_exec_exprs.cpp       |  2 +-
 be/src/exec/sort_exec_exprs.h         |  3 ++-
 be/src/exec/tablet_info.cpp           |  2 +-
 be/src/exprs/agg_fn_evaluator.cpp     |  2 +-
 be/src/exprs/agg_fn_evaluator.h       |  2 +-
 be/src/exprs/expr.cpp                 | 26 +++++++++---------
 be/src/exprs/expr.h                   | 17 +++++++-----
 be/src/exprs/expr_context.cpp         |  2 +-
 be/src/exprs/expr_context.h           |  2 +-
 be/src/exprs/new_agg_fn_evaluator.cc  | 37 +++++++++++++------------
 be/src/exprs/new_agg_fn_evaluator.h   |  6 ++---
 be/src/http/default_path_handlers.cpp | 12 +++++----
 be/src/http/default_path_handlers.h   |  5 ++--
 be/src/olap/delta_writer.cpp          |  6 +++--
 be/src/olap/delta_writer.h            |  6 +++--
 be/src/olap/memtable.cpp              |  3 ++-
 be/src/olap/memtable.h                |  3 ++-
 be/src/olap/row_block.h               |  2 +-
 be/src/runtime/mysql_table_sink.cpp   | 14 +++++-----
 be/src/runtime/vectorized_row_batch.h |  2 +-
 be/src/util/arrow/row_batch.cpp       |  4 +--
 27 files changed, 130 insertions(+), 110 deletions(-)

diff --git a/be/src/exec/hash_table.cpp b/be/src/exec/hash_table.cpp
index c4136eda8931ca..48b70b52f8b15a 100644
--- a/be/src/exec/hash_table.cpp
+++ b/be/src/exec/hash_table.cpp
@@ -33,7 +33,7 @@ HashTable::HashTable(const vector& build_expr_ctxs,
                      int num_build_tuples, bool stores_nulls, 
                      const std::vector& finds_nulls,
                      int32_t initial_seed,
-                     std::shared_ptr mem_tracker, int64_t num_buckets) :
+                     const std::shared_ptr& mem_tracker, int64_t num_buckets) :
         _build_expr_ctxs(build_expr_ctxs),
         _probe_expr_ctxs(probe_expr_ctxs),
         _num_build_tuples(num_build_tuples),
diff --git a/be/src/exec/hash_table.h b/be/src/exec/hash_table.h
index f06ff07972a0b4..010c9b7d9644ec 100644
--- a/be/src/exec/hash_table.h
+++ b/be/src/exec/hash_table.h
@@ -94,7 +94,7 @@ class HashTable {
         int num_build_tuples, bool stores_nulls, 
         const std::vector& finds_nulls,
         int32_t initial_seed,
-        std::shared_ptr mem_tracker,
+        const std::shared_ptr& mem_tracker,
         int64_t num_buckets);
 
     ~HashTable();
diff --git a/be/src/exec/merge_join_node.h b/be/src/exec/merge_join_node.h
index 7dfe65d9c974c5..1dab87fc2b9bd2 100644
--- a/be/src/exec/merge_join_node.h
+++ b/be/src/exec/merge_join_node.h
@@ -66,9 +66,12 @@ class MergeJoinNode : public ExecNode {
         int row_idx;
         bool is_eos;
         TupleRow* current_row;
-        ChildReaderContext(const RowDescriptor& desc, int batch_size, std::shared_ptr mem_tracker) :
-            batch(desc, batch_size, mem_tracker.get()), row_idx(0), is_eos(false), current_row(NULL) {
-        }
+        ChildReaderContext(const RowDescriptor& desc, int batch_size,
+                           const std::shared_ptr& mem_tracker)
+                : batch(desc, batch_size, mem_tracker.get()),
+                  row_idx(0),
+                  is_eos(false),
+                  current_row(NULL) {}
     };
     // _left_batch must be cleared before calling get_next().  used cache child(0)'s data
     // _rigth_batch must be cleared before calling get_next().  used cache child(1)'s data
diff --git a/be/src/exec/olap_scanner.cpp b/be/src/exec/olap_scanner.cpp
index 488875753395b6..a9bc32e7c72b89 100644
--- a/be/src/exec/olap_scanner.cpp
+++ b/be/src/exec/olap_scanner.cpp
@@ -238,7 +238,7 @@ Status OlapScanner::get_batch(
     bzero(tuple_buf, state->batch_size() * _tuple_desc->byte_size());
     Tuple *tuple = reinterpret_cast(tuple_buf);
 
-    std::unique_ptr tracker(new MemTracker(state->fragment_mem_tracker()->limit()));
+    auto tracker = MemTracker::CreateTracker(state->fragment_mem_tracker()->limit(), "OlapScanner");
     std::unique_ptr mem_pool(new MemPool(tracker.get()));
 
     int64_t raw_rows_threshold = raw_rows_read() + config::doris_scanner_row_num;
diff --git a/be/src/exec/partitioned_hash_table.cc b/be/src/exec/partitioned_hash_table.cc
index e92ff486a2a65f..e8ab74a2f84d93 100644
--- a/be/src/exec/partitioned_hash_table.cc
+++ b/be/src/exec/partitioned_hash_table.cc
@@ -78,21 +78,24 @@ static int64_t NULL_VALUE[] = {
 };
 
 PartitionedHashTableCtx::PartitionedHashTableCtx(const std::vector& build_exprs,
-    const std::vector& probe_exprs, bool stores_nulls,
-    const std::vector& finds_nulls, int32_t initial_seed,
-    int max_levels, MemPool* mem_pool, MemPool* expr_results_pool, std::shared_ptr tracker)
-    : tracker_(tracker),
-      build_exprs_(build_exprs),
-      probe_exprs_(probe_exprs),
-      stores_nulls_(stores_nulls),
-      finds_nulls_(finds_nulls),
-      finds_some_nulls_(std::accumulate(
-          finds_nulls_.begin(), finds_nulls_.end(), false, std::logical_or())),
-      level_(0),
-      scratch_row_(NULL),
-      mem_pool_(mem_pool),
-      expr_results_pool_(expr_results_pool) {
-  DCHECK(tracker_ != nullptr);
+                                                 const std::vector& probe_exprs,
+                                                 bool stores_nulls,
+                                                 const std::vector& finds_nulls,
+                                                 int32_t initial_seed, int max_levels,
+                                                 MemPool* mem_pool, MemPool* expr_results_pool,
+                                                 const std::shared_ptr& tracker)
+        : tracker_(tracker),
+          build_exprs_(build_exprs),
+          probe_exprs_(probe_exprs),
+          stores_nulls_(stores_nulls),
+          finds_nulls_(finds_nulls),
+          finds_some_nulls_(std::accumulate(finds_nulls_.begin(), finds_nulls_.end(), false,
+                                            std::logical_or())),
+          level_(0),
+          scratch_row_(NULL),
+          mem_pool_(mem_pool),
+          expr_results_pool_(expr_results_pool) {
+    DCHECK(tracker_ != nullptr);
   DCHECK(!finds_some_nulls_ || stores_nulls_);
   // Compute the layout and buffer size to store the evaluated expr results
   DCHECK_EQ(build_exprs_.size(), probe_exprs_.size());
@@ -150,7 +153,7 @@ Status PartitionedHashTableCtx::Create(ObjectPool* pool, RuntimeState* state,
     const std::vector& probe_exprs, bool stores_nulls,
     const std::vector& finds_nulls, int32_t initial_seed, int max_levels,
     int num_build_tuples, MemPool* mem_pool, MemPool* expr_results_pool, 
-    std::shared_ptr tracker, const RowDescriptor& row_desc,
+    const std::shared_ptr& tracker, const RowDescriptor& row_desc,
     const RowDescriptor& row_desc_probe,
     scoped_ptr* ht_ctx) {
   ht_ctx->reset(new PartitionedHashTableCtx(build_exprs, probe_exprs, stores_nulls,
@@ -314,7 +317,7 @@ PartitionedHashTableCtx::ExprValuesCache::ExprValuesCache()
     null_bitmap_(0) {}
 
 Status PartitionedHashTableCtx::ExprValuesCache::Init(RuntimeState* state,
-    std::shared_ptr tracker, const std::vector& build_exprs) {
+    const std::shared_ptr& tracker, const std::vector& build_exprs) {
   // Initialize the number of expressions.
   num_exprs_ = build_exprs.size();
   // Compute the layout of evaluated values of a row.
@@ -358,7 +361,7 @@ Status PartitionedHashTableCtx::ExprValuesCache::Init(RuntimeState* state,
   return Status::OK();
 }
 
-void PartitionedHashTableCtx::ExprValuesCache::Close(std::shared_ptr tracker) {
+void PartitionedHashTableCtx::ExprValuesCache::Close(const std::shared_ptr& tracker) {
   if (capacity_ == 0) return;
   cur_expr_values_ = NULL;
   cur_expr_values_null_ = NULL;
diff --git a/be/src/exec/partitioned_hash_table.h b/be/src/exec/partitioned_hash_table.h
index 12dc00c2c55f0e..13209d37c85720 100644
--- a/be/src/exec/partitioned_hash_table.h
+++ b/be/src/exec/partitioned_hash_table.h
@@ -110,16 +110,16 @@ class PartitionedHashTableCtx {
   /// evaluators for the build and probe expressions will also be allocated.
   /// Please see the comments of HashTableCtx constructor and Init() for details
   /// of other parameters.
-  static Status Create(ObjectPool* pool, RuntimeState* state,
-      const std::vector& build_exprs,
-      const std::vector& probe_exprs, bool stores_nulls,
-      const std::vector& finds_nulls, int32_t initial_seed, int max_levels,
-      int num_build_tuples, MemPool* mem_pool, MemPool* expr_results_pool, 
-      std::shared_ptr tracker, const RowDescriptor& row_desc,
-      const RowDescriptor& row_desc_probe,
-      boost::scoped_ptr* ht_ctx);
-
-  /// Initialize the build and probe expression evaluators.
+     static Status Create(ObjectPool* pool, RuntimeState* state,
+                          const std::vector& build_exprs,
+                          const std::vector& probe_exprs, bool stores_nulls,
+                          const std::vector& finds_nulls, int32_t initial_seed,
+                          int max_levels, int num_build_tuples, MemPool* mem_pool,
+                          MemPool* expr_results_pool, const std::shared_ptr& tracker,
+                          const RowDescriptor& row_desc, const RowDescriptor& row_desc_probe,
+                          boost::scoped_ptr* ht_ctx);
+
+     /// Initialize the build and probe expression evaluators.
   Status Open(RuntimeState* state);
 
   /// Call to cleanup any resources allocated by the expression evaluators.
@@ -211,12 +211,12 @@ class PartitionedHashTableCtx {
     /// Allocates memory and initializes various data structures. Return error status
     /// if memory allocation leads to the memory limits of the exec node to be exceeded.
     /// 'tracker' is the memory tracker of the exec node which owns this PartitionedHashTableCtx.
-    Status Init(RuntimeState* state, std::shared_ptr tracker,
-        const std::vector& build_exprs);
+    Status Init(RuntimeState* state, const std::shared_ptr& tracker,
+                const std::vector& build_exprs);
 
     /// Frees up various resources and updates memory tracker with proper accounting.
     /// 'tracker' should be the same memory tracker which was passed in for Init().
-    void Close(std::shared_ptr tracker);
+    void Close(const std::shared_ptr& tracker);
 
     /// Resets the cache states (iterators, end pointers etc) before writing.
     void Reset() noexcept;
@@ -385,7 +385,7 @@ class PartitionedHashTableCtx {
                           const std::vector& probe_exprs, bool stores_nulls,
                           const std::vector& finds_nulls, int32_t initial_seed,
                           int max_levels, MemPool* mem_pool, MemPool* expr_results_pool,
-                          std::shared_ptr tracker);
+                          const std::shared_ptr& tracker);
 
   /// Allocate various buffers for storing expression evaluation results, hash values,
   /// null bits etc. Also allocate evaluators for the build and probe expressions and
diff --git a/be/src/exec/sort_exec_exprs.cpp b/be/src/exec/sort_exec_exprs.cpp
index 3780ce387e9e82..3c3c52e11bc143 100644
--- a/be/src/exec/sort_exec_exprs.cpp
+++ b/be/src/exec/sort_exec_exprs.cpp
@@ -50,7 +50,7 @@ Status SortExecExprs::init(const std::vector& lhs_ordering_expr_ct
 
 Status SortExecExprs::prepare(RuntimeState* state, const RowDescriptor& child_row_desc,
                               const RowDescriptor& output_row_desc,
-                              std::shared_ptr expr_mem_tracker) {
+                              const std::shared_ptr& expr_mem_tracker) {
     if (_materialize_tuple) {
         RETURN_IF_ERROR(Expr::prepare(
                 _sort_tuple_slot_expr_ctxs, state, child_row_desc, expr_mem_tracker));
diff --git a/be/src/exec/sort_exec_exprs.h b/be/src/exec/sort_exec_exprs.h
index 070703036b31d3..91cb03ce0ffb41 100644
--- a/be/src/exec/sort_exec_exprs.h
+++ b/be/src/exec/sort_exec_exprs.h
@@ -44,7 +44,8 @@ class SortExecExprs {
 
     // prepare all expressions used for sorting and tuple materialization.
     Status prepare(RuntimeState* state, const RowDescriptor& child_row_desc,
-                   const RowDescriptor& output_row_desc, std::shared_ptr mem_tracker);
+                   const RowDescriptor& output_row_desc,
+                   const std::shared_ptr& mem_tracker);
 
     // open all expressions used for sorting and tuple materialization.
     Status open(RuntimeState* state);
diff --git a/be/src/exec/tablet_info.cpp b/be/src/exec/tablet_info.cpp
index 98026d3d021936..703d941d993716 100644
--- a/be/src/exec/tablet_info.cpp
+++ b/be/src/exec/tablet_info.cpp
@@ -150,7 +150,7 @@ OlapTablePartitionParam::OlapTablePartitionParam(
         std::shared_ptr schema,
         const TOlapTablePartitionParam& t_param)
             : _schema(schema), _t_param(t_param),
-            _mem_tracker(new MemTracker(-1, "OlapTablePartitionParam")),
+            _mem_tracker(MemTracker::CreateTracker(-1, "OlapTablePartitionParam")),
             _mem_pool(new MemPool(_mem_tracker.get())) {
 }
 
diff --git a/be/src/exprs/agg_fn_evaluator.cpp b/be/src/exprs/agg_fn_evaluator.cpp
index 03b76d0d367cfa..12405526b4e751 100755
--- a/be/src/exprs/agg_fn_evaluator.cpp
+++ b/be/src/exprs/agg_fn_evaluator.cpp
@@ -148,7 +148,7 @@ Status AggFnEvaluator::prepare(
         MemPool* pool,
         const SlotDescriptor* intermediate_slot_desc,
         const SlotDescriptor* output_slot_desc,
-        std::shared_ptr mem_tracker,
+        const std::shared_ptr& mem_tracker,
         FunctionContext** agg_fn_ctx) {
     DCHECK(pool != NULL);
     DCHECK(intermediate_slot_desc != NULL);
diff --git a/be/src/exprs/agg_fn_evaluator.h b/be/src/exprs/agg_fn_evaluator.h
index c96c52e79df975..0f76aa763a2b9e 100755
--- a/be/src/exprs/agg_fn_evaluator.h
+++ b/be/src/exprs/agg_fn_evaluator.h
@@ -85,7 +85,7 @@ class AggFnEvaluator {
         MemPool* pool,
         const SlotDescriptor* intermediate_slot_desc,
         const SlotDescriptor* output_slot_desc,
-        std::shared_ptr mem_tracker,
+        const std::shared_ptr& mem_tracker,
         FunctionContext** agg_fn_ctx);
 
     Status open(RuntimeState* state, FunctionContext* agg_fn_ctx);
diff --git a/be/src/exprs/expr.cpp b/be/src/exprs/expr.cpp
index f71c5b3e9fb9c3..ee89a332c5e205 100644
--- a/be/src/exprs/expr.cpp
+++ b/be/src/exprs/expr.cpp
@@ -539,7 +539,7 @@ Status Expr::prepare(
         const std::vector& ctxs,
         RuntimeState* state,
         const RowDescriptor& row_desc,
-        std::shared_ptr tracker) {
+        const std::shared_ptr& tracker) {
     for (int i = 0; i < ctxs.size(); ++i) {
         RETURN_IF_ERROR(ctxs[i]->prepare(state, row_desc, tracker));
     }
@@ -868,13 +868,12 @@ void Expr::assign_fn_ctx_idx(int* next_fn_ctx_idx) {
   _fn_ctx_idx_end = *next_fn_ctx_idx;
 }
 
-
-Status Expr::create(const TExpr& texpr, const RowDescriptor& row_desc,
-    RuntimeState* state, ObjectPool* pool, Expr** scalar_expr,
-    std::shared_ptr tracker) {
-  *scalar_expr = nullptr;
-  Expr* root;
-  RETURN_IF_ERROR(create_expr(pool, texpr.nodes[0], &root));
+Status Expr::create(const TExpr& texpr, const RowDescriptor& row_desc, RuntimeState* state,
+                    ObjectPool* pool, Expr** scalar_expr,
+                    const std::shared_ptr& tracker) {
+    *scalar_expr = nullptr;
+    Expr* root;
+    RETURN_IF_ERROR(create_expr(pool, texpr.nodes[0], &root));
   RETURN_IF_ERROR(create_tree(texpr, pool, root));
   // TODO pengyubing replace by Init()
   ExprContext* ctx = pool->add(new ExprContext(root));
@@ -893,9 +892,10 @@ Status Expr::create(const TExpr& texpr, const RowDescriptor& row_desc,
   return Status::OK();
 }
 
-Status Expr::create(const vector& texprs, const RowDescriptor& row_desc,
-    RuntimeState* state, ObjectPool* pool, vector* exprs, std::shared_ptr tracker) {
-  exprs->clear();
+Status Expr::create(const vector& texprs, const RowDescriptor& row_desc, RuntimeState* state,
+                    ObjectPool* pool, vector* exprs,
+                    const std::shared_ptr& tracker) {
+    exprs->clear();
   for (const TExpr& texpr: texprs) {
     Expr* expr;
     RETURN_IF_ERROR(create(texpr, row_desc, state, pool, &expr, tracker));
@@ -906,12 +906,12 @@ Status Expr::create(const vector& texprs, const RowDescriptor& row_desc,
 }
 
 Status Expr::create(const TExpr& texpr, const RowDescriptor& row_desc,
-    RuntimeState* state, Expr** scalar_expr, std::shared_ptr tracker) {
+    RuntimeState* state, Expr** scalar_expr, const std::shared_ptr& tracker) {
   return Expr::create(texpr, row_desc, state, state->obj_pool(), scalar_expr, tracker);
 }
 
 Status Expr::create(const vector& texprs, const RowDescriptor& row_desc,
-    RuntimeState* state, vector* exprs, std::shared_ptr tracker) {
+    RuntimeState* state, vector* exprs, const std::shared_ptr& tracker) {
   return Expr::create(texprs, row_desc, state, state->obj_pool(), exprs, tracker);
 }
 
diff --git a/be/src/exprs/expr.h b/be/src/exprs/expr.h
index 38ea87b5b1424a..292626492ab10e 100644
--- a/be/src/exprs/expr.h
+++ b/be/src/exprs/expr.h
@@ -206,27 +206,30 @@ class Expr {
     /// is stored in ObjectPool 'pool' and returned in 'expr' on success. 'row_desc' is the
     /// tuple row descriptor of the input tuple row. On failure, 'expr' is set to NULL and
     /// the expr tree (if created) will be closed. Error status will be returned too.
-    static Status create(const TExpr& texpr, const RowDescriptor& row_desc,
-        RuntimeState* state, ObjectPool* pool, Expr** expr, std::shared_ptr tracker);
+    static Status create(const TExpr& texpr, const RowDescriptor& row_desc, RuntimeState* state,
+                         ObjectPool* pool, Expr** expr, const std::shared_ptr& tracker);
 
     /// Create a new ScalarExpr based on thrift Expr 'texpr'. The newly created ScalarExpr
     /// is stored in ObjectPool 'state->obj_pool()' and returned in 'expr'. 'row_desc' is
     /// the tuple row descriptor of the input tuple row. Returns error status on failure.
-    static Status create(const TExpr& texpr, const RowDescriptor& row_desc,
-        RuntimeState* state, Expr** expr, std::shared_ptr tracker);
+    static Status create(const TExpr& texpr, const RowDescriptor& row_desc, RuntimeState* state,
+                         Expr** expr, const std::shared_ptr& tracker);
 
     /// Convenience functions creating multiple ScalarExpr.
     static Status create(const std::vector& texprs, const RowDescriptor& row_desc,
-        RuntimeState* state, ObjectPool* pool, std::vector* exprs, std::shared_ptr tracker);
+                         RuntimeState* state, ObjectPool* pool, std::vector* exprs,
+                         const std::shared_ptr& tracker);
 
     /// Convenience functions creating multiple ScalarExpr.
     static Status create(const std::vector& texprs, const RowDescriptor& row_desc,
-        RuntimeState* state, std::vector* exprs, std::shared_ptr tracker);
+                         RuntimeState* state, std::vector* exprs,
+                         const std::shared_ptr& tracker);
 
     /// Convenience function for preparing multiple expr trees.
     /// Allocations from 'ctxs' will be counted against 'tracker'.
     static Status prepare(const std::vector& ctxs, RuntimeState* state,
-                          const RowDescriptor& row_desc, std::shared_ptr tracker);
+                          const RowDescriptor& row_desc,
+                          const std::shared_ptr& tracker);
 
     /// Convenience function for opening multiple expr trees.
     static Status open(const std::vector& ctxs, RuntimeState* state);
diff --git a/be/src/exprs/expr_context.cpp b/be/src/exprs/expr_context.cpp
index ba7a572bd36969..68ae418bf3f8cb 100644
--- a/be/src/exprs/expr_context.cpp
+++ b/be/src/exprs/expr_context.cpp
@@ -51,7 +51,7 @@ ExprContext::~ExprContext() {
 
 // TODO(zc): memory tracker
 Status ExprContext::prepare(RuntimeState* state, const RowDescriptor& row_desc,
-                            std::shared_ptr tracker) {
+                            const std::shared_ptr& tracker) {
     DCHECK(tracker != nullptr) << std::endl << get_stack_trace();
     DCHECK(_pool.get() == NULL);
     _prepared = true;
diff --git a/be/src/exprs/expr_context.h b/be/src/exprs/expr_context.h
index 8e62b35651362f..7fe294cb2d65e9 100644
--- a/be/src/exprs/expr_context.h
+++ b/be/src/exprs/expr_context.h
@@ -52,7 +52,7 @@ class ExprContext {
     /// Prepare expr tree for evaluation.
     /// Allocations from this context will be counted against 'tracker'.
     Status prepare(RuntimeState* state, const RowDescriptor& row_desc,
-                   std::shared_ptr tracker);
+                   const std::shared_ptr& tracker);
 
     /// Must be called after calling Prepare(). Does not need to be called on clones.
     /// Idempotent (this allows exprs to be opened multiple times in subplans without
diff --git a/be/src/exprs/new_agg_fn_evaluator.cc b/be/src/exprs/new_agg_fn_evaluator.cc
index a969a0b0e602d6..f1a72897c7f669 100644
--- a/be/src/exprs/new_agg_fn_evaluator.cc
+++ b/be/src/exprs/new_agg_fn_evaluator.cc
@@ -89,14 +89,14 @@ typedef AnyVal (*FinalizeFn)(FunctionContext*, const AnyVal&);
 
 const int DEFAULT_MULTI_DISTINCT_COUNT_STRING_BUFFER_SIZE = 1024;
 
-NewAggFnEvaluator::NewAggFnEvaluator(const AggFn& agg_fn, MemPool* mem_pool, std::shared_ptr tracker, bool is_clone)
-  : _total_mem_consumption(0),
-    _accumulated_mem_consumption(0), 
-    is_clone_(is_clone),
-    agg_fn_(agg_fn),
-    mem_pool_(mem_pool),
-    _mem_tracker(tracker) {
-}
+NewAggFnEvaluator::NewAggFnEvaluator(const AggFn& agg_fn, MemPool* mem_pool,
+                                     const std::shared_ptr& tracker, bool is_clone)
+        : _total_mem_consumption(0),
+          _accumulated_mem_consumption(0),
+          is_clone_(is_clone),
+          agg_fn_(agg_fn),
+          mem_pool_(mem_pool),
+          _mem_tracker(tracker) {}
 
 NewAggFnEvaluator::~NewAggFnEvaluator() {
   if (UNLIKELY(_total_mem_consumption > 0)) {
@@ -114,11 +114,12 @@ const TypeDescriptor& NewAggFnEvaluator::intermediate_type() const {
 }
 
 Status NewAggFnEvaluator::Create(const AggFn& agg_fn, RuntimeState* state, ObjectPool* pool,
-                  MemPool* mem_pool, NewAggFnEvaluator** result,
-                  std::shared_ptr tracker, const RowDescriptor& row_desc) {
-  *result = nullptr;
+                                 MemPool* mem_pool, NewAggFnEvaluator** result,
+                                 const std::shared_ptr& tracker,
+                                 const RowDescriptor& row_desc) {
+    *result = nullptr;
 
-  // Create a new AggFn evaluator.
+    // Create a new AggFn evaluator.
   NewAggFnEvaluator* agg_fn_eval = pool->add(new NewAggFnEvaluator(agg_fn, mem_pool, tracker, false));
   
   agg_fn_eval->agg_fn_ctx_.reset(FunctionContextImpl::create_context(state, mem_pool,
@@ -168,11 +169,13 @@ Status NewAggFnEvaluator::Create(const AggFn& agg_fn, RuntimeState* state, Objec
 }
 
 Status NewAggFnEvaluator::Create(const vector& agg_fns, RuntimeState* state,
-    ObjectPool* pool, MemPool* mem_pool, vector* evals,
-    std::shared_ptr tracker, const RowDescriptor& row_desc) {
-  for (const AggFn* agg_fn : agg_fns) {
-    NewAggFnEvaluator* agg_fn_eval;
-    RETURN_IF_ERROR(NewAggFnEvaluator::Create(*agg_fn, state, pool, mem_pool, 
+                                 ObjectPool* pool, MemPool* mem_pool,
+                                 vector* evals,
+                                 const std::shared_ptr& tracker,
+                                 const RowDescriptor& row_desc) {
+    for (const AggFn* agg_fn : agg_fns) {
+        NewAggFnEvaluator* agg_fn_eval;
+        RETURN_IF_ERROR(NewAggFnEvaluator::Create(*agg_fn, state, pool, mem_pool,
                                            &agg_fn_eval, tracker, row_desc));
     evals->push_back(agg_fn_eval);
   }
diff --git a/be/src/exprs/new_agg_fn_evaluator.h b/be/src/exprs/new_agg_fn_evaluator.h
index d3f11de166d90f..7be1c3d825c3f3 100644
--- a/be/src/exprs/new_agg_fn_evaluator.h
+++ b/be/src/exprs/new_agg_fn_evaluator.h
@@ -69,13 +69,13 @@ class NewAggFnEvaluator {
   /// from 'mem_pool'. Note that it's the responsibility to call Close() all evaluators
   /// even if this function returns error status on initialization failure.
   static Status Create(const AggFn& agg_fn, RuntimeState* state, ObjectPool* pool,
-      MemPool* mem_pool, NewAggFnEvaluator** eval, std::shared_ptr<MemTracker> tracker, 
+      MemPool* mem_pool, NewAggFnEvaluator** eval, const std::shared_ptr<MemTracker>& tracker,
       const RowDescriptor& row_desc) WARN_UNUSED_RESULT;
 
   /// Convenience functions for creating evaluators for multiple aggregate functions.
   static Status Create(const std::vector<AggFn*>& agg_fns, RuntimeState* state,
       ObjectPool* pool, MemPool* mem_pool, std::vector<NewAggFnEvaluator*>* evals,
-      std::shared_ptr<MemTracker> tracker, const RowDescriptor& row_desc) WARN_UNUSED_RESULT;
+      const std::shared_ptr<MemTracker>& tracker, const RowDescriptor& row_desc) WARN_UNUSED_RESULT;
 
   ~NewAggFnEvaluator();
 
@@ -245,7 +245,7 @@ class NewAggFnEvaluator {
   doris_udf::AnyVal* staging_merge_input_val_ = nullptr;
 
   /// Use Create() instead.
-  NewAggFnEvaluator(const AggFn& agg_fn, MemPool* mem_pool, std::shared_ptr<MemTracker> tracker, bool is_clone);
+  NewAggFnEvaluator(const AggFn& agg_fn, MemPool* mem_pool, const std::shared_ptr<MemTracker>& tracker, bool is_clone);
 
   /// Return the intermediate type of the aggregate function.
   inline const SlotDescriptor& intermediate_slot_desc() const;
diff --git a/be/src/http/default_path_handlers.cpp b/be/src/http/default_path_handlers.cpp
index e652a5b4d9cc54..bcc798ae86e8a4 100644
--- a/be/src/http/default_path_handlers.cpp
+++ b/be/src/http/default_path_handlers.cpp
@@ -72,8 +72,8 @@ void config_handler(const WebPageHandler::ArgumentMap& args, std::stringstream*
 }
 
 // Registered to handle "/memz", and prints out memory allocation statistics.
-void mem_usage_handler(std::shared_ptr<MemTracker> mem_tracker, const WebPageHandler::ArgumentMap& args,
-                       std::stringstream* output) {
+void mem_usage_handler(const std::shared_ptr<MemTracker>& mem_tracker,
+                       const WebPageHandler::ArgumentMap& args, std::stringstream* output) {
     if (mem_tracker != nullptr) {
         (*output) << "<pre>"
                   << "Mem Limit: " << PrettyPrinter::print(mem_tracker->limit(), TUnit::BYTES)
@@ -100,12 +100,14 @@ void mem_usage_handler(std::shared_ptr<MemTracker> mem_tracker, const WebPageHan
 #endif
 }
 
-void add_default_path_handlers(WebPageHandler* web_page_handler, std::shared_ptr<MemTracker> process_mem_tracker) {
+void add_default_path_handlers(WebPageHandler* web_page_handler,
+                               const std::shared_ptr<MemTracker>& process_mem_tracker) {
     // TODO(yingchun): logs_handler is not implemented yet, so not show it on navigate bar
     web_page_handler->register_page("/logs", "Logs", logs_handler, false /* is_on_nav_bar */);
     web_page_handler->register_page("/varz", "Configs", config_handler, true /* is_on_nav_bar */);
-    web_page_handler->register_page("/memz", "Memory",
-        boost::bind(&mem_usage_handler, process_mem_tracker, _1, _2), true /* is_on_nav_bar */);
+    web_page_handler->register_page(
+            "/memz", "Memory", boost::bind(&mem_usage_handler, process_mem_tracker, _1, _2),
+            true /* is_on_nav_bar */);
     register_thread_display_page(web_page_handler);
 }
 
diff --git a/be/src/http/default_path_handlers.h b/be/src/http/default_path_handlers.h
index af13d3e5c9666e..93c0ba6bfa5650 100644
--- a/be/src/http/default_path_handlers.h
+++ b/be/src/http/default_path_handlers.h
@@ -28,7 +28,8 @@ class WebPageHandler;
 
 // Adds a set of default path handlers to the webserver to display
 // logs and configuration flags
-void add_default_path_handlers(WebPageHandler* web_page_handler, std::shared_ptr<MemTracker> process_mem_tracker);
-}
+void add_default_path_handlers(WebPageHandler* web_page_handler,
+                               const std::shared_ptr<MemTracker>& process_mem_tracker);
+} // namespace doris
 
 #endif // IMPALA_UTIL_DEFAULT_PATH_HANDLERS_H
diff --git a/be/src/olap/delta_writer.cpp b/be/src/olap/delta_writer.cpp
index e75380945299d7..74c76a2bb90800 100644
--- a/be/src/olap/delta_writer.cpp
+++ b/be/src/olap/delta_writer.cpp
@@ -27,12 +27,14 @@
 
 namespace doris {
 
-OLAPStatus DeltaWriter::open(WriteRequest* req, std::shared_ptr<MemTracker> parent, DeltaWriter** writer) {
+OLAPStatus DeltaWriter::open(WriteRequest* req, const std::shared_ptr<MemTracker>& parent,
+                             DeltaWriter** writer) {
     *writer = new DeltaWriter(req, parent, StorageEngine::instance());
     return OLAP_SUCCESS;
 }
 
-DeltaWriter::DeltaWriter(WriteRequest* req, std::shared_ptr<MemTracker> parent, StorageEngine* storage_engine)
+DeltaWriter::DeltaWriter(WriteRequest* req, const std::shared_ptr<MemTracker>& parent,
+                         StorageEngine* storage_engine)
         : _req(*req),
           _tablet(nullptr),
           _cur_rowset(nullptr),
diff --git a/be/src/olap/delta_writer.h b/be/src/olap/delta_writer.h
index 034ecc7f01a4b6..2c4eb79a698ea2 100644
--- a/be/src/olap/delta_writer.h
+++ b/be/src/olap/delta_writer.h
@@ -56,7 +56,8 @@ struct WriteRequest {
 // This class is NOT thread-safe, external synchronization is required.
 class DeltaWriter {
 public:
-    static OLAPStatus open(WriteRequest* req, std::shared_ptr<MemTracker> parent, DeltaWriter** writer);
+    static OLAPStatus open(WriteRequest* req, const std::shared_ptr<MemTracker>& parent,
+                           DeltaWriter** writer);
 
     ~DeltaWriter();
 
@@ -83,7 +84,8 @@ class DeltaWriter {
     int64_t mem_consumption() const;
 
 private:
-    DeltaWriter(WriteRequest* req, std::shared_ptr<MemTracker> parent, StorageEngine* storage_engine);
+    DeltaWriter(WriteRequest* req, const std::shared_ptr<MemTracker>& parent,
+                StorageEngine* storage_engine);
 
     // push a full memtable to flush executor
     OLAPStatus _flush_memtable_async();
diff --git a/be/src/olap/memtable.cpp b/be/src/olap/memtable.cpp
index fc69ff09270a89..25a6a81912f811 100644
--- a/be/src/olap/memtable.cpp
+++ b/be/src/olap/memtable.cpp
@@ -31,7 +31,8 @@ namespace doris {
 
 MemTable::MemTable(int64_t tablet_id, Schema* schema, const TabletSchema* tablet_schema,
                    const std::vector<SlotDescriptor*>* slot_descs, TupleDescriptor* tuple_desc,
-                   KeysType keys_type, RowsetWriter* rowset_writer, std::shared_ptr<MemTracker> parent_tracker)
+                   KeysType keys_type, RowsetWriter* rowset_writer,
+                   const std::shared_ptr<MemTracker>& parent_tracker)
         : _tablet_id(tablet_id),
           _schema(schema),
           _tablet_schema(tablet_schema),
diff --git a/be/src/olap/memtable.h b/be/src/olap/memtable.h
index 00b7009f65c395..583eefdb72467c 100644
--- a/be/src/olap/memtable.h
+++ b/be/src/olap/memtable.h
@@ -39,7 +39,8 @@ class MemTable {
 public:
     MemTable(int64_t tablet_id, Schema* schema, const TabletSchema* tablet_schema,
              const std::vector<SlotDescriptor*>* slot_descs, TupleDescriptor* tuple_desc,
-             KeysType keys_type, RowsetWriter* rowset_writer, std::shared_ptr<MemTracker> parent_tracker);
+             KeysType keys_type, RowsetWriter* rowset_writer,
+             const std::shared_ptr<MemTracker>& parent_tracker);
     ~MemTable();
 
     int64_t tablet_id() const { return _tablet_id; }
diff --git a/be/src/olap/row_block.h b/be/src/olap/row_block.h
index 20e935444a535a..1292181f8075c2 100644
--- a/be/src/olap/row_block.h
+++ b/be/src/olap/row_block.h
@@ -57,7 +57,7 @@ class RowBlock {
     friend class VectorizedRowBatch;
 public:
     RowBlock(const TabletSchema* schema,
-             const std::shared_ptr<MemTracker>& parent_tracker = std::shared_ptr<MemTracker>());
+             const std::shared_ptr<MemTracker>& parent_tracker = nullptr);
 
     // 注意回收内部buffer
     ~RowBlock();
diff --git a/be/src/runtime/mysql_table_sink.cpp b/be/src/runtime/mysql_table_sink.cpp
index 4adb8ba57e8c52..8377d8ad28f397 100644
--- a/be/src/runtime/mysql_table_sink.cpp
+++ b/be/src/runtime/mysql_table_sink.cpp
@@ -29,15 +29,13 @@
 namespace doris {
 
 MysqlTableSink::MysqlTableSink(ObjectPool* pool, const RowDescriptor& row_desc,
-                               const std::vector<TExpr>& t_exprs) :
-        _pool(pool),
-        _row_desc(row_desc),
-        _t_output_expr(t_exprs),
-        _mem_tracker(new MemTracker(-1, "MysqlTableSink")) {
-}
+                               const std::vector<TExpr>& t_exprs)
+        : _pool(pool),
+          _row_desc(row_desc),
+          _t_output_expr(t_exprs),
+          _mem_tracker(MemTracker::CreateTracker(-1, "MysqlTableSink")) {}
 
-MysqlTableSink::~MysqlTableSink() {
-}
+MysqlTableSink::~MysqlTableSink() {}
 
 Status MysqlTableSink::init(const TDataSink& t_sink) {
     RETURN_IF_ERROR(DataSink::init(t_sink));
diff --git a/be/src/runtime/vectorized_row_batch.h b/be/src/runtime/vectorized_row_batch.h
index 47f5cbe553bf6b..c3a8ee2f9e46eb 100644
--- a/be/src/runtime/vectorized_row_batch.h
+++ b/be/src/runtime/vectorized_row_batch.h
@@ -73,7 +73,7 @@ class ColumnVector {
 class VectorizedRowBatch {
 public:
     VectorizedRowBatch(const TabletSchema* schema, const std::vector<uint32_t>& cols, int capacity,
-                       const std::shared_ptr<MemTracker>& parent_tracker = std::shared_ptr<MemTracker>());
+                       const std::shared_ptr<MemTracker>& parent_tracker = nullptr);
 
     ~VectorizedRowBatch() {
         for (auto vec: _col_vectors) {
diff --git a/be/src/util/arrow/row_batch.cpp b/be/src/util/arrow/row_batch.cpp
index 6a750f9ca750a7..5c35e44108075e 100644
--- a/be/src/util/arrow/row_batch.cpp
+++ b/be/src/util/arrow/row_batch.cpp
@@ -363,7 +363,7 @@ class ToRowBatchConverter : public arrow::ArrayVisitor {
 
     ToRowBatchConverter(const arrow::RecordBatch& batch,
                         const RowDescriptor& row_desc,
-                        std::shared_ptr<MemTracker> tracker)
+                        const std::shared_ptr<MemTracker>& tracker)
         : _batch(batch), _row_desc(row_desc), _tracker(tracker) { }
 
 #define PRIMITIVE_VISIT(TYPE) \
@@ -454,7 +454,7 @@ Status ToRowBatchConverter:: convert(std::shared_ptr<RowBatch>* result) {
 
 Status convert_to_row_batch(const arrow::RecordBatch& batch,
                             const RowDescriptor& row_desc,
-                            std::shared_ptr<MemTracker> tracker,
+                            const std::shared_ptr<MemTracker>& tracker,
                             std::shared_ptr<RowBatch>* result) {
     ToRowBatchConverter converter(batch, row_desc, tracker);
     return converter.convert(result);