From c3e9bcd82188eaf5aa69adcd573197631353f0f3 Mon Sep 17 00:00:00 2001
From: Xinyi Zou
Date: Wed, 29 Mar 2023 17:52:30 +0800
Subject: [PATCH 1/3] 1

---
 be/src/common/config.h                                |  4 ----
 be/src/common/daemon.cpp                              | 11 -----------
 be/src/runtime/memory/mem_tracker_limiter.cpp         |  5 +----
 docs/en/community/developer-guide/be-vscode-dev.md    |  2 ++
 docs/zh-CN/community/developer-guide/be-vscode-dev.md |  2 ++
 5 files changed, 5 insertions(+), 19 deletions(-)

diff --git a/be/src/common/config.h b/be/src/common/config.h
index 7e3a5d9431c4ff..42baf9592c4449 100644
--- a/be/src/common/config.h
+++ b/be/src/common/config.h
@@ -77,10 +77,6 @@ CONF_Int64(max_sys_mem_available_low_water_mark_bytes, "1717986918");
 // The size of the memory that gc wants to release each time, as a percentage of the mem limit.
 CONF_mString(process_minor_gc_size, "10%");
 CONF_mString(process_full_gc_size, "20%");
-// Some caches have their own gc threads, such as segment cache.
-// For caches that do not have a separate gc thread, perform regular gc in the memory maintenance thread.
-// Currently only storage page cache, chunk allocator, more in the future.
-CONF_mInt32(cache_gc_interval_s, "60");
 
 // If true, when the process does not exceed the soft mem limit, the query memory will not be limited;
 // when the process memory exceeds the soft mem limit, the query with the largest ratio between the currently
diff --git a/be/src/common/daemon.cpp b/be/src/common/daemon.cpp
index c388fac9aafa05..176456f6369015 100644
--- a/be/src/common/daemon.cpp
+++ b/be/src/common/daemon.cpp
@@ -205,7 +205,6 @@ void Daemon::memory_maintenance_thread() {
 
 void Daemon::memory_gc_thread() {
     int32_t interval_milliseconds = config::memory_maintenance_sleep_time_ms;
-    int32_t cache_gc_interval_ms = config::cache_gc_interval_s * 1000;
     int32_t memory_minor_gc_sleep_time_ms = 0;
     int32_t memory_full_gc_sleep_time_ms = 0;
     int64_t cache_gc_freed_mem = 0;
@@ -221,7 +220,6 @@
             // No longer full gc and minor gc during sleep.
             memory_full_gc_sleep_time_ms = config::memory_gc_sleep_time_s * 1000;
             memory_minor_gc_sleep_time_ms = config::memory_gc_sleep_time_s * 1000;
-            cache_gc_interval_ms = config::cache_gc_interval_s * 1000;
             doris::MemTrackerLimiter::print_log_process_usage("process full gc", false);
             if (doris::MemInfo::process_full_gc()) {
                 // If there is not enough memory to be gc, the process memory usage will not be printed in the next continuous gc.
@@ -234,7 +232,6 @@
                    doris::MemInfo::soft_mem_limit())) {
             // No minor gc during sleep, but full gc is possible.
             memory_minor_gc_sleep_time_ms = config::memory_gc_sleep_time_s * 1000;
-            cache_gc_interval_ms = config::cache_gc_interval_s * 1000;
             doris::MemTrackerLimiter::print_log_process_usage("process minor gc", false);
             if (doris::MemInfo::process_minor_gc()) {
                 doris::MemTrackerLimiter::enable_print_log_process_usage();
@@ -246,14 +243,6 @@
             if (memory_minor_gc_sleep_time_ms > 0) {
                 memory_minor_gc_sleep_time_ms -= interval_milliseconds;
             }
-            cache_gc_interval_ms -= interval_milliseconds;
-            if (cache_gc_interval_ms < 0) {
-                cache_gc_freed_mem = 0;
-                doris::MemInfo::process_cache_gc(cache_gc_freed_mem);
-                LOG(INFO) << fmt::format("Process regular GC Cache, Free Memory {} Bytes",
-                                         cache_gc_freed_mem);
-                cache_gc_interval_ms = config::cache_gc_interval_s * 1000;
-            }
         }
     }
 }
diff --git a/be/src/runtime/memory/mem_tracker_limiter.cpp b/be/src/runtime/memory/mem_tracker_limiter.cpp
index 9015485b8e2a81..f817da9d782df7 100644
--- a/be/src/runtime/memory/mem_tracker_limiter.cpp
+++ b/be/src/runtime/memory/mem_tracker_limiter.cpp
@@ -305,9 +305,6 @@ int64_t MemTrackerLimiter::free_top_memory_query(int64_t min_free_mem,
         std::lock_guard l(mem_tracker_limiter_pool[i].group_lock);
         for (auto tracker : mem_tracker_limiter_pool[i].trackers) {
             if (tracker->type() == type) {
-                if (tracker->consumption() <= 104857600) { // 100M small query does not cancel
-                    continue;
-                }
                 if (ExecEnv::GetInstance()->fragment_mgr()->query_is_canceled(
                             label_to_queryid(tracker->label()))) {
                     continue;
@@ -351,7 +348,7 @@ int64_t MemTrackerLimiter::free_top_overcommit_query(int64_t min_free_mem,
         std::lock_guard l(mem_tracker_limiter_pool[i].group_lock);
         for (auto tracker : mem_tracker_limiter_pool[i].trackers) {
             if (tracker->type() == type) {
-                if (tracker->consumption() <= 104857600) { // 100M small query does not cancel
+                if (tracker->consumption() <= 33554432) { // 32M small query does not cancel
                     continue;
                 }
                 if (ExecEnv::GetInstance()->fragment_mgr()->query_is_canceled(
diff --git a/docs/en/community/developer-guide/be-vscode-dev.md b/docs/en/community/developer-guide/be-vscode-dev.md
index 6552d5c8d78800..db02f6df3e2661 100644
--- a/docs/en/community/developer-guide/be-vscode-dev.md
+++ b/docs/en/community/developer-guide/be-vscode-dev.md
@@ -207,6 +207,8 @@ In the configuration **"request": "attach", "processId": PID**, these two config
 ps -ef | grep palo*
 ```
 
+Or write **"processId": "${command:pickProcess}"** to pick the pid when starting the attach session.
+
 As shown in the figure:
 
 ![](/images/image-20210618095240216.png)
diff --git a/docs/zh-CN/community/developer-guide/be-vscode-dev.md b/docs/zh-CN/community/developer-guide/be-vscode-dev.md
index 4bcaf5d40730c9..338413dc388e5c 100644
--- a/docs/zh-CN/community/developer-guide/be-vscode-dev.md
+++ b/docs/zh-CN/community/developer-guide/be-vscode-dev.md
@@ -206,6 +206,8 @@ mkdir -p /soft/be/storage
 ps -ef | grep palo*
 ```
 
+或者写作 **"processId": "${command:pickProcess}"**,可在启动attach时指定pid.
+
 如图:
 
 ![](/images/image-20210618095240216.png)

From 014c695fa0e0f6941dde209710749e3381eef9d4 Mon Sep 17 00:00:00 2001
From: Xinyi Zou
Date: Wed, 29 Mar 2023 18:21:14 +0800
Subject: [PATCH 2/3] 2

---
 bin/start_be.sh                                 |  2 +-
 .../community/developer-guide/debug-tool.md     | 31 ++++++++++---------
 .../community/developer-guide/debug-tool.md     | 11 ++++---
 3 files changed, 24 insertions(+), 20 deletions(-)

diff --git a/bin/start_be.sh b/bin/start_be.sh
index 199990a4b2a585..7204d6511441e4 100755
--- a/bin/start_be.sh
+++ b/bin/start_be.sh
@@ -236,7 +236,7 @@ if [[ -f "${DORIS_HOME}/conf/hdfs-site.xml" ]]; then
 fi
 
 # see https://github.com/apache/doris/blob/master/docs/zh-CN/community/developer-guide/debug-tool.md#jemalloc-heap-profile
-export JEMALLOC_CONF="percpu_arena:percpu,background_thread:true,metadata_thp:auto,muzzy_decay_ms:30000,dirty_decay_ms:30000,oversize_threshold:0,lg_tcache_max:16,prof:true,prof_prefix:jeprof.out"
+export JEMALLOC_CONF="percpu_arena:percpu,background_thread:true,metadata_thp:auto,muzzy_decay_ms:30000,dirty_decay_ms:30000,oversize_threshold:0,lg_tcache_max:16,prof_prefix:jeprof.out"
 
 if [[ "${RUN_DAEMON}" -eq 1 ]]; then
     nohup ${LIMIT:+${LIMIT}} "${DORIS_HOME}/lib/doris_be" "$@" >>"${LOG_DIR}/be.out" 2>&1
 result.pdf
 ```
-默认heap profile采样间隔512K(2^19 B),这会导致heap profile记录的内存通常只有10%,通过在`start_be.sh`中`JEMALLOC_CONF`增加`,lg_prof_sample:10`然后重启BE,可以减少采样间隔到1K (2^10 B), 更频繁的采样会使heap profile接近真实内存,但这会带来更大的性能损耗。详细参考 https://linux.die.net/man/3/jemalloc。
-如果在做性能测试,尝试删掉`JEMALLOC_CONF`中的`,prof:true`,避免heap profile采样的性能损耗。
 ##### 3. heap dump by JEMALLOC_CONF
 通过更改`start_be.sh` 中`JEMALLOC_CONF` 变量后重新启动BE 来进行heap dump

From 63d8f1d75fc4517df45c82b89da2d43f8387d756 Mon Sep 17 00:00:00 2001
From: Xinyi Zou
Date: Wed, 29 Mar 2023 18:28:08 +0800
Subject: [PATCH 3/3] 3

---
 be/src/common/daemon.cpp | 1 -
 1 file changed, 1 deletion(-)

diff --git a/be/src/common/daemon.cpp b/be/src/common/daemon.cpp
index 176456f6369015..9b7ee5dddc5a18 100644
--- a/be/src/common/daemon.cpp
+++ b/be/src/common/daemon.cpp
@@ -207,7 +207,6 @@ void Daemon::memory_gc_thread() {
     int32_t interval_milliseconds = config::memory_maintenance_sleep_time_ms;
     int32_t memory_minor_gc_sleep_time_ms = 0;
     int32_t memory_full_gc_sleep_time_ms = 0;
-    int64_t cache_gc_freed_mem = 0;
     while (!_stop_background_threads_latch.wait_for(
             std::chrono::milliseconds(interval_milliseconds))) {
         if (!MemInfo::initialized() || !ExecEnv::GetInstance()->initialized()) {
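For context on the countdown pattern that patch 1 trims and patch 3 finishes cleaning up (the removed `cache_gc_interval_ms` timer and the then-unused `cache_gc_freed_mem` variable), here is a minimal standalone sketch of how a `memory_gc_thread()`-style backoff counter behaves. It is not Doris source: the constants and the `memory_pressure_high` check are illustrative placeholders standing in for `config::memory_maintenance_sleep_time_ms`, `config::memory_gc_sleep_time_s`, and the real water-mark checks.

```cpp
// Standalone sketch (not Doris source): the GC thread wakes up every
// `interval_ms`; a full GC is attempted only once its backoff counter has
// drained to zero, and each attempt refills the counter so the next attempt
// is delayed by `gc_sleep_ms`.
#include <chrono>
#include <iostream>
#include <thread>

int main() {
    const int interval_ms = 10;  // placeholder for memory_maintenance_sleep_time_ms
    const int gc_sleep_ms = 50;  // placeholder for memory_gc_sleep_time_s * 1000
    int full_gc_sleep_left = 0;
    int minor_gc_sleep_left = 0;

    for (int tick = 0; tick < 20; ++tick) {
        std::this_thread::sleep_for(std::chrono::milliseconds(interval_ms));

        // Placeholder for the real water-mark / mem-limit checks.
        bool memory_pressure_high = (tick % 7 == 0);

        if (full_gc_sleep_left <= 0 && memory_pressure_high) {
            std::cout << "tick " << tick << ": run full gc\n";
            full_gc_sleep_left = gc_sleep_ms;  // back off after a GC attempt
            minor_gc_sleep_left = gc_sleep_ms;
        }

        // Counters only drain while positive, mirroring the trailing
        // `if (... > 0) ... -= interval_milliseconds;` blocks in daemon.cpp.
        if (full_gc_sleep_left > 0) {
            full_gc_sleep_left -= interval_ms;
        }
        if (minor_gc_sleep_left > 0) {
            minor_gc_sleep_left -= interval_ms;
        }
    }
    return 0;
}
```

The cache-GC branch removed in patch 1 followed the same countdown shape, which is why deleting it leaves `cache_gc_freed_mem` dead until patch 3 removes that declaration as well.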