From e65d2a29f5b6dcf57c46eb331d5a6d4c4b00e71c Mon Sep 17 00:00:00 2001 From: toutdesuite Date: Tue, 17 Mar 2020 14:24:13 +0800 Subject: [PATCH 1/5] cherry pick #1989 to release-3.0 Signed-off-by: sre-bot --- how-to/monitor/monitor-a-cluster.md | 147 +++++++++++++++++++++++++++- how-to/scale/with-ansible.md | 56 +++++++++-- 2 files changed, 192 insertions(+), 11 deletions(-) diff --git a/how-to/monitor/monitor-a-cluster.md b/how-to/monitor/monitor-a-cluster.md index a44fe7a651fc3..8b1cdc40d7898 100644 --- a/how-to/monitor/monitor-a-cluster.md +++ b/how-to/monitor/monitor-a-cluster.md @@ -107,25 +107,110 @@ Assume that the TiDB cluster topology is as follows: #### Step 1: Download the binary package +<<<<<<< HEAD 1. Download the package. +======= +{{< copyable "shell-regular" >}} + +```bash +# Downloads the package. +wget https://download.pingcap.org/prometheus-2.8.1.linux-amd64.tar.gz +wget https://download.pingcap.org/node_exporter-0.17.0.linux-amd64.tar.gz +wget https://download.pingcap.org/grafana-6.1.6.linux-amd64.tar.gz +``` + +{{< copyable "shell-regular" >}} + +```bash +# Extracts the package. +tar -xzf prometheus-2.8.1.linux-amd64.tar.gz +tar -xzf node_exporter-0.17.0.linux-amd64.tar.gz +tar -xzf grafana-6.1.6.linux-amd64.tar.gz +``` +>>>>>>> fbcc983... how-to: optimize pd operation and upgrade monitor version (#1989) {{< copyable "shell-regular" >}} +<<<<<<< HEAD ```bash wget https://github.com/prometheus/prometheus/releases/download/v2.2.1/prometheus-2.2.1.linux-amd64.tar.gz && wget https://github.com/prometheus/node_exporter/releases/download/v0.15.2/node_exporter-0.15.2.linux-amd64.tar.gz && wget https://s3-us-west-2.amazonaws.com/grafana-releases/release/grafana-4.6.3.linux-x64.tar.gz ``` +======= +{{< copyable "shell-regular" >}} + +```bash +cd node_exporter-0.17.0.linux-amd64 +>>>>>>> fbcc983... how-to: optimize pd operation and upgrade monitor version (#1989) 2. Extract the package. 
{{< copyable "shell-regular" >}} +<<<<<<< HEAD ```bash tar -xzf prometheus-2.2.1.linux-amd64.tar.gz && tar -xzf node_exporter-0.15.2.linux-amd64.tar.gz && tar -xzf grafana-4.6.3.linux-x64.tar.gz ``` +======= +Edit the Prometheus configuration file: + +{{< copyable "shell-regular" >}} + +```bash +cd prometheus-2.8.1.linux-amd64 && +vi prometheus.yml +``` + +```ini +... + +global: + scrape_interval: 15s # By default, scrape targets every 15 seconds. + evaluation_interval: 15s # By default, scrape targets every 15 seconds. + # scrape_timeout is set to the global default value (10s). + external_labels: + cluster: 'test-cluster' + monitor: "prometheus" + +scrape_configs: + - job_name: 'overwritten-nodes' + honor_labels: true # Do not overwrite job & instance labels. + static_configs: + - targets: + - '192.168.199.113:9100' + - '192.168.199.114:9100' + - '192.168.199.115:9100' + - '192.168.199.116:9100' + - '192.168.199.117:9100' + - '192.168.199.118:9100' + + - job_name: 'tidb' + honor_labels: true # Do not overwrite job & instance labels. + static_configs: + - targets: + - '192.168.199.113:10080' + + - job_name: 'pd' + honor_labels: true # Do not overwrite job & instance labels. + static_configs: + - targets: + - '192.168.199.113:2379' + - '192.168.199.114:2379' + - '192.168.199.115:2379' + + - job_name: 'tikv' + honor_labels: true # Do not overwrite job & instance labels. + static_configs: + - targets: + - '192.168.199.116:20180' + - '192.168.199.117:20180' + - '192.168.199.118:20180' + +... +>>>>>>> fbcc983... how-to: optimize pd operation and upgrade monitor version (#1989) #### Step 2: Start `node_exporter` on Node1, Node2, Node3, and Node4 @@ -137,7 +222,57 @@ Assume that the TiDB cluster topology is as follows: cd node_exporter-0.15.2.linux-amd64 ``` +<<<<<<< HEAD 2. Start the node_exporter service. +======= +Edit the Grafana configuration file: + +{{< copyable "shell-regular" >}} + +```ini +cd grafana-6.1.6 && +vi conf/grafana.ini + +... 
+ +[paths] +data = ./data +logs = ./data/log +plugins = ./data/plugins +[server] +http_port = 3000 +domain = 192.168.199.113 +[database] +[session] +[analytics] +check_for_updates = true +[security] +admin_user = admin +admin_password = admin +[snapshots] +[users] +[auth.anonymous] +[auth.basic] +[auth.ldap] +[smtp] +[emails] +[log] +mode = file +[log.console] +[log.file] +level = info +format = text +[log.syslog] +[event_publisher] +[dashboards.json] +enabled = false +path = ./data/dashboards +[metrics] +[grafana_net] +url = https://grafana.net + +... +>>>>>>> fbcc983... how-to: optimize pd operation and upgrade monitor version (#1989) {{< copyable "shell-regular" >}} @@ -295,20 +430,22 @@ This section describes how to configure Grafana. - Default account: admin - Default password: admin -2. Click the Grafana logo to open the sidebar menu. + > **Note:** + > + > For the **Change Password** step, you can choose **Skip**. -3. In the sidebar menu, click **Data Source**. +2. In the Grafana sidebar menu, click **Data Source** under the **Configuration** menu. -4. Click **Add data source**. +3. Click **Add data source**. -5. Specify the data source information. +4. Specify the data source information. - Specify a **Name** for the data source. - For **Type**, select **Prometheus**. - For **URL**, specify the Prometheus address. - Specify other fields as needed. -6. Click **Add** to save the new data source. +5. Click **Add** to save the new data source. #### Step 2: Import a Grafana dashboard diff --git a/how-to/scale/with-ansible.md b/how-to/scale/with-ansible.md index d2e6f53b882ee..42e5c2aa996f6 100644 --- a/how-to/scale/with-ansible.md +++ b/how-to/scale/with-ansible.md @@ -215,7 +215,8 @@ For example, if you want to add a PD node (node103) with the IP address `172.16. > You cannot add the `#` character at the beginning of the line. Otherwise, the following configuration cannot take effect. 2. Add `--join="http://172.16.10.1:2379" \`. 
The IP address (`172.16.10.1`) can be any of the existing PD IP address in the cluster. - 3. Manually start the PD service in the newly added PD node: + + 3. Start the PD service in the newly added PD node: {{< copyable "shell-regular" >}} @@ -239,32 +240,56 @@ For example, if you want to add a PD node (node103) with the IP address `172.16. > > `pd-ctl` is a command used to check the number of PD nodes. -5. Apply a rolling update to the entire cluster: +5. Start the monitoring service: +<<<<<<< HEAD {{< copyable "shell-regular" >}} ```bash ansible-playbook rolling_update.yml +======= + ``` + ansible-playbook start.yml -l 172.16.10.103 +>>>>>>> fbcc983... how-to: optimize pd operation and upgrade monitor version (#1989) ``` -6. Start the monitor service: + > **Note:** + > + > If you use an alias (inventory_name), use the `-l` option to specify the alias. +6. Update the cluster configuration: + +<<<<<<< HEAD {{< copyable "shell-regular" >}} ```bash ansible-playbook start.yml -l 172.16.10.103 +======= + ``` + ansible-playbook deploy.yml +>>>>>>> fbcc983... how-to: optimize pd operation and upgrade monitor version (#1989) ``` -7. Update the Prometheus configuration and restart the cluster: +7. Restart Prometheus, and enable the monitoring of PD nodes used for increasing the capacity: +<<<<<<< HEAD {{< copyable "shell-regular" >}} ```bash ansible-playbook rolling_update_monitor.yml --tags=prometheus +======= + ``` + ansible-playbook stop.yml --tags=prometheus + ansible-playbook start.yml --tags=prometheus +>>>>>>> fbcc983... how-to: optimize pd operation and upgrade monitor version (#1989) ``` 8. Monitor the status of the entire cluster and the newly added node by opening a browser to access the monitoring platform: `http://172.16.10.3:3000`. +> **Note:** +> +> The PD Client in TiKV caches the list of PD nodes. Currently, the list is updated only if the PD leader is switched or the TiKV server is restarted to load the latest configuration. 
To avoid TiKV caching an outdated list, there should be at least two existing PD members in the PD cluster after increasing or decreasing the capacity of a PD node. If this condition is not met, transfer the PD leader manually to update the list of PD nodes. + ## Decrease the capacity of a TiDB node For example, if you want to remove a TiDB node (node5) with the IP address `172.16.10.5`, take the following steps: @@ -477,6 +502,10 @@ For example, if you want to remove a PD node (node2) with the IP address `172.16 ansible-playbook stop.yml -l 172.16.10.2 ``` + > **Note:** + > + > In this example, you can only stop the PD service on node2. If there are any other services deployed with the IP address `172.16.10.2`, use the `-t` option to specify the service (such as `-t tidb`). + 4. Edit the `inventory.ini` file and remove the node information: ```ini @@ -527,20 +556,35 @@ For example, if you want to remove a PD node (node2) with the IP address `172.16 | node8 | 172.16.10.8 | TiKV3 | | node9 | 172.16.10.9 | TiKV4 | -5. Perform a rolling update to the entire TiDB cluster: +5. Update the cluster configuration: +<<<<<<< HEAD {{< copyable "shell-regular" >}} ```bash ansible-playbook rolling_update.yml +======= + ``` + ansible-playbook deploy.yml +>>>>>>> fbcc983... how-to: optimize pd operation and upgrade monitor version (#1989) ``` -6. Update the Prometheus configuration and restart the cluster: +6. Restart Prometheus, and disable the monitoring of PD nodes used for decreasing the capacity: +<<<<<<< HEAD {{< copyable "shell-regular" >}} ```bash ansible-playbook rolling_update_monitor.yml --tags=prometheus +======= + ``` + ansible-playbook stop.yml --tags=prometheus + ansible-playbook start.yml --tags=prometheus +>>>>>>> fbcc983... how-to: optimize pd operation and upgrade monitor version (#1989) ``` 7. To monitor the status of the entire cluster, open a browser to access the monitoring platform: `http://172.16.10.3:3000`. 
+ +> **Note:** +> +> The PD Client in TiKV caches the list of PD nodes. Currently, the list is updated only if the PD leader is switched or the TiKV server is restarted to load the latest configuration. To avoid TiKV caching an outdated list, there should be at least two existing PD members in the PD cluster after increasing or decreasing the capacity of a PD node. If this condition is not met, transfer the PD leader manually to update the list of PD nodes. From d9f3901ee8e7ad5c37ca1aa190a2f2df06ad4eda Mon Sep 17 00:00:00 2001 From: toutdesuite Date: Tue, 17 Mar 2020 16:10:05 +0800 Subject: [PATCH 2/5] resolve conflicts --- how-to/monitor/monitor-a-cluster.md | 145 +--------------------------- how-to/scale/with-ansible.md | 25 ----- 2 files changed, 5 insertions(+), 165 deletions(-) diff --git a/how-to/monitor/monitor-a-cluster.md b/how-to/monitor/monitor-a-cluster.md index 8b1cdc40d7898..5e76e74c00ff5 100644 --- a/how-to/monitor/monitor-a-cluster.md +++ b/how-to/monitor/monitor-a-cluster.md @@ -107,9 +107,8 @@ Assume that the TiDB cluster topology is as follows: #### Step 1: Download the binary package -<<<<<<< HEAD 1. Download the package. -======= + {{< copyable "shell-regular" >}} ```bash @@ -127,90 +126,6 @@ tar -xzf prometheus-2.8.1.linux-amd64.tar.gz tar -xzf node_exporter-0.17.0.linux-amd64.tar.gz tar -xzf grafana-6.1.6.linux-amd64.tar.gz ``` ->>>>>>> fbcc983... how-to: optimize pd operation and upgrade monitor version (#1989) - - {{< copyable "shell-regular" >}} - -<<<<<<< HEAD - ```bash - wget https://github.com/prometheus/prometheus/releases/download/v2.2.1/prometheus-2.2.1.linux-amd64.tar.gz && - wget https://github.com/prometheus/node_exporter/releases/download/v0.15.2/node_exporter-0.15.2.linux-amd64.tar.gz && - wget https://s3-us-west-2.amazonaws.com/grafana-releases/release/grafana-4.6.3.linux-x64.tar.gz - ``` -======= -{{< copyable "shell-regular" >}} - -```bash -cd node_exporter-0.17.0.linux-amd64 ->>>>>>> fbcc983... 
how-to: optimize pd operation and upgrade monitor version (#1989) - -2. Extract the package. - - {{< copyable "shell-regular" >}} - -<<<<<<< HEAD - ```bash - tar -xzf prometheus-2.2.1.linux-amd64.tar.gz && - tar -xzf node_exporter-0.15.2.linux-amd64.tar.gz && - tar -xzf grafana-4.6.3.linux-x64.tar.gz - ``` -======= -Edit the Prometheus configuration file: - -{{< copyable "shell-regular" >}} - -```bash -cd prometheus-2.8.1.linux-amd64 && -vi prometheus.yml -``` - -```ini -... - -global: - scrape_interval: 15s # By default, scrape targets every 15 seconds. - evaluation_interval: 15s # By default, scrape targets every 15 seconds. - # scrape_timeout is set to the global default value (10s). - external_labels: - cluster: 'test-cluster' - monitor: "prometheus" - -scrape_configs: - - job_name: 'overwritten-nodes' - honor_labels: true # Do not overwrite job & instance labels. - static_configs: - - targets: - - '192.168.199.113:9100' - - '192.168.199.114:9100' - - '192.168.199.115:9100' - - '192.168.199.116:9100' - - '192.168.199.117:9100' - - '192.168.199.118:9100' - - - job_name: 'tidb' - honor_labels: true # Do not overwrite job & instance labels. - static_configs: - - targets: - - '192.168.199.113:10080' - - - job_name: 'pd' - honor_labels: true # Do not overwrite job & instance labels. - static_configs: - - targets: - - '192.168.199.113:2379' - - '192.168.199.114:2379' - - '192.168.199.115:2379' - - - job_name: 'tikv' - honor_labels: true # Do not overwrite job & instance labels. - static_configs: - - targets: - - '192.168.199.116:20180' - - '192.168.199.117:20180' - - '192.168.199.118:20180' - -... ->>>>>>> fbcc983... how-to: optimize pd operation and upgrade monitor version (#1989) #### Step 2: Start `node_exporter` on Node1, Node2, Node3, and Node4 @@ -219,66 +134,16 @@ scrape_configs: {{< copyable "shell-regular" >}} ```bash - cd node_exporter-0.15.2.linux-amd64 + cd node_exporter-0.17.0.linux-amd64 ``` -<<<<<<< HEAD 2. Start the node_exporter service. 
-======= -Edit the Grafana configuration file: - -{{< copyable "shell-regular" >}} - -```ini -cd grafana-6.1.6 && -vi conf/grafana.ini - -... - -[paths] -data = ./data -logs = ./data/log -plugins = ./data/plugins -[server] -http_port = 3000 -domain = 192.168.199.113 -[database] -[session] -[analytics] -check_for_updates = true -[security] -admin_user = admin -admin_password = admin -[snapshots] -[users] -[auth.anonymous] -[auth.basic] -[auth.ldap] -[smtp] -[emails] -[log] -mode = file -[log.console] -[log.file] -level = info -format = text -[log.syslog] -[event_publisher] -[dashboards.json] -enabled = false -path = ./data/dashboards -[metrics] -[grafana_net] -url = https://grafana.net - -... ->>>>>>> fbcc983... how-to: optimize pd operation and upgrade monitor version (#1989) {{< copyable "shell-regular" >}} ```bash ./node_exporter --web.listen-address=":9100" \ - --log.level="info" & + --log.level="info" & ``` #### Step 3: Start Prometheus on Node1 @@ -288,7 +153,7 @@ url = https://grafana.net {{< copyable "shell-regular" >}} ```bash - cd prometheus-2.2.1.linux-amd64 && + cd prometheus-2.8.1.linux-amd64 && vi prometheus.yml ``` @@ -363,7 +228,7 @@ url = https://grafana.net {{< copyable "shell-regular" >}} ```bash - cd grafana-4.6.3 && + cd grafana-6.1.6 && vi conf/grafana.ini ``` diff --git a/how-to/scale/with-ansible.md b/how-to/scale/with-ansible.md index 42e5c2aa996f6..d93de78a5d292 100644 --- a/how-to/scale/with-ansible.md +++ b/how-to/scale/with-ansible.md @@ -242,15 +242,10 @@ For example, if you want to add a PD node (node103) with the IP address `172.16. 5. Start the monitoring service: -<<<<<<< HEAD {{< copyable "shell-regular" >}} ```bash - ansible-playbook rolling_update.yml -======= - ``` ansible-playbook start.yml -l 172.16.10.103 ->>>>>>> fbcc983... how-to: optimize pd operation and upgrade monitor version (#1989) ``` > **Note:** @@ -259,29 +254,19 @@ For example, if you want to add a PD node (node103) with the IP address `172.16. 6. 
Update the cluster configuration: -<<<<<<< HEAD {{< copyable "shell-regular" >}} ```bash - ansible-playbook start.yml -l 172.16.10.103 -======= - ``` ansible-playbook deploy.yml ->>>>>>> fbcc983... how-to: optimize pd operation and upgrade monitor version (#1989) ``` 7. Restart Prometheus, and enable the monitoring of PD nodes used for increasing the capacity: -<<<<<<< HEAD {{< copyable "shell-regular" >}} ```bash - ansible-playbook rolling_update_monitor.yml --tags=prometheus -======= - ``` ansible-playbook stop.yml --tags=prometheus ansible-playbook start.yml --tags=prometheus ->>>>>>> fbcc983... how-to: optimize pd operation and upgrade monitor version (#1989) ``` 8. Monitor the status of the entire cluster and the newly added node by opening a browser to access the monitoring platform: `http://172.16.10.3:3000`. @@ -558,29 +543,19 @@ For example, if you want to remove a PD node (node2) with the IP address `172.16 5. Update the cluster configuration: -<<<<<<< HEAD {{< copyable "shell-regular" >}} ```bash - ansible-playbook rolling_update.yml -======= - ``` ansible-playbook deploy.yml ->>>>>>> fbcc983... how-to: optimize pd operation and upgrade monitor version (#1989) ``` 6. Restart Prometheus, and disable the monitoring of PD nodes used for decreasing the capacity: -<<<<<<< HEAD {{< copyable "shell-regular" >}} ```bash - ansible-playbook rolling_update_monitor.yml --tags=prometheus -======= - ``` ansible-playbook stop.yml --tags=prometheus ansible-playbook start.yml --tags=prometheus ->>>>>>> fbcc983... how-to: optimize pd operation and upgrade monitor version (#1989) ``` 7. To monitor the status of the entire cluster, open a browser to access the monitoring platform: `http://172.16.10.3:3000`. 
From b173b5740bbd3053b4b10a536dbdda538055c132 Mon Sep 17 00:00:00 2001 From: toutdesuite Date: Tue, 17 Mar 2020 16:36:32 +0800 Subject: [PATCH 3/5] fix indent --- how-to/monitor/monitor-a-cluster.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/how-to/monitor/monitor-a-cluster.md b/how-to/monitor/monitor-a-cluster.md index 5e76e74c00ff5..454d1de5ce28f 100644 --- a/how-to/monitor/monitor-a-cluster.md +++ b/how-to/monitor/monitor-a-cluster.md @@ -143,7 +143,7 @@ tar -xzf grafana-6.1.6.linux-amd64.tar.gz ```bash ./node_exporter --web.listen-address=":9100" \ - --log.level="info" & + --log.level="info" & ``` #### Step 3: Start Prometheus on Node1 From 5ea39df8135ee591965cd287000d05a74e223c37 Mon Sep 17 00:00:00 2001 From: toutdesuite Date: Tue, 17 Mar 2020 18:09:39 +0800 Subject: [PATCH 4/5] add subtitle mistakenly deleted --- how-to/monitor/monitor-a-cluster.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/how-to/monitor/monitor-a-cluster.md b/how-to/monitor/monitor-a-cluster.md index 454d1de5ce28f..033353a7e7e1a 100644 --- a/how-to/monitor/monitor-a-cluster.md +++ b/how-to/monitor/monitor-a-cluster.md @@ -112,16 +112,16 @@ Assume that the TiDB cluster topology is as follows: {{< copyable "shell-regular" >}} ```bash -# Downloads the package. wget https://download.pingcap.org/prometheus-2.8.1.linux-amd64.tar.gz wget https://download.pingcap.org/node_exporter-0.17.0.linux-amd64.tar.gz wget https://download.pingcap.org/grafana-6.1.6.linux-amd64.tar.gz ``` +2. Extract the package. + {{< copyable "shell-regular" >}} ```bash -# Extracts the package. 
tar -xzf prometheus-2.8.1.linux-amd64.tar.gz tar -xzf node_exporter-0.17.0.linux-amd64.tar.gz tar -xzf grafana-6.1.6.linux-amd64.tar.gz From 02f7235186bf7f78d1b69ca8224ff4e85f7852af Mon Sep 17 00:00:00 2001 From: toutdesuite Date: Tue, 17 Mar 2020 18:17:34 +0800 Subject: [PATCH 5/5] "make indentations" --- how-to/monitor/monitor-a-cluster.md | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/how-to/monitor/monitor-a-cluster.md b/how-to/monitor/monitor-a-cluster.md index 033353a7e7e1a..9e82585c74b42 100644 --- a/how-to/monitor/monitor-a-cluster.md +++ b/how-to/monitor/monitor-a-cluster.md @@ -109,23 +109,23 @@ Assume that the TiDB cluster topology is as follows: 1. Download the package. -{{< copyable "shell-regular" >}} + {{< copyable "shell-regular" >}} -```bash -wget https://download.pingcap.org/prometheus-2.8.1.linux-amd64.tar.gz -wget https://download.pingcap.org/node_exporter-0.17.0.linux-amd64.tar.gz -wget https://download.pingcap.org/grafana-6.1.6.linux-amd64.tar.gz -``` + ```bash + wget https://download.pingcap.org/prometheus-2.8.1.linux-amd64.tar.gz + wget https://download.pingcap.org/node_exporter-0.17.0.linux-amd64.tar.gz + wget https://download.pingcap.org/grafana-6.1.6.linux-amd64.tar.gz + ``` 2. Extract the package. -{{< copyable "shell-regular" >}} + {{< copyable "shell-regular" >}} -```bash -tar -xzf prometheus-2.8.1.linux-amd64.tar.gz -tar -xzf node_exporter-0.17.0.linux-amd64.tar.gz -tar -xzf grafana-6.1.6.linux-amd64.tar.gz -``` + ```bash + tar -xzf prometheus-2.8.1.linux-amd64.tar.gz + tar -xzf node_exporter-0.17.0.linux-amd64.tar.gz + tar -xzf grafana-6.1.6.linux-amd64.tar.gz + ``` #### Step 2: Start `node_exporter` on Node1, Node2, Node3, and Node4