diff --git a/dev/reference/tools/download.md b/dev/reference/tools/download.md index 0ee5348a32e1f..bd312c09ac8ab 100644 --- a/dev/reference/tools/download.md +++ b/dev/reference/tools/download.md @@ -25,7 +25,7 @@ If you want to download the latest version of [TiDB Lightning](/dev/reference/to | Package name | OS | Architecture | SHA256 checksum | |:---|:---|:---|:---| -| [tidb-toolkit-latest-linux-amd64.tar.gz](http://download.pingcap.org/tidb-toolkit-latest-linux-amd64.tar.gz) | Linux | amd64 | [tidb-toolkit-latest-linux-amd64.sha256](http://download.pingcap.org/tidb-toolkit-latest-linux-amd64.sha256) | +| [tidb-toolkit-latest-linux-amd64.tar.gz](https://download.pingcap.org/tidb-toolkit-latest-linux-amd64.tar.gz) | Linux | amd64 | [tidb-toolkit-latest-linux-amd64.sha256](https://download.pingcap.org/tidb-toolkit-latest-linux-amd64.sha256) | ## DM (Data Migration) diff --git a/dev/reference/tools/tidb-lightning/config.md b/dev/reference/tools/tidb-lightning/config.md index f33a63421e34b..802810ddb4579 100644 --- a/dev/reference/tools/tidb-lightning/config.md +++ b/dev/reference/tools/tidb-lightning/config.md @@ -89,8 +89,15 @@ driver = "file" #keep-after-success = false [tikv-importer] -# The listening address of tikv-importer. Change it to the actual address. +# Delivery back end, can be "importer" or "tidb". +# backend = "importer" +# The listening address of tikv-importer when back end is "importer". Change it to the actual address. addr = "172.16.31.10:8287" +# Action to do when trying to insert a duplicated entry in the "tidb" back end. +# - replace: new entry replaces existing entry +# - ignore: keep existing entry, ignore new entry +# - error: report error and quit the program +#on-duplicate = "replace" [mydumper] # Block size for file reading. 
Keep it longer than the longest string of @@ -288,6 +295,7 @@ min-available-ratio = 0.05 | -V | Prints program version | | | -d *directory* | Directory of the data dump to read from | `mydumper.data-source-dir` | | -L *level* | Log level: debug, info, warn, error, fatal (default = info) | `lightning.log-level` | +| --backend *backend* | [Delivery back end](/dev/reference/tools/tidb-lightning/tidb-backend.md) (`importer` or `tidb`) | `tikv-importer.backend` | | --log-file *file* | Log file path | `lightning.log-file` | | --status-addr *ip:port* | Listening address of the TiDB Lightning server | `lightning.status-port` | | --importer *host:port* | Address of TiKV Importer | `tikv-importer.addr` | diff --git a/dev/reference/tools/tidb-lightning/deployment.md b/dev/reference/tools/tidb-lightning/deployment.md index 8fc00aa56ebfd..f6c86a56bb832 100644 --- a/dev/reference/tools/tidb-lightning/deployment.md +++ b/dev/reference/tools/tidb-lightning/deployment.md @@ -6,7 +6,9 @@ category: reference # TiDB Lightning Deployment -This document describes the hardware requirements of TiDB Lightning on separate deployment and mixed deployment, and how to deploy it using Ansible or manually. +This document describes the hardware requirements of TiDB Lightning using the default "Importer" back end, and how to deploy it using Ansible or manually. + +If you wish to use the "TiDB" back end, also read [TiDB Lightning "TiDB" Back End](/dev/reference/tools/tidb-lightning/tidb-backend.md) for the changes to the deployment steps. 
## Notes diff --git a/dev/reference/tools/tidb-lightning/overview.md b/dev/reference/tools/tidb-lightning/overview.md index 8e643d3d9aac5..0dcd1ea8be426 100644 --- a/dev/reference/tools/tidb-lightning/overview.md +++ b/dev/reference/tools/tidb-lightning/overview.md @@ -40,3 +40,5 @@ The complete import process is as follows: The auto-increment ID of a table is computed by the estimated *upper bound* of the number of rows, which is proportional to the total file size of the data files of the table. Therefore, the final auto-increment ID is often much larger than the actual number of rows. This is expected since in TiDB auto-increment is [not necessarily allocated sequentially](/dev/reference/mysql-compatibility.md#auto-increment-id). 7. Finally, `tidb-lightning` switches the TiKV cluster back to "normal mode", so the cluster resumes normal services. + +TiDB Lightning also supports using "TiDB" instead of "Importer" as the back end. In this configuration, `tidb-lightning` transforms data into SQL `INSERT` statements and directly executes them on the target cluster, similar to Loader. See [TiDB Lightning "TiDB" Back End](/dev/reference/tools/tidb-lightning/tidb-backend.md) for details. diff --git a/dev/reference/tools/tidb-lightning/table-filter.md b/dev/reference/tools/tidb-lightning/table-filter.md index 059152d490ce1..11dcc6c3080d3 100644 --- a/dev/reference/tools/tidb-lightning/table-filter.md +++ b/dev/reference/tools/tidb-lightning/table-filter.md @@ -26,6 +26,10 @@ ignore-dbs = ["pattern4", "pattern5"] The pattern can either be a simple name, or a regular expression in [Go dialect](https://golang.org/pkg/regexp/syntax/#hdr-syntax) if it starts with a `~` character. +> **Note:** +> +> The system databases `INFORMATION_SCHEMA`, `PERFORMANCE_SCHEMA`, `mysql` and `sys` are always black-listed regardless of the table filter settings. 
+ ## Filtering tables ```toml diff --git a/dev/reference/tools/tidb-lightning/tidb-backend.md b/dev/reference/tools/tidb-lightning/tidb-backend.md new file mode 100644 index 0000000000000..a7c8e3dbf6ba4 --- /dev/null +++ b/dev/reference/tools/tidb-lightning/tidb-backend.md @@ -0,0 +1,223 @@ +--- +title: TiDB Lightning "TiDB" Back End +summary: Choose how to write data into the TiDB cluster. +category: reference +--- + +# TiDB Lightning "TiDB" Back End + +TiDB Lightning supports two back ends: "Importer" and "TiDB". The back end determines how `tidb-lightning` delivers data into the target cluster. + +The "Importer" back end (default) requires `tidb-lightning` to first encode the SQL or CSV data into KV pairs, and relies on the external `tikv-importer` program to sort these KV pairs and ingest them directly into the TiKV nodes. + +The "TiDB" back end requires `tidb-lightning` to encode these data into SQL `INSERT` statements, and has these statements executed directly on the TiDB node. + +| Back end | "Importer" | "TiDB" | +|:---|:---|:---| +| Speed | Fast (~300 GB/hr) | Slow (~50 GB/hr) | +| Resource usage | High | Low | +| ACID respected while importing | No | Yes | +| Target tables | Must be empty | Can be populated | + +## Deployment for "TiDB" back end + +When using the "TiDB" back end, you no longer need `tikv-importer`. Compared with the [standard deployment procedure](/dev/reference/tools/tidb-lightning/deployment.md), the "TiDB" back end deployment has the following two differences: + +* Steps involving `tikv-importer` can all be skipped. +* The configuration must be changed to indicate the "TiDB" back end is used. + +### Ansible deployment + +1. The `[importer_server]` section in `inventory.ini` can be left blank. + + ```ini + ... + + [importer_server] + # keep empty + + [lightning_server] + 192.168.20.10 + + ... + ``` + +2. The `tikv_importer_port` setting in `group_vars/all.yml` is ignored, and the file `group_vars/importer_server.yml` does not need to be changed.
But you need to edit `conf/tidb-lightning.yml` and change the `backend` setting to `tidb`. + + ```yaml + ... + tikv_importer: + backend: "tidb" # <-- change this + ... + ``` + +3. Bootstrap and deploy the cluster as usual. + +4. Mount the data source for TiDB Lightning as usual. + +5. Start `tidb-lightning` as usual. + +### Manual deployment + +You do not need to download and configure `tikv-importer`. + +Before running `tidb-lightning`, add the following lines to the configuration file: + +```toml +[tikv-importer] +backend = "tidb" +``` + +or supply the `--backend tidb` argument when executing `tidb-lightning`. + +## Conflict resolution + +The "TiDB" back end supports importing to an already-populated table. However, the new data might cause a unique key conflict with the old data. You can control how to resolve the conflict by using this task configuration. + +```toml +[tikv-importer] +backend = "tidb" +on-duplicate = "replace" # or "error" or "ignore" +``` + +| Setting | Behavior on conflict | Equivalent SQL statement | +|:---|:---|:---| +| replace | New entries replace old ones | `REPLACE INTO ...` | +| ignore | Keep old entries and ignore new ones | `INSERT IGNORE INTO ...` | +| error | Abort import | `INSERT INTO ...` | + +## Migrating from Loader to TiDB Lightning "TiDB" back end + +TiDB Lightning using the "TiDB" back end can completely replace the functions of [Loader](/dev/reference/tools/loader.md). The following table shows how to translate Loader configurations into [TiDB Lightning configurations](/dev/reference/tools/tidb-lightning/config.md). + + + + + + + + + + +
<tr><th>Loader</th><th>TiDB Lightning</th></tr>
+ +```toml + +# logging +log-level = "info" +log-file = "loader.log" + +# Prometheus +status-addr = ":8272" + +# concurrency +pool-size = 16 +``` + + + +```toml +[lightning] +# logging +level = "info" +file = "tidb-lightning.log" + +# Prometheus +pprof-port = 8289 + +# concurrency (better left as default) +#region-concurrency = 16 +``` + +
+ +```toml + +# checkpoint database + +checkpoint-schema = "tidb_loader" + + + + + + +``` + + + +```toml +[checkpoint] +# checkpoint storage +enable = true +schema = "tidb_lightning_checkpoint" +# by default the checkpoint is stored in +# a local file, which is more efficient. +# but you could still choose to store the +# checkpoints in the target database with +# this setting: +#driver = "mysql" +``` + +
+ +```toml + + + +``` + + + +```toml +[tikv-importer] +# use the "TiDB" back end +backend = "tidb" +``` + +
+ +```toml + +# data source directory +dir = "/data/export/" +``` + + + +```toml +[mydumper] +# data source directory +data-source-dir = "/data/export" +``` + +
+ +```toml +[db] +# TiDB connection parameters +host = "127.0.0.1" +port = 4000 + +user = "root" +password = "" + +#sql-mode = "" +``` + + + +```toml +[tidb] +# TiDB connection parameters +host = "127.0.0.1" +port = 4000 +status-port = 10080 # <- this is required +user = "root" +password = "" + +#sql-mode = "" +``` + +
diff --git a/v2.1/reference/tools/download.md b/v2.1/reference/tools/download.md index fb453b75b313c..7fbd274f38e56 100644 --- a/v2.1/reference/tools/download.md +++ b/v2.1/reference/tools/download.md @@ -16,7 +16,7 @@ In addition, the Kafka version of TiDB Binlog is also provided. | Package name | OS | Architecture | SHA256 checksum | |:---|:---|:---|:---| -| [tidb-v2.1.16-linux-amd64.tar.gz](http://download.pingcap.org/tidb-v2.1.16-linux-amd64.tar.gz) (TiDB Binlog, TiDB Lightning) | Linux | amd64 |[tidb-v2.1.16-linux-amd64.sha256](http://download.pingcap.org/tidb-v2.1.16-linux-amd64.sha256)| +| [tidb-v2.1.17-linux-amd64.tar.gz](https://download.pingcap.org/tidb-v2.1.17-linux-amd64.tar.gz) (TiDB Binlog, TiDB Lightning) | Linux | amd64 |[tidb-v2.1.17-linux-amd64.sha256](https://download.pingcap.org/tidb-v2.1.17-linux-amd64.sha256)| | [tidb-binlog-kafka-linux-amd64.tar.gz](http://download.pingcap.org/tidb-binlog-kafka-linux-amd64.tar.gz) (the Kafka version of TiDB Binlog) | Linux | amd64 |[tidb-binlog-kafka-linux-amd64.sha256](http://download.pingcap.org/tidb-binlog-kafka-linux-amd64.sha256)| ## DM (Data Migration) diff --git a/v3.0/reference/tools/download.md b/v3.0/reference/tools/download.md index e0e74c1776dc0..064312100b792 100644 --- a/v3.0/reference/tools/download.md +++ b/v3.0/reference/tools/download.md @@ -26,7 +26,7 @@ If you want to download the 3.0 version of [TiDB Lightning](/v3.0/reference/tool | Package name | OS | Architecture | SHA256 checksum | |:---|:---|:---|:---| -| [tidb-toolkit-v3.0.3-linux-amd64.tar.gz](http://download.pingcap.org/tidb-toolkit-v3.0.3-linux-amd64.tar.gz) | Linux | amd64 | [tidb-toolkit-v3.0.3-linux-amd64.sha256](http://download.pingcap.org/tidb-toolkit-v3.0.3-linux-amd64.sha256) | +| [tidb-toolkit-v3.0.5-linux-amd64.tar.gz](https://download.pingcap.org/tidb-toolkit-v3.0.5-linux-amd64.tar.gz) | Linux | amd64 | [tidb-toolkit-v3.0.5-linux-amd64.sha256](https://download.pingcap.org/tidb-toolkit-v3.0.5-linux-amd64.sha256) | ## DM 
(Data Migration) diff --git a/v3.0/reference/tools/tidb-lightning/deployment.md b/v3.0/reference/tools/tidb-lightning/deployment.md index 2e9da3fd10e14..7409f35a4d95a 100644 --- a/v3.0/reference/tools/tidb-lightning/deployment.md +++ b/v3.0/reference/tools/tidb-lightning/deployment.md @@ -6,7 +6,9 @@ category: reference # TiDB Lightning Deployment -This document describes the hardware requirements of TiDB Lightning on separate deployment and mixed deployment, and how to deploy it using Ansible or manually. +This document describes the hardware requirements of TiDB Lightning using the default "Importer" back end, and how to deploy it using Ansible or manually. + +If you wish to use the "TiDB" back end, also read [TiDB Lightning "TiDB" Back End](/v3.0/reference/tools/tidb-lightning/tidb-backend.md) for the changes to the deployment steps. ## Notes @@ -343,8 +345,15 @@ Follow the link to download the TiDB Lightning package (choose the same version # keep-after-success = false [tikv-importer] - # The listening address of tikv-importer. Change it to the actual address. + # Delivery back end, can be "importer" or "tidb". + # backend = "importer" + # The listening address of tikv-importer when back end is "importer". Change it to the actual address. addr = "172.16.31.10:8287" + # Action to do when trying to insert a duplicated entry in the "tidb" back end. + # - replace: new entry replaces existing entry + # - ignore: keep existing entry, ignore new entry + # - error: report error and quit the program + # on-duplicate = "replace" [mydumper] # Block size for file reading. 
Keep it longer than the longest string of diff --git a/v3.0/reference/tools/tidb-lightning/overview.md b/v3.0/reference/tools/tidb-lightning/overview.md index 372d220ee4001..5bdbfdf82a518 100644 --- a/v3.0/reference/tools/tidb-lightning/overview.md +++ b/v3.0/reference/tools/tidb-lightning/overview.md @@ -36,6 +36,10 @@ The complete import process is as follows: There are two kinds of engine files: *data engines* and *index engines*, each corresponding to two kinds of KV pairs: the row data and secondary indices. Normally, the row data are entirely sorted in the data source, while the secondary indices are out of order. Because of this, the data engines are uploaded as soon as a batch is completed, while the index engines are imported only after all batches of the entire table are encoded. -6. After all engines associated to a table are imported, `tidb-lightning` performs a checksum comparison between the local data source and those calculated from the cluster, to ensure there is no data corruption in the process, and tells TiDB to `ANALYZE` all imported tables, to prepare for optimal query planning. +6. After all engines associated to a table are imported, `tidb-lightning` performs a checksum comparison between the local data source and those calculated from the cluster, to ensure there is no data corruption in the process; tells TiDB to `ANALYZE` all imported tables, to prepare for optimal query planning; and adjusts the `AUTO_INCREMENT` value so future insertions will not cause conflict. + + The auto-increment ID of a table is computed by the estimated *upper bound* of the number of rows, which is proportional to the total file size of the data files of the table. Therefore, the final auto-increment ID is often much larger than the actual number of rows. This is expected since in TiDB auto-increment is [not necessarily allocated sequentially](/v3.0/reference/mysql-compatibility.md#auto-increment-id). 7. 
Finally, `tidb-lightning` switches the TiKV cluster back to "normal mode", so the cluster resumes normal services. + +TiDB Lightning also supports using "TiDB" instead of "Importer" as the back end. In this configuration, `tidb-lightning` transforms data into SQL `INSERT` statements and directly executes them on the target cluster, similar to Loader. See [TiDB Lightning "TiDB" Back End](/v3.0/reference/tools/tidb-lightning/tidb-backend.md) for details. diff --git a/v3.0/reference/tools/tidb-lightning/table-filter.md b/v3.0/reference/tools/tidb-lightning/table-filter.md index 0304596fe16a6..17dc4aff465ee 100644 --- a/v3.0/reference/tools/tidb-lightning/table-filter.md +++ b/v3.0/reference/tools/tidb-lightning/table-filter.md @@ -27,6 +27,10 @@ ignore-dbs = ["pattern4", "pattern5"] The pattern can either be a simple name, or a regular expression in [Go dialect](https://golang.org/pkg/regexp/syntax/#hdr-syntax) if it starts with a `~` character. +> **Note:** +> +> The system databases `INFORMATION_SCHEMA`, `PERFORMANCE_SCHEMA`, `mysql` and `sys` are always black-listed regardless of the table filter settings. + ## Filtering tables ```toml diff --git a/v3.0/reference/tools/tidb-lightning/tidb-backend.md b/v3.0/reference/tools/tidb-lightning/tidb-backend.md new file mode 100644 index 0000000000000..9242ec6653c7a --- /dev/null +++ b/v3.0/reference/tools/tidb-lightning/tidb-backend.md @@ -0,0 +1,223 @@ +--- +title: TiDB Lightning "TiDB" Back End +summary: Choose how to write data into the TiDB cluster. +category: reference +--- + +# TiDB Lightning "TiDB" Back End + +TiDB Lightning supports two back ends: "Importer" and "TiDB". The back end determines how `tidb-lightning` delivers data into the target cluster. + +The "Importer" back end (default) requires `tidb-lightning` to first encode the SQL or CSV data into KV pairs, and relies on the external `tikv-importer` program to sort these KV pairs and ingest them directly into the TiKV nodes.
+ +The "TiDB" back end requires `tidb-lightning` to encode these data into SQL `INSERT` statements, and has these statements executed directly on the TiDB node. + +| Back end | "Importer" | "TiDB" | +|:---|:---|:---| +| Speed | Fast (~300 GB/hr) | Slow (~50 GB/hr) | +| Resource usage | High | Low | +| ACID respected while importing | No | Yes | +| Target tables | Must be empty | Can be populated | + +## Deployment for "TiDB" back end + +When using the "TiDB" back end, you no longer need `tikv-importer`. Compared with the [standard deployment procedure](/v3.0/reference/tools/tidb-lightning/deployment.md), the "TiDB" back end deployment has the following two differences: + +* Steps involving `tikv-importer` can all be skipped. +* The configuration must be changed to indicate the "TiDB" back end is used. + +### Ansible deployment + +1. The `[importer_server]` section in `inventory.ini` can be left blank. + + ```ini + ... + + [importer_server] + # keep empty + + [lightning_server] + 192.168.20.10 + + ... + ``` + +2. The `tikv_importer_port` setting in `group_vars/all.yml` is ignored, and the file `group_vars/importer_server.yml` does not need to be changed. But you need to edit `conf/tidb-lightning.yml` and change the `backend` setting to `tidb`. + + ```yaml + ... + tikv_importer: + backend: "tidb" # <-- change this + ... + ``` + +3. Bootstrap and deploy the cluster as usual. + +4. Mount the data source for TiDB Lightning as usual. + +5. Start `tidb-lightning` as usual. + +### Manual deployment + +You do not need to download and configure `tikv-importer`. + +Before running `tidb-lightning`, add the following lines to the configuration file: + +```toml +[tikv-importer] +backend = "tidb" +``` + +or supply the `--backend tidb` argument when executing `tidb-lightning`. + +## Conflict resolution + +The "TiDB" back end supports importing to an already-populated table. However, the new data might cause a unique key conflict with the old data.
You can control how to resolve the conflict by using this task configuration. + +```toml +[tikv-importer] +backend = "tidb" +on-duplicate = "replace" # or "error" or "ignore" +``` + +| Setting | Behavior on conflict | Equivalent SQL statement | +|:---|:---|:---| +| replace | New entries replace old ones | `REPLACE INTO ...` | +| ignore | Keep old entries and ignore new ones | `INSERT IGNORE INTO ...` | +| error | Abort import | `INSERT INTO ...` | + +## Migrating from Loader to TiDB Lightning "TiDB" back end + +TiDB Lightning using the "TiDB" back end can completely replace the functions of [Loader](/v3.0/reference/tools/loader.md). The following table shows how to translate Loader configurations into [TiDB Lightning configurations](/v3.0/reference/tools/tidb-lightning/deployment.md#step-4-start-tidb-lightning). + + + + + + + + + + +
<tr><th>Loader</th><th>TiDB Lightning</th></tr>
+ +```toml + +# logging +log-level = "info" +log-file = "loader.log" + +# Prometheus +status-addr = ":8272" + +# concurrency +pool-size = 16 +``` + + + +```toml +[lightning] +# logging +level = "info" +file = "tidb-lightning.log" + +# Prometheus +pprof-port = 8289 + +# concurrency (better left as default) +#region-concurrency = 16 +``` + +
+ +```toml + +# checkpoint database + +checkpoint-schema = "tidb_loader" + + + + + + +``` + + + +```toml +[checkpoint] +# checkpoint storage +enable = true +schema = "tidb_lightning_checkpoint" +# by default the checkpoint is stored in +# a local file, which is more efficient. +# but you could still choose to store the +# checkpoints in the target database with +# this setting: +#driver = "mysql" +``` + +
+ +```toml + + + +``` + + + +```toml +[tikv-importer] +# use the "TiDB" back end +backend = "tidb" +``` + +
+ +```toml + +# data source directory +dir = "/data/export/" +``` + + + +```toml +[mydumper] +# data source directory +data-source-dir = "/data/export" +``` + +
+ +```toml +[db] +# TiDB connection parameters +host = "127.0.0.1" +port = 4000 + +user = "root" +password = "" + +#sql-mode = "" +``` + + + +```toml +[tidb] +# TiDB connection parameters +host = "127.0.0.1" +port = 4000 +status-port = 10080 # <- this is required +user = "root" +password = "" + +#sql-mode = "" +``` + +
diff --git a/v3.1/reference/tools/download.md b/v3.1/reference/tools/download.md index 2843a676766a1..4b4bff4dd5321 100644 --- a/v3.1/reference/tools/download.md +++ b/v3.1/reference/tools/download.md @@ -25,7 +25,7 @@ If you want to download the 3.0 version of [TiDB Lightning](/v3.1/reference/tool | Package name | OS | Architecture | SHA256 checksum | |:---|:---|:---|:---| -| [tidb-toolkit-v3.0.3-linux-amd64.tar.gz](http://download.pingcap.org/tidb-toolkit-v3.0.3-linux-amd64.tar.gz) | Linux | amd64 | [tidb-toolkit-v3.0.3-linux-amd64.sha256](http://download.pingcap.org/tidb-toolkit-v3.0.3-linux-amd64.sha256) | +| [tidb-toolkit-v3.0.5-linux-amd64.tar.gz](https://download.pingcap.org/tidb-toolkit-v3.0.5-linux-amd64.tar.gz) | Linux | amd64 | [tidb-toolkit-v3.0.5-linux-amd64.sha256](https://download.pingcap.org/tidb-toolkit-v3.0.5-linux-amd64.sha256) | ## DM (Data Migration) diff --git a/v3.1/reference/tools/tidb-lightning/deployment.md b/v3.1/reference/tools/tidb-lightning/deployment.md index 68b7a9492cc8e..539614a8014de 100644 --- a/v3.1/reference/tools/tidb-lightning/deployment.md +++ b/v3.1/reference/tools/tidb-lightning/deployment.md @@ -6,7 +6,9 @@ category: reference # TiDB Lightning Deployment -This document describes the hardware requirements of TiDB Lightning on separate deployment and mixed deployment, and how to deploy it using Ansible or manually. +This document describes the hardware requirements of TiDB Lightning using the default "Importer" back end, and how to deploy it using Ansible or manually. + +If you wish to use the "TiDB" back end, also read [TiDB Lightning "TiDB" Back End](/v3.1/reference/tools/tidb-lightning/tidb-backend.md) for the changes to the deployment steps. ## Notes @@ -343,8 +345,15 @@ Follow the link to download the TiDB Lightning package (choose the same version # keep-after-success = false [tikv-importer] - # The listening address of tikv-importer. Change it to the actual address. + # Delivery back end, can be "importer" or "tidb". 
+ # backend = "importer" + # The listening address of tikv-importer when back end is "importer". Change it to the actual address. addr = "172.16.31.10:8287" + # Action to do when trying to insert a duplicated entry in the "tidb" back end. + # - replace: new entry replaces existing entry + # - ignore: keep existing entry, ignore new entry + # - error: report error and quit the program + # on-duplicate = "replace" [mydumper] # Block size for file reading. Keep it longer than the longest string of diff --git a/v3.1/reference/tools/tidb-lightning/overview.md b/v3.1/reference/tools/tidb-lightning/overview.md index 107459d266586..40f51d4dd55bb 100644 --- a/v3.1/reference/tools/tidb-lightning/overview.md +++ b/v3.1/reference/tools/tidb-lightning/overview.md @@ -35,6 +35,10 @@ The complete import process is as follows: There are two kinds of engine files: *data engines* and *index engines*, each corresponding to two kinds of KV pairs: the row data and secondary indices. Normally, the row data are entirely sorted in the data source, while the secondary indices are out of order. Because of this, the data engines are uploaded as soon as a batch is completed, while the index engines are imported only after all batches of the entire table are encoded. -6. After all engines associated to a table are imported, `tidb-lightning` performs a checksum comparison between the local data source and those calculated from the cluster, to ensure there is no data corruption in the process, and tells TiDB to `ANALYZE` all imported tables, to prepare for optimal query planning. +6. After all engines associated to a table are imported, `tidb-lightning` performs a checksum comparison between the local data source and those calculated from the cluster, to ensure there is no data corruption in the process; tells TiDB to `ANALYZE` all imported tables, to prepare for optimal query planning; and adjusts the `AUTO_INCREMENT` value so future insertions will not cause conflict. 
+ + The auto-increment ID of a table is computed by the estimated *upper bound* of the number of rows, which is proportional to the total file size of the data files of the table. Therefore, the final auto-increment ID is often much larger than the actual number of rows. This is expected since in TiDB auto-increment is [not necessarily allocated sequentially](/v3.1/reference/mysql-compatibility.md#auto-increment-id). 7. Finally, `tidb-lightning` switches the TiKV cluster back to "normal mode", so the cluster resumes normal services. + +TiDB Lightning also supports using "TiDB" instead of "Importer" as the back end. In this configuration, `tidb-lightning` transforms data into SQL `INSERT` statements and directly executes them on the target cluster, similar to Loader. See [TiDB Lightning "TiDB" Back End](/v3.1/reference/tools/tidb-lightning/tidb-backend.md) for details. diff --git a/v3.1/reference/tools/tidb-lightning/table-filter.md b/v3.1/reference/tools/tidb-lightning/table-filter.md index 059152d490ce1..11dcc6c3080d3 100644 --- a/v3.1/reference/tools/tidb-lightning/table-filter.md +++ b/v3.1/reference/tools/tidb-lightning/table-filter.md @@ -26,6 +26,10 @@ ignore-dbs = ["pattern4", "pattern5"] The pattern can either be a simple name, or a regular expression in [Go dialect](https://golang.org/pkg/regexp/syntax/#hdr-syntax) if it starts with a `~` character. +> **Note:** +> +> The system databases `INFORMATION_SCHEMA`, `PERFORMANCE_SCHEMA`, `mysql` and `sys` are always black-listed regardless of the table filter settings. + ## Filtering tables ```toml diff --git a/v3.1/reference/tools/tidb-lightning/tidb-backend.md b/v3.1/reference/tools/tidb-lightning/tidb-backend.md new file mode 100644 index 0000000000000..c3a998404a8ca --- /dev/null +++ b/v3.1/reference/tools/tidb-lightning/tidb-backend.md @@ -0,0 +1,223 @@ +--- +title: TiDB Lightning "TiDB" Back End +summary: Choose how to write data into the TiDB cluster. 
+category: reference +--- + +# TiDB Lightning "TiDB" Back End + +TiDB Lightning supports two back ends: "Importer" and "TiDB". The back end determines how `tidb-lightning` delivers data into the target cluster. + +The "Importer" back end (default) requires `tidb-lightning` to first encode the SQL or CSV data into KV pairs, and relies on the external `tikv-importer` program to sort these KV pairs and ingest them directly into the TiKV nodes. + +The "TiDB" back end requires `tidb-lightning` to encode these data into SQL `INSERT` statements, and has these statements executed directly on the TiDB node. + +| Back end | "Importer" | "TiDB" | +|:---|:---|:---| +| Speed | Fast (~300 GB/hr) | Slow (~50 GB/hr) | +| Resource usage | High | Low | +| ACID respected while importing | No | Yes | +| Target tables | Must be empty | Can be populated | + +## Deployment for "TiDB" back end + +When using the "TiDB" back end, you no longer need `tikv-importer`. Compared with the [standard deployment procedure](/v3.1/reference/tools/tidb-lightning/deployment.md), the "TiDB" back end deployment has the following two differences: + +* Steps involving `tikv-importer` can all be skipped. +* The configuration must be changed to indicate the "TiDB" back end is used. + +### Ansible deployment + +1. The `[importer_server]` section in `inventory.ini` can be left blank. + + ```ini + ... + + [importer_server] + # keep empty + + [lightning_server] + 192.168.20.10 + + ... + ``` + +2. The `tikv_importer_port` setting in `group_vars/all.yml` is ignored, and the file `group_vars/importer_server.yml` does not need to be changed. But you need to edit `conf/tidb-lightning.yml` and change the `backend` setting to `tidb`. + + ```yaml + ... + tikv_importer: + backend: "tidb" # <-- change this + ... + ``` + +3. Bootstrap and deploy the cluster as usual. + +4. Mount the data source for TiDB Lightning as usual. + +5. Start `tidb-lightning` as usual.
+ +### Manual deployment + +You do not need to download and configure `tikv-importer`. + +Before running `tidb-lightning`, add the following lines to the configuration file: + +```toml +[tikv-importer] +backend = "tidb" +``` + +or supply the `--backend tidb` argument when executing `tidb-lightning`. + +## Conflict resolution + +The "TiDB" back end supports importing to an already-populated table. However, the new data might cause a unique key conflict with the old data. You can control how to resolve the conflict by using this task configuration. + +```toml +[tikv-importer] +backend = "tidb" +on-duplicate = "replace" # or "error" or "ignore" +``` + +| Setting | Behavior on conflict | Equivalent SQL statement | +|:---|:---|:---| +| replace | New entries replace old ones | `REPLACE INTO ...` | +| ignore | Keep old entries and ignore new ones | `INSERT IGNORE INTO ...` | +| error | Abort import | `INSERT INTO ...` | + +## Migrating from Loader to TiDB Lightning "TiDB" back end + +TiDB Lightning using the "TiDB" back end can completely replace the functions of [Loader](/v3.1/reference/tools/loader.md). The following table shows how to translate Loader configurations into [TiDB Lightning configurations](/v3.1/reference/tools/tidb-lightning/deployment.md#step-4-start-tidb-lightning). + + + + + + + + + + +
<tr><th>Loader</th><th>TiDB Lightning</th></tr>
+ +```toml + +# logging +log-level = "info" +log-file = "loader.log" + +# Prometheus +status-addr = ":8272" + +# concurrency +pool-size = 16 +``` + + + +```toml +[lightning] +# logging +level = "info" +file = "tidb-lightning.log" + +# Prometheus +pprof-port = 8289 + +# concurrency (better left as default) +#region-concurrency = 16 +``` + +
+ +```toml + +# checkpoint database + +checkpoint-schema = "tidb_loader" + + + + + + +``` + + + +```toml +[checkpoint] +# checkpoint storage +enable = true +schema = "tidb_lightning_checkpoint" +# by default the checkpoint is stored in +# a local file, which is more efficient. +# but you could still choose to store the +# checkpoints in the target database with +# this setting: +#driver = "mysql" +``` + +
+ +```toml + + + +``` + + + +```toml +[tikv-importer] +# use the "TiDB" back end +backend = "tidb" +``` + +
+ +```toml + +# data source directory +dir = "/data/export/" +``` + + + +```toml +[mydumper] +# data source directory +data-source-dir = "/data/export" +``` + +
+ +```toml +[db] +# TiDB connection parameters +host = "127.0.0.1" +port = 4000 + +user = "root" +password = "" + +#sql-mode = "" +``` + + + +```toml +[tidb] +# TiDB connection parameters +host = "127.0.0.1" +port = 4000 +status-port = 10080 # <- this is required +user = "root" +password = "" + +#sql-mode = "" +``` + +