diff --git a/dev/TOC.md b/dev/TOC.md index 6234ec58d1f30..6a896225ea82f 100644 --- a/dev/TOC.md +++ b/dev/TOC.md @@ -361,10 +361,12 @@ - [Checkpoints](/dev/reference/tools/tidb-lightning/checkpoints.md) - [Table Filter](/dev/reference/tools/tidb-lightning/table-filter.md) - [CSV Support](/dev/reference/tools/tidb-lightning/csv.md) + - [TiDB-backend](/dev/reference/tools/tidb-lightning/tidb-backend.md) - [Web Interface](/dev/reference/tools/tidb-lightning/web.md) - [Monitor](/dev/reference/tools/tidb-lightning/monitor.md) - [Troubleshoot](/dev/how-to/troubleshoot/tidb-lightning.md) - [FAQ](/dev/faq/tidb-lightning.md) + - [Glossary](/dev/reference/tools/tidb-lightning/glossary.md) - [sync-diff-inspector](/dev/reference/tools/sync-diff-inspector/overview.md) - [PD Control](/dev/reference/tools/pd-control.md) - [PD Recover](/dev/reference/tools/pd-recover.md) diff --git a/dev/faq/tidb-lightning.md b/dev/faq/tidb-lightning.md index fc062259dd4ef..0512174030dd3 100644 --- a/dev/faq/tidb-lightning.md +++ b/dev/faq/tidb-lightning.md @@ -28,12 +28,14 @@ TiDB Lightning requires the following privileges: * CREATE * DROP -If the target database is used to store checkpoints, it additionally requires these privileges: +If the [TiDB-backend](/dev/reference/tools/tidb-lightning/tidb-backend.md) is chosen, or the target database is used to store checkpoints, it additionally requires these privileges: * INSERT * DELETE -If the `checksum` configuration item of TiDB Lightning is set to `true`, then the admin user privileges in the downstream TiDB need to be granted to TiDB Lightning. +The Importer-backend does not require these two privileges because data is ingested into TiKV directly, which bypasses the entire TiDB privilege system. This is secure as long as the ports of TiKV, TiKV Importer and TiDB Lightning are not reachable outside the cluster. 
+ +If the `checksum` configuration of TiDB Lightning is set to `true`, then the admin user privileges in the downstream TiDB need to be granted to TiDB Lightning. ## TiDB Lightning encountered an error when importing one table. Will it affect other tables? Will the process be terminated? diff --git a/dev/reference/tools/tidb-lightning/config.md b/dev/reference/tools/tidb-lightning/config.md index 5270780109df7..a6704199499c6 100644 --- a/dev/reference/tools/tidb-lightning/config.md +++ b/dev/reference/tools/tidb-lightning/config.md @@ -89,11 +89,11 @@ driver = "file" #keep-after-success = false [tikv-importer] -# Delivery back end, can be "importer" or "tidb". +# Delivery backend, can be "importer" or "tidb". # backend = "importer" -# The listening address of tikv-importer when back end is "importer". Change it to the actual address. +# The listening address of tikv-importer when backend is "importer". Change it to the actual address. addr = "172.16.31.10:8287" -# Action to do when trying to insert a duplicated entry in the "tidb" back end. +# Action to do when trying to insert a duplicated entry in the "tidb" backend. # - replace: new entry replaces existing entry # - ignore: keep existing entry, ignore new entry # - error: report error and quit the program @@ -184,6 +184,9 @@ checksum-table-concurrency = 16 # The default SQL mode used to parse and execute the SQL statements. sql-mode = "STRICT_TRANS_TABLES,NO_ENGINE_SUBSTITUTION" +# Sets maximum packet size allowed for SQL connections. +# Set this to 0 to automatically fetch the `max_allowed_packet` variable from server on every connection. +max-allowed-packet = 67_108_864 # When data importing is complete, tidb-lightning can automatically perform # the Checksum, Compact and Analyze operations. 
It is recommended to leave @@ -295,7 +298,7 @@ min-available-ratio = 0.05 | -V | Prints program version | | | -d *directory* | Directory of the data dump to read from | `mydumper.data-source-dir` | | -L *level* | Log level: debug, info, warn, error, fatal (default = info) | `lightning.log-level` | -| --backend *backend* | [Delivery back end](/dev/reference/tools/tidb-lightning/tidb-backend.md) (`importer` or `tidb`) | `tikv-importer.backend` | +| --backend *backend* | [Delivery backend](/dev/reference/tools/tidb-lightning/tidb-backend.md) (`importer` or `tidb`) | `tikv-importer.backend` | | --log-file *file* | Log file path | `lightning.log-file` | | --status-addr *ip:port* | Listening address of the TiDB Lightning server | `lightning.status-port` | | --importer *host:port* | Address of TiKV Importer | `tikv-importer.addr` | diff --git a/dev/reference/tools/tidb-lightning/deployment.md b/dev/reference/tools/tidb-lightning/deployment.md index 62d5218351d65..f029f22555fbd 100644 --- a/dev/reference/tools/tidb-lightning/deployment.md +++ b/dev/reference/tools/tidb-lightning/deployment.md @@ -6,9 +6,9 @@ category: reference # TiDB Lightning Deployment -This document describes the hardware requirements of TiDB Lightning using the default "Importer" back end, and how to deploy it using Ansible or manually. +This document describes the hardware requirements of TiDB Lightning using the default Importer-backend, and how to deploy it using Ansible or manually. -If you wish to use the "TiDB" back end, also read [TiDB Lightning "TiDB" Back End](/dev/reference/tools/tidb-lightning/tidb-backend.md) for the changes to the deployment steps. +If you wish to use the TiDB-backend, also read [TiDB Lightning TiDB-backend](/dev/reference/tools/tidb-lightning/tidb-backend.md) for the changes to the deployment steps. 
## Notes @@ -271,3 +271,9 @@ Refer to the [TiDB enterprise tools download page](/dev/reference/tools/download ```sh nohup ./tidb-lightning -config tidb-lightning.toml > nohup.out & ``` + +## Upgrading TiDB Lightning + +You can upgrade TiDB Lightning by replacing the binaries alone. No further configuration is needed. See [FAQ](/dev/faq/tidb-lightning.md#how-to-properly-restart-tidb-lightning) for detailed instructions on restarting TiDB Lightning. + +If an import task is running, we recommend that you wait until it finishes before upgrading TiDB Lightning. Otherwise, you might need to reimport from scratch, because there is no guarantee that checkpoints work across versions. diff --git a/dev/reference/tools/tidb-lightning/glossary.md b/dev/reference/tools/tidb-lightning/glossary.md new file mode 100644 index 0000000000000..42098d41a8245 --- /dev/null +++ b/dev/reference/tools/tidb-lightning/glossary.md @@ -0,0 +1,189 @@ +--- +title: TiDB Lightning Glossary +summary: List of special terms used in TiDB Lightning. +category: glossary +--- + +# TiDB Lightning Glossary + +This page explains the special terms used in TiDB Lightning's logs, monitoring, configurations, and documentation. + + + +## A + +### Analyze + +An operation to rebuild the [statistics](/dev/reference/performance/statistics.md) information of a TiDB table, i.e. running the [`ANALYZE TABLE`](/dev/reference/sql/statements/analyze-table.md) statement. + +Because TiDB Lightning imports data without going through TiDB, the statistics information is not automatically updated. Therefore, TiDB Lightning explicitly analyzes every table after importing. This step can be omitted by setting the `post-restore.analyze` configuration to `false`. + +### `AUTO_INCREMENT_ID` + +Every table has an associated `AUTO_INCREMENT_ID` counter to provide the default value of an auto-incrementing column. In TiDB, this counter is additionally used to assign row IDs. 
+ +Because TiDB Lightning imports data without going through TiDB, the `AUTO_INCREMENT_ID` counter is not automatically updated. Therefore, TiDB Lightning explicitly alters `AUTO_INCREMENT_ID` to a valid value. This step is always performed, even if the table has no `AUTO_INCREMENT` columns. + + + +## B + +### Back end + +Back end is the destination where TiDB Lightning sends the parsed result. Also spelled as "backend". + +See [TiDB Lightning TiDB-backend](/dev/reference/tools/tidb-lightning/tidb-backend.md) for details. + +### Black-white list + +A configuration list that specifies which tables should be imported and which should be excluded. + +See [TiDB Lightning Table Filter](/dev/reference/tools/tidb-lightning/table-filter.md) for details. + + + +## C + +### Checkpoint + +TiDB Lightning continuously saves its progress into a local file or a remote database while importing. This allows it to resume from an intermediate state should it crash in the process. See the [Checkpoints](/dev/reference/tools/tidb-lightning/checkpoints.md) section for details. + +### Checksum + +In TiDB Lightning, the checksum of a table is a set of 3 numbers calculated from the content of each KV pair in that table. These numbers are respectively: + +* the number of KV pairs, +* the total length of all KV pairs, and +* the bitwise-XOR of the [CRC-64-ECMA](https://en.wikipedia.org/wiki/Cyclic_redundancy_check) value of each pair. + +TiDB Lightning [validates the imported data](/dev/faq/tidb-lightning.md#how-to-ensure-the-integrity-of-the-imported-data) by comparing the [local](/dev/reference/tools/tidb-lightning/glossary.md#local-checksum) and [remote checksums](/dev/reference/tools/tidb-lightning/glossary.md#remote-checksum) of every table. The program stops if any pair does not match. You can skip this check by setting the `post-restore.checksum` configuration to `false`. 
+ +See also the [Troubleshooting guide](/dev/how-to/troubleshoot/tidb-lightning.md#checksum-failed-checksum-mismatched-remote-vs-local) for how to properly handle checksum mismatch. + +### Chunk + +Equivalent to a single file in the data source. + +### Compaction + +An operation that merges multiple small SST files into one large SST file, and cleans up the deleted entries. TiKV automatically compacts data in background while TiDB Lightning is importing. + +> **Note:** +> +> For legacy reasons, you can still configure TiDB Lightning to explicitly trigger a compaction every time a table is imported. However, this is not recommended and the corresponding settings are disabled by default. + +See [RocksDB's wiki page on Compaction](https://github.com/facebook/rocksdb/wiki/Compaction) for its technical details. + + + +## D + +### Data engine + +An [engine](/dev/reference/tools/tidb-lightning/glossary.md#engine) for sorting actual row data. + +When a table is very large, its data is placed into multiple data engines to improve task pipelining and save space of TiKV Importer. By default, a new data engine is opened for every 100 GB of SQL data, which can be configured through the `mydumper.batch-size` setting. + +TiDB Lightning processes multiple data engines concurrently. This is controlled by the `lightning.table-concurrency` setting. + + + +## E + +### Engine + +In TiKV Importer, an engine is a RocksDB instance for sorting KV pairs. + +TiDB Lightning transfers data to TiKV Importer through engines. It first opens an engine, sends KV pairs to it (with no particular order), and finally closes the engine. The engine sorts the received KV pairs after it is closed. These closed engines can then be further uploaded to the TiKV stores for ingestion. + +Engines use TiKV Importer's `import-dir` as temporary storage, which are sometimes referred to as "engine files". 
+ +See also [data engine](/dev/reference/tools/tidb-lightning/glossary.md#data-engine) and [index engine](/dev/reference/tools/tidb-lightning/glossary.md#index-engine). + + + +## I + +### Import mode + +A configuration that optimizes TiKV for writing at the cost of degraded read speed and space usage. + +TiDB Lightning automatically switches to and off the import mode while running. However, if TiKV gets stuck in import mode, you can use `tidb-lightning-ctl` to [force revert](/dev/faq/tidb-lightning.md#why-my-tidb-cluster-is-using-lots-of-cpu-resources-and-running-very-slowly-after-using-tidb-lightning) to [normal mode](/dev/reference/tools/tidb-lightning/glossary.md#normal-mode). + +### Index engine + +An [engine](/dev/reference/tools/tidb-lightning/glossary.md#engine) for sorting indices. + +Regardless of number of indices, every table is associated with exactly one index engine. + +TiDB Lightning processes multiple index engines concurrently. This is controlled by the `lightning.index-concurrency` setting. Since every table has exactly one index engine, this also configures the maximum number of tables to process at the same time. + +### Ingest + +An operation which inserts the entire content of an [SST file](/dev/reference/tools/tidb-lightning/glossary.md#sst-file) into the RocksDB (TiKV) store. + +Ingestion is a very fast operation compared with inserting KV pairs one by one. This operation is the determinant factor for the performance of TiDB Lightning. + +See [RocksDB's wiki page on Creating and Ingesting SST files](https://github.com/facebook/rocksdb/wiki/Creating-and-Ingesting-SST-files) for its technical details. + + + +## K + +### KV pair + +Abbreviation of "key-value pair". + +### KV encoder + +A routine which parses SQL or CSV rows to KV pairs. Multiple KV encoders run in parallel to speed up processing. 
+ + + +## L + +### Local checksum + +The [checksum](/dev/reference/tools/tidb-lightning/glossary.md#checksum) of a table calculated by TiDB Lightning itself before sending the KV pairs to TiKV Importer. + + + +## N + +### Normal mode + +The mode where [import mode](/dev/reference/tools/tidb-lightning/glossary.md#import-mode) is disabled. + + + +## P + +### Post-processing + +The period of time after the entire data source is parsed and sent to TiKV Importer. TiDB Lightning is waiting for TiKV Importer to upload and [ingest](/dev/reference/tools/tidb-lightning/glossary.md#ingest) the [SST files](/dev/reference/tools/tidb-lightning/glossary.md#sst-file). + + + +## R + +### Remote checksum + +The [checksum](/dev/reference/tools/tidb-lightning/glossary.md#checksum) of a table calculated by TiDB after it has been imported. + + + +## S + +### Scattering + +An operation that randomly reassigns the leader and the peers of a [Region](/dev/glossary.md#regionpeerraft-group). Scattering ensures that the imported data are distributed evenly among TiKV stores. This reduces stress on PD. + +### Splitting + +An engine is typically very large (around 100 GB), which is not friendly to TiKV if treated as a single [region](/dev/glossary.md#regionpeerraft-group). TiKV Importer splits an engine into multiple small [SST files](/dev/reference/tools/tidb-lightning/glossary.md#sst-file) (configurable by TiKV Importer's `import.region-split-size` setting) before uploading. + +### SST file + +SST is the abbreviation of "sorted string table". An SST file is RocksDB's (and thus TiKV's) native storage format of a collection of KV pairs. + +TiKV Importer produces SST files from a closed [engine](/dev/reference/tools/tidb-lightning/glossary.md#engine). These SST files are uploaded and then [ingested](/dev/reference/tools/tidb-lightning/glossary.md#ingest) into TiKV stores. 
diff --git a/dev/reference/tools/tidb-lightning/overview.md b/dev/reference/tools/tidb-lightning/overview.md index 0dcd1ea8be426..d7efe90e98637 100644 --- a/dev/reference/tools/tidb-lightning/overview.md +++ b/dev/reference/tools/tidb-lightning/overview.md @@ -41,4 +41,4 @@ The complete import process is as follows: 7. Finally, `tidb-lightning` switches the TiKV cluster back to "normal mode", so the cluster resumes normal services. -TiDB Lightning also supports using "TiDB" instead of "Importer" as the back end. In this configuration, `tidb-lightning` transforms data into SQL `INSERT` statements and directly executes them on the target cluster, similar to Loader. See [TiDB Lightning "TiDB" Back End](/dev/reference/tools/tidb-lightning/tidb-backend.md) for details. +TiDB Lightning also supports using TiDB instead of Importer as the backend. In this configuration, `tidb-lightning` transforms data into SQL `INSERT` statements and directly executes them on the target cluster, similar to Loader. See [TiDB Lightning TiDB-backend](/dev/reference/tools/tidb-lightning/tidb-backend.md) for details. diff --git a/dev/reference/tools/tidb-lightning/tidb-backend.md b/dev/reference/tools/tidb-lightning/tidb-backend.md index a7c8e3dbf6ba4..e1e6b1375adbd 100644 --- a/dev/reference/tools/tidb-lightning/tidb-backend.md +++ b/dev/reference/tools/tidb-lightning/tidb-backend.md @@ -1,30 +1,38 @@ --- -title: TiDB Lightning "TiDB" Back End +title: TiDB Lightning TiDB-backend summary: Choose how to write data into the TiDB cluster. category: reference --- -# TiDB Lightning "TiDB" Back End +# TiDB Lightning TiDB-backend -TiDB Lightning supports two back ends: "Importer" and "TiDB". It determines how `tidb-lightning` delivers data into the target cluster. +TiDB Lightning supports two backends: Importer and TiDB. It determines how `tidb-lightning` delivers data into the target cluster. 
-The "Importer" back end (default) requires `tidb-lightning` to first encode the SQL or CSV data into KV pairs, and relies on the external `tikv-importer` program to sort these KV pairs and ingest directly into the TiKV nodes. +The Importer-backend (default) requires `tidb-lightning` to first encode the SQL or CSV data into KV pairs, and relies on the external `tikv-importer` program to sort these KV pairs and ingest directly into the TiKV nodes. -The "TiDB" back end requires `tidb-lightning` to encode these data into SQL `INSERT` statements, and has these statements executed directly on the TiDB node. +The TiDB-backend requires `tidb-lightning` to encode these data into SQL `INSERT` statements, and has these statements executed directly on the TiDB node. -| Back end | "Importer" | "TiDB" | +| Back end | Importer | TiDB | |:---|:---|:---| | Speed | Fast (~300 GB/hr) | Slow (~50 GB/hr) | | Resource usage | High | Low | | ACID respected while importing | No | Yes | | Target tables | Must be empty | Can be populated | -## Deployment for "TiDB" back end +## Deployment for TiDB-backend -When using the "TiDB" back end, you no longer need `tikv-importer`. Compared with the [standard deployment procedure](/dev/reference/tools/tidb-lightning/deployment.md), the "TiDB" back end deployment has the following two differences: +When using the TiDB-backend, you no longer need `tikv-importer`. Compared with the [standard deployment procedure](/dev/reference/tools/tidb-lightning/deployment.md), the TiDB-backend deployment has the following two differences: * Steps involving `tikv-importer` can all be skipped. -* The configuration must be changed to indicate the "TiDB" back end is used. +* The configuration must be changed to indicate the TiDB-backend is used. + +### Hardware requirements + +The speed of TiDB Lightning using TiDB-backend is limited by the SQL processing speed of TiDB. Therefore, even a lower-end machine may max out the possible performance. 
The recommended hardware configuration is: + +* 16 logical cores CPU +* An SSD large enough to store the entire data source, preferring higher read speed +* 1 Gigabit network card ### Ansible deployment @@ -72,7 +80,7 @@ or supplying the `--backend tidb` arguments when executing `tidb-lightning`. ## Conflict resolution -The "TiDB" back end supports importing to an already-populated table. However, the new data might cause a unique key conflict with the old data. You can control how to resolve the conflict by using this task configuration. +The TiDB-backend supports importing to an already-populated table. However, the new data might cause a unique key conflict with the old data. You can control how to resolve the conflict by using this task configuration. ```toml [tikv-importer] @@ -86,9 +94,9 @@ on-duplicate = "replace" # or "error" or "ignore" | ignore | Keep old entries and ignore new ones | `INSERT IGNORE INTO ...` | | error | Abort import | `INSERT INTO ...` | -## Migrating from Loader to TiDB Lightning "TiDB" back end +## Migrating from Loader to TiDB Lightning TiDB-backend -TiDB Lightning using the "TiDB" back end can completely replace functions of [Loader](/dev/reference/tools/loader.md). The following list shows how to translate Loader configurations into [TiDB Lightning configurations](/dev/reference/tools/tidb-lightning/config.md). +TiDB Lightning using the TiDB-backend can completely replace functions of [Loader](/dev/reference/tools/loader.md). The following list shows how to translate Loader configurations into [TiDB Lightning configurations](/dev/reference/tools/tidb-lightning/config.md). 
@@ -167,7 +175,7 @@ schema = "tidb_lightning_checkpoint" ```toml [tikv-importer] -# use the "TiDB" back end +# use the TiDB-backend backend = "tidb" ``` diff --git a/v3.0/TOC.md b/v3.0/TOC.md index 61e5c1ce47ed5..8160c03d8fdb9 100644 --- a/v3.0/TOC.md +++ b/v3.0/TOC.md @@ -357,6 +357,7 @@ - [Checkpoints](/v3.0/reference/tools/tidb-lightning/checkpoints.md) - [Table Filter](/v3.0/reference/tools/tidb-lightning/table-filter.md) - [CSV Support](/v3.0/reference/tools/tidb-lightning/csv.md) + - [TiDB-backend](/v3.0/reference/tools/tidb-lightning/tidb-backend.md) - [Web Interface](/v3.0/reference/tools/tidb-lightning/web.md) - [Monitor](/v3.0/reference/tools/tidb-lightning/monitor.md) - [Troubleshoot](/v3.0/how-to/troubleshoot/tidb-lightning.md) diff --git a/v3.0/faq/tidb-lightning.md b/v3.0/faq/tidb-lightning.md index 3308df77993eb..bf5635521510f 100644 --- a/v3.0/faq/tidb-lightning.md +++ b/v3.0/faq/tidb-lightning.md @@ -29,12 +29,14 @@ TiDB Lightning requires the following privileges: * CREATE * DROP -If the target database is used to store checkpoints, it additionally requires these privileges: +If the [TiDB-backend](/v3.0/reference/tools/tidb-lightning/tidb-backend.md) is chosen, or the target database is used to store checkpoints, it additionally requires these privileges: * INSERT * DELETE -If the `checksum` configuration item of TiDB Lightning is set to `true`, then the admin user privileges in the downstream TiDB need to be granted to TiDB Lightning. +The Importer-backend does not require these two privileges because data is ingested into TiKV directly, which bypasses the entire TiDB privilege system. This is secure as long as the ports of TiKV, TiKV Importer and TiDB Lightning are not reachable outside the cluster. + +If the `checksum` configuration of TiDB Lightning is set to `true`, then the admin user privileges in the downstream TiDB need to be granted to TiDB Lightning. ## TiDB Lightning encountered an error when importing one table. 
Will it affect other tables? Will the process be terminated? diff --git a/v3.0/reference/tools/tidb-lightning/deployment.md b/v3.0/reference/tools/tidb-lightning/deployment.md index ee165393148d1..0b3177b65d8ad 100644 --- a/v3.0/reference/tools/tidb-lightning/deployment.md +++ b/v3.0/reference/tools/tidb-lightning/deployment.md @@ -6,9 +6,9 @@ category: reference # TiDB Lightning Deployment -This document describes the hardware requirements of TiDB Lightning using the default "Importer" back end, and how to deploy it using Ansible or manually. +This document describes the hardware requirements of TiDB Lightning using the default Importer-backend, and how to deploy it using Ansible or manually. -If you wish to use the "TiDB" back end, also read [TiDB Lightning "TiDB" Back End](/v3.0/reference/tools/tidb-lightning/tidb-backend.md) for the changes to the deployment steps. +If you wish to use the TiDB-backend, also read [TiDB Lightning TiDB-backend](/v3.0/reference/tools/tidb-lightning/tidb-backend.md) for the changes to the deployment steps. ## Notes @@ -246,9 +246,9 @@ Refer to the [TiDB enterprise tools download page](/v3.0/reference/tools/downloa file = "tidb-lightning.log" [tikv-importer] - # Delivery back end, can be "importer" or "tidb". + # Delivery backend, can be "importer" or "tidb". # backend = "importer" - # The listening address of tikv-importer when back end is "importer". Change it to the actual address. + # The listening address of tikv-importer when backend is "importer". Change it to the actual address. addr = "172.16.31.10:8287" [mydumper] @@ -273,3 +273,9 @@ Refer to the [TiDB enterprise tools download page](/v3.0/reference/tools/downloa ```sh nohup ./tidb-lightning -config tidb-lightning.toml > nohup.out & ``` + +## Upgrading TiDB Lightning + +You can upgrade TiDB Lightning by replacing the binaries alone. No further configuration is needed. 
See [FAQ](/v3.0/faq/tidb-lightning.md#how-to-properly-restart-tidb-lightning) for detailed instructions on restarting TiDB Lightning. + +If an import task is running, we recommend that you wait until it finishes before upgrading TiDB Lightning. Otherwise, you might need to reimport from scratch, because there is no guarantee that checkpoints work across versions. diff --git a/v3.0/reference/tools/tidb-lightning/overview.md b/v3.0/reference/tools/tidb-lightning/overview.md index 5bdbfdf82a518..425f658efe4f3 100644 --- a/v3.0/reference/tools/tidb-lightning/overview.md +++ b/v3.0/reference/tools/tidb-lightning/overview.md @@ -42,4 +42,4 @@ The complete import process is as follows: 7. Finally, `tidb-lightning` switches the TiKV cluster back to "normal mode", so the cluster resumes normal services. -TiDB Lightning also supports using "TiDB" instead of "Importer" as the back end. In this configuration, `tidb-lightning` transforms data into SQL `INSERT` statements and directly executes them on the target cluster, similar to Loader. See [TiDB Lightning "TiDB" Back End](/v3.0/reference/tools/tidb-lightning/tidb-backend.md) for details. +TiDB Lightning also supports using TiDB instead of Importer as the backend. In this configuration, `tidb-lightning` transforms data into SQL `INSERT` statements and directly executes them on the target cluster, similar to Loader. See [TiDB Lightning TiDB-backend](/v3.0/reference/tools/tidb-lightning/tidb-backend.md) for details. diff --git a/v3.0/reference/tools/tidb-lightning/tidb-backend.md b/v3.0/reference/tools/tidb-lightning/tidb-backend.md index 9242ec6653c7a..606f7e99bd73e 100644 --- a/v3.0/reference/tools/tidb-lightning/tidb-backend.md +++ b/v3.0/reference/tools/tidb-lightning/tidb-backend.md @@ -1,30 +1,38 @@ --- -title: TiDB Lightning "TiDB" Back End +title: TiDB Lightning TiDB-backend summary: Choose how to write data into the TiDB cluster. 
category: reference --- -# TiDB Lightning "TiDB" Back End +# TiDB Lightning TiDB-backend -TiDB Lightning supports two back ends: "Importer" and "TiDB". It determines how `tidb-lightning` delivers data into the target cluster. +TiDB Lightning supports two backends: Importer and TiDB. It determines how `tidb-lightning` delivers data into the target cluster. -The "Importer" back end (default) requires `tidb-lightning` to first encode the SQL or CSV data into KV pairs, and relies on the external `tikv-importer` program to sort these KV pairs and ingest directly into the TiKV nodes. +The Importer-backend (default) requires `tidb-lightning` to first encode the SQL or CSV data into KV pairs, and relies on the external `tikv-importer` program to sort these KV pairs and ingest directly into the TiKV nodes. -The "TiDB" back end requires `tidb-lightning` to encode these data into SQL `INSERT` statements, and has these statements executed directly on the TiDB node. +The TiDB-backend requires `tidb-lightning` to encode these data into SQL `INSERT` statements, and has these statements executed directly on the TiDB node. -| Back end | "Importer" | "TiDB" | +| Back end | Importer | TiDB | |:---|:---|:---| | Speed | Fast (~300 GB/hr) | Slow (~50 GB/hr) | | Resource usage | High | Low | | ACID respected while importing | No | Yes | | Target tables | Must be empty | Can be populated | -## Deployment for "TiDB" back end +## Deployment for TiDB-backend -When using the "TiDB" back end, you no longer need `tikv-importer`. Compared with the [standard deployment procedure](/v3.0/reference/tools/tidb-lightning/deployment.md), the "TiDB" back end deployment has the following two differences: +When using the TiDB-backend, you no longer need `tikv-importer`. Compared with the [standard deployment procedure](/v3.0/reference/tools/tidb-lightning/deployment.md), the TiDB-backend deployment has the following two differences: * Steps involving `tikv-importer` can all be skipped. 
-* The configuration must be changed to indicate the "TiDB" back end is used. +* The configuration must be changed to indicate the TiDB-backend is used. + +### Hardware requirements + +The speed of TiDB Lightning using TiDB-backend is limited by the SQL processing speed of TiDB. Therefore, even a lower-end machine may max out the possible performance. The recommended hardware configuration is: + +* 16 logical cores CPU +* An SSD large enough to store the entire data source, preferring higher read speed +* 1 Gigabit network card ### Ansible deployment @@ -72,7 +80,7 @@ or supplying the `--backend tidb` arguments when executing `tidb-lightning`. ## Conflict resolution -The "TiDB" back end supports importing to an already-populated table. However, the new data might cause a unique key conflict with the old data. You can control how to resolve the conflict by using this task configuration. +The TiDB-backend supports importing to an already-populated table. However, the new data might cause a unique key conflict with the old data. You can control how to resolve the conflict by using this task configuration. ```toml [tikv-importer] @@ -86,9 +94,9 @@ on-duplicate = "replace" # or "error" or "ignore" | ignore | Keep old entries and ignore new ones | `INSERT IGNORE INTO ...` | | error | Abort import | `INSERT INTO ...` | -## Migrating from Loader to TiDB Lightning "TiDB" back end +## Migrating from Loader to TiDB Lightning TiDB-backend -TiDB Lightning using the "TiDB" back end can completely replace functions of [Loader](/v3.0/reference/tools/loader.md). The following list shows how to translate Loader configurations into [TiDB Lightning configurations](/v3.0/reference/tools/tidb-lightning/deployment.md#step-4-start-tidb-lightning). +TiDB Lightning using the TiDB-backend can completely replace functions of [Loader](/v3.0/reference/tools/loader.md). 
The following list shows how to translate Loader configurations into [TiDB Lightning configurations](/v3.0/reference/tools/tidb-lightning/deployment.md#step-4-start-tidb-lightning).
LoaderTiDB Lightning
@@ -167,7 +175,7 @@ schema = "tidb_lightning_checkpoint" ```toml [tikv-importer] -# use the "TiDB" back end +# use the TiDB-backend backend = "tidb" ``` diff --git a/v3.1/TOC.md b/v3.1/TOC.md index 783d3c658dc5f..2528d71c8d5b5 100644 --- a/v3.1/TOC.md +++ b/v3.1/TOC.md @@ -360,6 +360,7 @@ - [Checkpoints](/v3.1/reference/tools/tidb-lightning/checkpoints.md) - [Table Filter](/v3.1/reference/tools/tidb-lightning/table-filter.md) - [CSV Support](/v3.1/reference/tools/tidb-lightning/csv.md) + - [TiDB-backend](/v3.1/reference/tools/tidb-lightning/tidb-backend.md) - [Web Interface](/v3.1/reference/tools/tidb-lightning/web.md) - [Monitor](/v3.1/reference/tools/tidb-lightning/monitor.md) - [Troubleshoot](/v3.1/how-to/troubleshoot/tidb-lightning.md) diff --git a/v3.1/faq/tidb-lightning.md b/v3.1/faq/tidb-lightning.md index 926d8468a4147..9f6155ff2e744 100644 --- a/v3.1/faq/tidb-lightning.md +++ b/v3.1/faq/tidb-lightning.md @@ -28,12 +28,14 @@ TiDB Lightning requires the following privileges: * CREATE * DROP -If the target database is used to store checkpoints, it additionally requires these privileges: +If the [TiDB-backend](/v3.1/reference/tools/tidb-lightning/tidb-backend.md) is chosen, or the target database is used to store checkpoints, it additionally requires these privileges: * INSERT * DELETE -If the `checksum` configuration item of TiDB Lightning is set to `true`, then the admin user privileges in the downstream TiDB need to be granted to TiDB Lightning. +The Importer-backend does not require these two privileges because data is ingested into TiKV directly, which bypasses the entire TiDB privilege system. This is secure as long as the ports of TiKV, TiKV Importer and TiDB Lightning are not reachable outside the cluster. + +If the `checksum` configuration of TiDB Lightning is set to `true`, then the admin user privileges in the downstream TiDB need to be granted to TiDB Lightning. ## TiDB Lightning encountered an error when importing one table. 
Will it affect other tables? Will the process be terminated? diff --git a/v3.1/reference/tools/tidb-lightning/deployment.md b/v3.1/reference/tools/tidb-lightning/deployment.md index b0dd07dcef014..f942e35e97b05 100644 --- a/v3.1/reference/tools/tidb-lightning/deployment.md +++ b/v3.1/reference/tools/tidb-lightning/deployment.md @@ -6,9 +6,9 @@ category: reference # TiDB Lightning Deployment -This document describes the hardware requirements of TiDB Lightning using the default "Importer" back end, and how to deploy it using Ansible or manually. +This document describes the hardware requirements of TiDB Lightning using the default Importer-backend, and how to deploy it using Ansible or manually. -If you wish to use the "TiDB" back end, also read [TiDB Lightning "TiDB" Back End](/v3.1/reference/tools/tidb-lightning/tidb-backend.md) for the changes to the deployment steps. +If you wish to use the TiDB-backend, also read [TiDB Lightning TiDB-backend](/v3.1/reference/tools/tidb-lightning/tidb-backend.md) for the changes to the deployment steps. ## Notes @@ -246,9 +246,9 @@ Refer to the [TiDB enterprise tools download page](/v3.1/reference/tools/downloa file = "tidb-lightning.log" [tikv-importer] - # Delivery back end, can be "importer" or "tidb". + # Delivery backend, can be "importer" or "tidb". # backend = "importer" - # The listening address of tikv-importer when back end is "importer". Change it to the actual address. + # The listening address of tikv-importer when backend is "importer". Change it to the actual address. addr = "172.16.31.10:8287" [mydumper] @@ -273,3 +273,9 @@ Refer to the [TiDB enterprise tools download page](/v3.1/reference/tools/downloa ```sh nohup ./tidb-lightning -config tidb-lightning.toml > nohup.out & ``` + +## Upgrading TiDB Lightning + +You can upgrade TiDB Lightning by replacing the binaries alone. No further configuration is needed. 
See [FAQ](/v3.1/faq/tidb-lightning.md#how-to-properly-restart-tidb-lightning) for detailed instructions on restarting TiDB Lightning. + +If an import task is running, we recommend that you wait until it finishes before upgrading TiDB Lightning. Otherwise, you might need to reimport from scratch, because there is no guarantee that checkpoints work across versions. diff --git a/v3.1/reference/tools/tidb-lightning/overview.md b/v3.1/reference/tools/tidb-lightning/overview.md index 40f51d4dd55bb..b595a16c3041d 100644 --- a/v3.1/reference/tools/tidb-lightning/overview.md +++ b/v3.1/reference/tools/tidb-lightning/overview.md @@ -41,4 +41,4 @@ The complete import process is as follows: 7. Finally, `tidb-lightning` switches the TiKV cluster back to "normal mode", so the cluster resumes normal services. -TiDB Lightning also supports using "TiDB" instead of "Importer" as the back end. In this configuration, `tidb-lightning` transforms data into SQL `INSERT` statements and directly executes them on the target cluster, similar to Loader. See [TiDB Lightning "TiDB" Back End](/v3.1/reference/tools/tidb-lightning/tidb-backend.md) for details. +TiDB Lightning also supports using TiDB instead of Importer as the backend. In this configuration, `tidb-lightning` transforms data into SQL `INSERT` statements and directly executes them on the target cluster, similar to Loader. See [TiDB Lightning TiDB-backend](/v3.1/reference/tools/tidb-lightning/tidb-backend.md) for details. diff --git a/v3.1/reference/tools/tidb-lightning/tidb-backend.md b/v3.1/reference/tools/tidb-lightning/tidb-backend.md index c3a998404a8ca..4c0c7ccd4c37c 100644 --- a/v3.1/reference/tools/tidb-lightning/tidb-backend.md +++ b/v3.1/reference/tools/tidb-lightning/tidb-backend.md @@ -1,30 +1,38 @@ --- -title: TiDB Lightning "TiDB" Back End +title: TiDB Lightning TiDB-backend summary: Choose how to write data into the TiDB cluster.
category: reference --- -# TiDB Lightning "TiDB" Back End +# TiDB Lightning TiDB-backend -TiDB Lightning supports two back ends: "Importer" and "TiDB". It determines how `tidb-lightning` delivers data into the target cluster. +TiDB Lightning supports two backends: Importer and TiDB. It determines how `tidb-lightning` delivers data into the target cluster. -The "Importer" back end (default) requires `tidb-lightning` to first encode the SQL or CSV data into KV pairs, and relies on the external `tikv-importer` program to sort these KV pairs and ingest directly into the TiKV nodes. +The Importer-backend (default) requires `tidb-lightning` to first encode the SQL or CSV data into KV pairs, and relies on the external `tikv-importer` program to sort these KV pairs and ingest directly into the TiKV nodes. -The "TiDB" back end requires `tidb-lightning` to encode these data into SQL `INSERT` statements, and has these statements executed directly on the TiDB node. +The TiDB-backend requires `tidb-lightning` to encode these data into SQL `INSERT` statements, and has these statements executed directly on the TiDB node. -| Back end | "Importer" | "TiDB" | +| Back end | Importer | TiDB | |:---|:---|:---| | Speed | Fast (~300 GB/hr) | Slow (~50 GB/hr) | | Resource usage | High | Low | | ACID respected while importing | No | Yes | | Target tables | Must be empty | Can be populated | -## Deployment for "TiDB" back end +## Deployment for TiDB-backend -When using the "TiDB" back end, you no longer need `tikv-importer`. Compared with the [standard deployment procedure](/v3.1/reference/tools/tidb-lightning/deployment.md), the "TiDB" back end deployment has the following two differences: +When using the TiDB-backend, you no longer need `tikv-importer`. Compared with the [standard deployment procedure](/v3.1/reference/tools/tidb-lightning/deployment.md), the TiDB-backend deployment has the following two differences: * Steps involving `tikv-importer` can all be skipped. 
-* The configuration must be changed to indicate the "TiDB" back end is used. +* The configuration must be changed to indicate the TiDB-backend is used. + +### Hardware requirements + +The speed of TiDB Lightning using TiDB-backend is limited by the SQL processing speed of TiDB. Therefore, even a lower-end machine may max out the possible performance. The recommended hardware configuration is: + +* 16 logical cores CPU +* An SSD large enough to store the entire data source, preferring higher read speed +* 1 Gigabit network card ### Ansible deployment @@ -72,7 +80,7 @@ or supplying the `--backend tidb` arguments when executing `tidb-lightning`. ## Conflict resolution -The "TiDB" back end supports importing to an already-populated table. However, the new data might cause a unique key conflict with the old data. You can control how to resolve the conflict by using this task configuration. +The TiDB-backend supports importing to an already-populated table. However, the new data might cause a unique key conflict with the old data. You can control how to resolve the conflict by using this task configuration. ```toml [tikv-importer] @@ -86,9 +94,9 @@ on-duplicate = "replace" # or "error" or "ignore" | ignore | Keep old entries and ignore new ones | `INSERT IGNORE INTO ...` | | error | Abort import | `INSERT INTO ...` | -## Migrating from Loader to TiDB Lightning "TiDB" back end +## Migrating from Loader to TiDB Lightning TiDB-backend -TiDB Lightning using the "TiDB" back end can completely replace functions of [Loader](/v3.1/reference/tools/loader.md). The following list shows how to translate Loader configurations into [TiDB Lightning configurations](/v3.1/reference/tools/tidb-lightning/deployment.md#step-4-start-tidb-lightning). +TiDB Lightning using the TiDB-backend can completely replace functions of [Loader](/v3.1/reference/tools/loader.md). 
The following table shows how to translate Loader configurations into [TiDB Lightning configurations](/v3.1/reference/tools/tidb-lightning/deployment.md#step-4-start-tidb-lightning).
| Loader | TiDB Lightning |
@@ -167,7 +175,7 @@ schema = "tidb_lightning_checkpoint" ```toml [tikv-importer] -# use the "TiDB" back end +# use the TiDB-backend backend = "tidb" ```
| Loader | TiDB Lightning |