From ff2e9ed56ca9b1ab0f88df9e2ab5c333cedf4a2c Mon Sep 17 00:00:00 2001 From: Null not nil Date: Fri, 4 Sep 2020 11:12:12 -0600 Subject: [PATCH 1/5] stmt-reference, sysvars: Document dml-batch-size --- import-example-data.md | 5 +++-- sql-statements/sql-statement-load-data.md | 7 +++++-- system-variables.md | 10 +++++++++- 3 files changed, 17 insertions(+), 5 deletions(-) diff --git a/import-example-data.md b/import-example-data.md index 49136bdb2a883..2ecadba9e2816 100644 --- a/import-example-data.md +++ b/import-example-data.md @@ -44,6 +44,7 @@ CREATE TABLE trips ( You can import files individually using the example `LOAD DATA` command here, or import all files using the bash loop below: ```sql +SET tidb_dml_batch_size = 20000; LOAD DATA LOCAL INFILE '2017Q1-capitalbikeshare-tripdata.csv' INTO TABLE trips FIELDS TERMINATED BY ',' ENCLOSED BY '"' LINES TERMINATED BY '\r\n' @@ -61,8 +62,8 @@ end_station_number, end_station, bike_number, member_type); To import all `*.csv` files into TiDB in a bash loop: ```bash -for FILE in `ls *.csv`; do +for FILE in *.csv; do echo "== $FILE ==" - mysql bikeshare --local-infile=1 -e "LOAD DATA LOCAL INFILE '${FILE}' INTO TABLE trips FIELDS TERMINATED BY ',' ENCLOSED BY '\"' LINES TERMINATED BY '\r\n' IGNORE 1 LINES (duration, start_date, end_date, start_station_number, start_station, end_station_number, end_station, bike_number, member_type);" + mysql bikeshare --local-infile=1 -e "SET tidb_dml_batch_size = 20000; LOAD DATA LOCAL INFILE '${FILE}' INTO TABLE trips FIELDS TERMINATED BY ',' ENCLOSED BY '\"' LINES TERMINATED BY '\r\n' IGNORE 1 LINES (duration, start_date, end_date, start_station_number, start_station, end_station_number, end_station, bike_number, member_type);" done; ``` diff --git a/sql-statements/sql-statement-load-data.md b/sql-statements/sql-statement-load-data.md index 6b0094ef865f8..3bb1307798866 100644 --- a/sql-statements/sql-statement-load-data.md +++ b/sql-statements/sql-statement-load-data.md @@ -104,13 +104,16 @@ In the above example, `x'2c'` is the hexadecimal representation of the `,` chara ## MySQL compatibility -* TiDB will by default commit every 20 000 rows. This behavior is similar to MySQL NDB Cluster, but not the default configuration with the InnoDB storage engine. +This statement is understood to be fully compatible with MySQL. Any compatibility differences should be [reported via an issue](https://github.com/pingcap/tidb/issues/new/choose) on GitHub. > **Note:** > -> Committing through splitting a transaction is at the expense of breaking the atomicity and isolation of the transaction. When performing this operation, you must ensure that there are **no other** ongoing operations on the table. When an error occurs, **manual intervention is required to check the consistency and integrity of the data**. Therefore, it is not recommended to use `LOAD DATA` on any tables which are actively being read from or written to. +> In earlier releases of TiDB, `LOAD DATA` committed every 20 000 rows. By default, TiDB now commits all rows in one transaction. This can result in the error `ERROR 8004 (HY000) at line 1: Transaction is too large, size: 100000058` after upgrading from TiDB 4.0 or earlier. +> +> The recommended way to resolve this error is to increase the `txn-total-size-limit` in your tidb.toml file. If you are unable to increase this limit, you can also restore the previous behavior by setting [`tidb_dml_batch_size`](/system-variables.md#tidb_dml_batch_size) to `20000`. ## See also * [INSERT](/sql-statements/sql-statement-insert.md) * [Import Example Database](/import-example-data.md) +* [TiDB Lightning](/tidb-lightning/tidb-lightning-overview.md) diff --git a/system-variables.md b/system-variables.md index becca2edb4364..8e626144b08c9 100644 --- a/system-variables.md +++ b/system-variables.md @@ -271,6 +271,14 @@ Constraint checking is always performed in place for pessimistic transactions (d - Use a bigger value in OLAP scenarios, and a smaller value in OLTP scenarios. - For OLAP scenarios, the maximum value cannot exceed the number of CPU cores of all the TiKV nodes. +### tidb_dml_batch_size + +- Scope: SESSION +- Default value: 0 +- Example value: 20000 +- When greater than `0`, TiDB will batch commit statements such as `INSERT` or `LOAD DATA` into smaller transactions. This reduces memory usage and helps ensure that the `txn-total-size-limit` is not reached by bulk modifications. +- Only a value `0` provides ACID compliance. Setting this to any other value will break the atomicity and isolation guarantees of TiDB. + ### tidb_enable_cascades_planner - Scope: SESSION | GLOBAL @@ -819,7 +827,7 @@ SET tidb_slow_log_threshold = 200; - Scope: SESSION | GLOBAL - Default value: SYSTEM -- This variable sets the sytem time zone. Values can be specified as either an offset such as '-8:00' or a named zone 'America/Los_Angeles'. +- This variable sets the system time zone. Values can be specified as either an offset such as '-8:00' or a named zone 'America/Los_Angeles'. ### transaction_isolation From dcd53fcafdb1d02e26e62617527bf834d31e0154 Mon Sep 17 00:00:00 2001 From: Null not nil Date: Sat, 5 Sep 2020 16:21:55 -0600 Subject: [PATCH 2/5] s/a/the/ --- system-variables.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/system-variables.md b/system-variables.md index 8e626144b08c9..cfab1e3c1c724 100644 --- a/system-variables.md +++ b/system-variables.md @@ -277,7 +277,7 @@ Constraint checking is always performed in place for pessimistic transactions (d - Default value: 0 - Example value: 20000 - When greater than `0`, TiDB will batch commit statements such as `INSERT` or `LOAD DATA` into smaller transactions. This reduces memory usage and helps ensure that the `txn-total-size-limit` is not reached by bulk modifications. -- Only a value `0` provides ACID compliance. Setting this to any other value will break the atomicity and isolation guarantees of TiDB. +- Only the value `0` provides ACID compliance. Setting this to any other value will break the atomicity and isolation guarantees of TiDB. ### tidb_enable_cascades_planner From 5b79df0639746703fa742beea5056c16062fc2d3 Mon Sep 17 00:00:00 2001 From: Null not nil <67764674+nullnotnil@users.noreply.github.com> Date: Wed, 9 Sep 2020 00:37:09 -0600 Subject: [PATCH 3/5] Update sql-statements/sql-statement-load-data.md Co-authored-by: TomShawn <41534398+TomShawn@users.noreply.github.com> --- sql-statements/sql-statement-load-data.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sql-statements/sql-statement-load-data.md b/sql-statements/sql-statement-load-data.md index 3bb1307798866..18c9742d9ef9e 100644 --- a/sql-statements/sql-statement-load-data.md +++ b/sql-statements/sql-statement-load-data.md @@ -108,7 +108,7 @@ This statement is understood to be fully compatible with MySQL. Any compatibilit > **Note:** > -> In earlier releases of TiDB, `LOAD DATA` committed every 20 000 rows. By default, TiDB now commits all rows in one transaction. This can result in the error `ERROR 8004 (HY000) at line 1: Transaction is too large, size: 100000058` after upgrading from TiDB 4.0 or earlier. +> In earlier releases of TiDB, `LOAD DATA` committed every 20000 rows. By default, TiDB now commits all rows in one transaction. This can result in the error `ERROR 8004 (HY000) at line 1: Transaction is too large, size: 100000058` after upgrading from TiDB 4.0 or earlier versions. > > The recommended way to resolve this error is to increase the `txn-total-size-limit` in your tidb.toml file. If you are unable to increase this limit, you can also restore the previous behavior by setting [`tidb_dml_batch_size`](/system-variables.md#tidb_dml_batch_size) to `20000`. From 06902d908b8c8b112543a24f8126218706760415 Mon Sep 17 00:00:00 2001 From: Null not nil <67764674+nullnotnil@users.noreply.github.com> Date: Wed, 9 Sep 2020 00:37:27 -0600 Subject: [PATCH 4/5] Update sql-statements/sql-statement-load-data.md Co-authored-by: TomShawn <41534398+TomShawn@users.noreply.github.com> --- sql-statements/sql-statement-load-data.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sql-statements/sql-statement-load-data.md b/sql-statements/sql-statement-load-data.md index 18c9742d9ef9e..73659d1218f8d 100644 --- a/sql-statements/sql-statement-load-data.md +++ b/sql-statements/sql-statement-load-data.md @@ -110,7 +110,7 @@ This statement is understood to be fully compatible with MySQL. Any compatibilit > > In earlier releases of TiDB, `LOAD DATA` committed every 20000 rows. By default, TiDB now commits all rows in one transaction. This can result in the error `ERROR 8004 (HY000) at line 1: Transaction is too large, size: 100000058` after upgrading from TiDB 4.0 or earlier versions. > -> The recommended way to resolve this error is to increase the `txn-total-size-limit` in your tidb.toml file. If you are unable to increase this limit, you can also restore the previous behavior by setting [`tidb_dml_batch_size`](/system-variables.md#tidb_dml_batch_size) to `20000`. +> The recommended way to resolve this error is to increase the `txn-total-size-limit` value in your `tidb.toml` file. If you are unable to increase this limit, you can also restore the previous behavior by setting [`tidb_dml_batch_size`](/system-variables.md#tidb_dml_batch_size) to `20000`. ## See also From e1475f237c2c54d810f5c8bfe3d136e648e07ec3 Mon Sep 17 00:00:00 2001 From: Null not nil <67764674+nullnotnil@users.noreply.github.com> Date: Wed, 9 Sep 2020 00:37:37 -0600 Subject: [PATCH 5/5] Update system-variables.md Co-authored-by: TomShawn <41534398+TomShawn@users.noreply.github.com> --- system-variables.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/system-variables.md b/system-variables.md index cfab1e3c1c724..95c4c69fcc5bc 100644 --- a/system-variables.md +++ b/system-variables.md @@ -276,7 +276,7 @@ Constraint checking is always performed in place for pessimistic transactions (d - Scope: SESSION - Default value: 0 - Example value: 20000 -- When greater than `0`, TiDB will batch commit statements such as `INSERT` or `LOAD DATA` into smaller transactions. This reduces memory usage and helps ensure that the `txn-total-size-limit` is not reached by bulk modifications. +- When this value is greater than `0`, TiDB will batch commit statements such as `INSERT` or `LOAD DATA` into smaller transactions. This reduces memory usage and helps ensure that the `txn-total-size-limit` is not reached by bulk modifications. - Only the value `0` provides ACID compliance. Setting this to any other value will break the atomicity and isolation guarantees of TiDB. ### tidb_enable_cascades_planner