-
Notifications
You must be signed in to change notification settings - Fork 118
Closed
Description
In the design doc https://github.com/tikv/rfcs/blob/master/text/0072-online-bulk-load-for-rawkv.md, one thing is missing.
During bulk load, we first split and scatter regions, and then sort the data. If sorting takes more than 10 minutes, PD will try to merge the regions we split, which causes errors during ingest.
We should let PD pause region merge during ingest (just as Lightning does).
PR tikv/pd#4092 tries to make PD support pausing region merge.
org.tikv.common.exception.GrpcException: message: "peer is not leader for region 1324, leader may Some(id: 1327 store_id: 4)"
not_leader {
region_id: 1324
leader {
id: 1327
store_id: 4
}
}
at org.tikv.common.importer.ImporterStoreClient.multiIngest(ImporterStoreClient.java:152)
at org.tikv.common.importer.ImporterClient.ingest(ImporterClient.java:224)
at org.tikv.common.importer.ImporterClient.write(ImporterClient.java:96)
at org.tikv.bulkload.RawKVBulkLoader.writeAndIngest(RawKVBulkLoader.scala:152)
at org.tikv.bulkload.RawKVBulkLoader.$anonfun$bulkLoad$5(RawKVBulkLoader.scala:110)
at org.tikv.bulkload.RawKVBulkLoader.$anonfun$bulkLoad$5$adapted(RawKVBulkLoader.scala:109)
at org.apache.spark.rdd.RDD.$anonfun$foreachPartition$2(RDD.scala:994)
at org.apache.spark.rdd.RDD.$anonfun$foreachPartition$2$adapted(RDD.scala:994)
at org.apache.spark.SparkContext.$anonfun$runJob$5(SparkContext.scala:2154)
at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
at org.apache.spark.scheduler.Task.run(Task.scala:127)
at org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:463)
at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1377)
at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:466)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
at java.lang.Thread.run(Thread.java:748)
org.tikv.common.exception.GrpcException: message: "EpochNotMatch current epoch of region 4601 is conf_ver: 5 version: 71, but you sent conf_ver: 5 version: 69"
epoch_not_match {
current_regions {
id: 4601
start_key: "c0fb99f6463525f16782957af25b21a4"
end_key: "c13256dc9dc9fedebb7f8fa623274d86"
region_epoch {
conf_ver: 5
version: 71
}
peers {
id: 4602
store_id: 1
}
peers {
id: 4603
store_id: 4
}
peers {
id: 4604
store_id: 7
}
}
current_regions {
id: 3001
start_key: "c0d229b6079d6a3e32cb8294ef2ead09"
end_key: "c0fb99f6463525f16782957af25b21a4"
region_epoch {
conf_ver: 5
version: 68
}
peers {
id: 3002
store_id: 1
}
peers {
id: 3003
store_id: 4
}
peers {
id: 3004
store_id: 7
}
}
current_regions {
id: 3629
start_key: "c09bda130a29e4b1a3fca3290b6e0e7a"
end_key: "c0d229b6079d6a3e32cb8294ef2ead09"
region_epoch {
conf_ver: 5
version: 68
}
peers {
id: 3630
store_id: 1
}
peers {
id: 3631
store_id: 4
}
peers {
id: 3632
store_id: 7
}
}
}
at org.tikv.common.importer.ImporterStoreClient.multiIngest(ImporterStoreClient.java:152)
at org.tikv.common.importer.ImporterClient.ingest(ImporterClient.java:224)
at org.tikv.common.importer.ImporterClient.write(ImporterClient.java:96)
at org.tikv.bulkload.RawKVBulkLoader.writeAndIngest(RawKVBulkLoader.scala:152)
at org.tikv.bulkload.RawKVBulkLoader.$anonfun$bulkLoad$5(RawKVBulkLoader.scala:110)
at org.tikv.bulkload.RawKVBulkLoader.$anonfun$bulkLoad$5$adapted(RawKVBulkLoader.scala:109)
at org.apache.spark.rdd.RDD.$anonfun$foreachPartition$2(RDD.scala:994)
at org.apache.spark.rdd.RDD.$anonfun$foreachPartition$2$adapted(RDD.scala:994)
at org.apache.spark.SparkContext.$anonfun$runJob$5(SparkContext.scala:2139)
at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
at org.apache.spark.scheduler.Task.run(Task.scala:127)
at org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:446)
at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1377)
at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:449)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
at java.lang.Thread.run(Thread.java:748)
org.tikv.common.exception.RegionException: Region Exception occurred region 5697 not found
at org.tikv.common.importer.ImporterStoreClient.multiIngest(ImporterStoreClient.java:153)
at org.tikv.common.importer.ImporterClient.ingest(ImporterClient.java:230)
at org.tikv.common.importer.ImporterClient.write(ImporterClient.java:101)
at org.tikv.bulkload.RawKVBulkLoader.writeAndIngest(RawKVBulkLoader.scala:152)
at org.tikv.bulkload.RawKVBulkLoader.$anonfun$bulkLoad$5(RawKVBulkLoader.scala:110)
at org.tikv.bulkload.RawKVBulkLoader.$anonfun$bulkLoad$5$adapted(RawKVBulkLoader.scala:109)
at org.apache.spark.rdd.RDD.$anonfun$foreachPartition$2(RDD.scala:994)
at org.apache.spark.rdd.RDD.$anonfun$foreachPartition$2$adapted(RDD.scala:994)
at org.apache.spark.SparkContext.$anonfun$runJob$5(SparkContext.scala:2154)
at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
at org.apache.spark.scheduler.Task.run(Task.scala:127)
at org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:463)
at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1377)
at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:466)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
at java.lang.Thread.run(Thread.java:748)
Metadata
Metadata
Assignees
Labels
No labels