diff --git a/README.md b/README.md
index b0077027ad3..d4d06778c07 100644
--- a/README.md
+++ b/README.md
@@ -166,6 +166,30 @@ The following includes ThreadPool related parameters, which can be passed in thr
- a PKCS#8 private key file in PEM format. e.g. /home/tidb/client-key.pem.
- default: null
+#### tikv.rawkv.read_timeout_in_ms
+- RawKV read timeout in milliseconds. This parameter controls the timeout of `get` `getKeyTTL`.
+- default: 2000 (2 seconds)
+
+#### tikv.rawkv.write_timeout_in_ms
+- RawKV write timeout in milliseconds. This parameter controls the timeout of `put` `putAtomic` `putIfAbsent` `delete` `deleteAtomic`.
+- default: 2000 (2 seconds)
+
+#### tikv.rawkv.batch_read_timeout_in_ms
+- RawKV batch read timeout in milliseconds. This parameter controls the timeout of `batchGet`.
+- default: 2000 (2 seconds)
+
+#### tikv.rawkv.batch_write_timeout_in_ms
+- RawKV batch write timeout in milliseconds. This parameter controls the timeout of `batchPut` `batchDelete` `batchDeleteAtomic`.
+- default: 2000 (2 seconds)
+
+#### tikv.rawkv.scan_timeout_in_ms
+- RawKV scan timeout in milliseconds. This parameter controls the timeout of `batchScan` `scan` `scanPrefix`.
+- default: 10000 (10 seconds)
+
+#### tikv.rawkv.clean_timeout_in_ms
+- RawKV clean timeout in milliseconds. This parameter controls the timeout of `deleteRange` `deletePrefix`.
+- default: 600000 (10 minutes)
+
## Metrics
Client Java supports exporting metrics to Prometheus using poll mode and viewing on Grafana. The following steps shows how to enable this function.
diff --git a/metrics/grafana/client_java_summary.json b/metrics/grafana/client_java_summary.json
index 160c013f0b4..2146682760a 100644
--- a/metrics/grafana/client_java_summary.json
+++ b/metrics/grafana/client_java_summary.json
@@ -49,6 +49,20 @@
"iteration": 1636696687563,
"links": [],
"panels": [
+ {
+ "collapsed": false,
+ "datasource": null,
+ "gridPos": {
+ "h": 1,
+ "w": 24,
+ "x": 0,
+ "y": 0
+ },
+ "id": 23,
+ "panels": [],
+ "title": "Service Quality",
+ "type": "row"
+ },
{
"aliasColors": {},
"bars": false,
@@ -65,7 +79,7 @@
"h": 8,
"w": 12,
"x": 0,
- "y": 0
+ "y": 1
},
"hiddenSeries": false,
"id": 2,
@@ -99,7 +113,7 @@
"targets": [
{
"exemplar": true,
- "expr": "sum(rate(client_java_raw_requests_latency_count{instance=~\"$instance\"}[1m])) by (type)",
+ "expr": "sum(rate(client_java_raw_requests_latency_count{instance=~\"$instance\"}[$__interval])) by (type)",
"format": "time_series",
"interval": "",
"intervalFactor": 1,
@@ -127,6 +141,7 @@
},
"yaxes": [
{
+ "$$hashKey": "object:2785",
"format": "short",
"label": null,
"logBase": 1,
@@ -135,6 +150,7 @@
"show": true
},
{
+ "$$hashKey": "object:2786",
"format": "short",
"label": null,
"logBase": 1,
@@ -164,7 +180,7 @@
"h": 8,
"w": 12,
"x": 12,
- "y": 0
+ "y": 1
},
"hiddenSeries": false,
"id": 8,
@@ -197,8 +213,10 @@
"steppedLine": false,
"targets": [
{
- "expr": "sum(rate(client_java_raw_requests_failure_total[1m])) by (type)",
+ "exemplar": true,
+ "expr": "sum(increase(client_java_raw_requests_failure_total[$__interval])) by (type)",
"format": "time_series",
+ "interval": "",
"intervalFactor": 1,
"legendFormat": "{{type}}",
"refId": "A"
@@ -208,7 +226,7 @@
"timeFrom": null,
"timeRegions": [],
"timeShift": null,
- "title": "Client Failures",
+ "title": "Request Failure OPS",
"tooltip": {
"shared": true,
"sort": 0,
@@ -224,14 +242,16 @@
},
"yaxes": [
{
+ "$$hashKey": "object:2863",
"format": "short",
"label": null,
"logBase": 1,
"max": null,
- "min": null,
+ "min": "0",
"show": true
},
{
+ "$$hashKey": "object:2864",
"format": "short",
"label": null,
"logBase": 1,
@@ -261,10 +281,10 @@
"h": 8,
"w": 12,
"x": 0,
- "y": 8
+ "y": 9
},
"hiddenSeries": false,
- "id": 15,
+ "id": 4,
"legend": {
"alignAsTable": true,
"avg": false,
@@ -295,19 +315,37 @@
"targets": [
{
"exemplar": true,
- "expr": "sum(rate(client_java_raw_requests_latency_sum{instance=~\"$instance\"}[1m])) by (type) / sum(rate(client_java_raw_requests_latency_count{instance=~\"$instance\"}[1m])) by (type)",
+ "expr": "histogram_quantile(0.99, sum(rate(client_java_raw_requests_latency_bucket{instance=~\"$instance\"}[$__interval])) by (le, type))",
"format": "time_series",
"interval": "",
"intervalFactor": 1,
"legendFormat": "{{type}}",
"refId": "A"
+ },
+ {
+ "exemplar": true,
+ "expr": "histogram_quantile(1, sum(rate(client_java_raw_requests_latency_bucket{instance=~\"$instance\"}[$__interval])) by (le, type))",
+ "hide": false,
+ "interval": "",
+ "legendFormat": "",
+ "refId": "B"
+ }
+ ],
+ "thresholds": [
+ {
+ "$$hashKey": "object:151",
+ "colorMode": "critical",
+ "fill": true,
+ "line": true,
+ "op": "gt",
+ "value": 0.85,
+ "yaxis": "left"
}
],
- "thresholds": [],
"timeFrom": null,
"timeRegions": [],
"timeShift": null,
- "title": "Raw Request Avg Latency",
+ "title": "Request Duration 99th",
"tooltip": {
"shared": true,
"sort": 0,
@@ -323,20 +361,22 @@
},
"yaxes": [
{
+ "$$hashKey": "object:3019",
"format": "s",
"label": null,
- "logBase": 1,
+ "logBase": 2,
"max": null,
"min": null,
"show": true
},
{
+ "$$hashKey": "object:3020",
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": null,
- "show": true
+ "show": false
}
],
"yaxis": {
@@ -360,10 +400,10 @@
"h": 8,
"w": 12,
"x": 12,
- "y": 8
+ "y": 9
},
"hiddenSeries": false,
- "id": 4,
+ "id": 15,
"legend": {
"alignAsTable": true,
"avg": false,
@@ -394,19 +434,30 @@
"targets": [
{
"exemplar": true,
- "expr": "histogram_quantile(0.99, sum(rate(client_java_raw_requests_latency_bucket{instance=~\"$instance\"}[1m])) by (le, type))",
+ "expr": "sum(rate(client_java_raw_requests_latency_sum{instance=~\"$instance\"}[$__interval])) by (type) / sum(rate(client_java_raw_requests_latency_count{instance=~\"$instance\"}[$__interval])) by (type)",
"format": "time_series",
+ "hide": false,
"interval": "",
"intervalFactor": 1,
"legendFormat": "{{type}}",
"refId": "A"
}
],
- "thresholds": [],
+ "thresholds": [
+ {
+ "$$hashKey": "object:186",
+ "colorMode": "critical",
+ "fill": true,
+ "line": true,
+ "op": "gt",
+ "value": 0.85,
+ "yaxis": "left"
+ }
+ ],
"timeFrom": null,
"timeRegions": [],
"timeShift": null,
- "title": "Raw Request 99th Latency",
+ "title": "Request Duration Avg",
"tooltip": {
"shared": true,
"sort": 0,
@@ -422,20 +473,22 @@
},
"yaxes": [
{
+ "$$hashKey": "object:2941",
"format": "s",
"label": null,
- "logBase": 1,
+ "logBase": 2,
"max": null,
"min": null,
"show": true
},
{
+ "$$hashKey": "object:2942",
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": null,
- "show": true
+ "show": false
}
],
"yaxis": {
@@ -443,14 +496,31 @@
"alignLevel": null
}
},
+ {
+ "collapsed": false,
+ "datasource": null,
+ "gridPos": {
+ "h": 1,
+ "w": 24,
+ "x": 0,
+ "y": 17
+ },
+ "id": 25,
+ "panels": [],
+ "title": "Causes of TiKV Request Latency",
+ "type": "row"
+ },
{
"aliasColors": {},
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": "${DS_TEST-CLUSTER}",
+ "description": "",
"fieldConfig": {
- "defaults": {},
+ "defaults": {
+ "unit": "s"
+ },
"overrides": []
},
"fill": 1,
@@ -459,14 +529,15 @@
"h": 8,
"w": 12,
"x": 0,
- "y": 16
+ "y": 18
},
"hiddenSeries": false,
- "id": 7,
+ "id": 10,
"legend": {
"alignAsTable": true,
"avg": false,
"current": true,
+ "hideEmpty": false,
"max": true,
"min": false,
"rightSide": true,
@@ -476,7 +547,6 @@
},
"lines": true,
"linewidth": 1,
- "links": [],
"nullPointMode": "null",
"options": {
"alertThreshold": true
@@ -493,19 +563,31 @@
"targets": [
{
"exemplar": true,
- "expr": "sum(rate(client_java_grpc_raw_requests_latency_sum{instance=~\"$instance\"}[1m])) by (type) / sum(rate(client_java_grpc_raw_requests_latency_count{instance=~\"$instance\"}[1m])) by (type)",
+ "expr": "histogram_quantile(0.99, sum(rate(client_java_grpc_raw_requests_latency_bucket{instance=~\"$instance\"}[$__interval])) by (le, type))",
"format": "time_series",
+ "instant": false,
"interval": "",
"intervalFactor": 1,
- "legendFormat": "{{type}}",
+ "legendFormat": "{{ type }}",
+ "queryType": "randomWalk",
"refId": "A"
}
],
- "thresholds": [],
+ "thresholds": [
+ {
+ "$$hashKey": "object:413",
+ "colorMode": "critical",
+ "fill": true,
+ "line": true,
+ "op": "gt",
+ "value": 0.85,
+ "yaxis": "left"
+ }
+ ],
"timeFrom": null,
"timeRegions": [],
"timeShift": null,
- "title": "gRPC Raw Requests Avg Latency",
+ "title": "Request Duration + Backoff (Single Run) 99th",
"tooltip": {
"shared": true,
"sort": 0,
@@ -521,20 +603,22 @@
},
"yaxes": [
{
+ "$$hashKey": "object:3179",
"format": "s",
"label": null,
- "logBase": 1,
+ "logBase": 2,
"max": null,
"min": null,
"show": true
},
{
+ "$$hashKey": "object:3180",
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": null,
- "show": true
+ "show": false
}
],
"yaxis": {
@@ -548,11 +632,8 @@
"dashLength": 10,
"dashes": false,
"datasource": "${DS_TEST-CLUSTER}",
- "description": "",
"fieldConfig": {
- "defaults": {
- "unit": "s"
- },
+ "defaults": {},
"overrides": []
},
"fill": 1,
@@ -561,15 +642,14 @@
"h": 8,
"w": 12,
"x": 12,
- "y": 16
+ "y": 18
},
"hiddenSeries": false,
- "id": 10,
+ "id": 7,
"legend": {
"alignAsTable": true,
"avg": false,
"current": true,
- "hideEmpty": false,
"max": true,
"min": false,
"rightSide": true,
@@ -579,6 +659,7 @@
},
"lines": true,
"linewidth": 1,
+ "links": [],
"nullPointMode": "null",
"options": {
"alertThreshold": true
@@ -595,21 +676,29 @@
"targets": [
{
"exemplar": true,
- "expr": "histogram_quantile(0.99, sum(rate(client_java_grpc_raw_requests_latency_bucket{instance=~\"$instance\"}[1m])) by (le, type))",
+ "expr": "sum(rate(client_java_grpc_raw_requests_latency_sum{instance=~\"$instance\"}[$__interval])) by (type) / sum(rate(client_java_grpc_raw_requests_latency_count{instance=~\"$instance\"}[$__interval])) by (type)",
"format": "time_series",
- "instant": false,
"interval": "",
"intervalFactor": 1,
- "legendFormat": "{{ type }}",
- "queryType": "randomWalk",
+ "legendFormat": "{{type}}",
"refId": "A"
}
],
- "thresholds": [],
+ "thresholds": [
+ {
+ "$$hashKey": "object:396",
+ "colorMode": "critical",
+ "fill": true,
+ "line": true,
+ "op": "gt",
+ "value": 0.85,
+ "yaxis": "left"
+ }
+ ],
"timeFrom": null,
"timeRegions": [],
"timeShift": null,
- "title": "gRPC Raw Requests 99th latency",
+ "title": "Request Duration + Backoff (Single Run) Avg",
"tooltip": {
"shared": true,
"sort": 0,
@@ -625,20 +714,22 @@
},
"yaxes": [
{
+ "$$hashKey": "object:3101",
"format": "s",
"label": null,
- "logBase": 1,
+ "logBase": 2,
"max": null,
"min": null,
"show": true
},
{
+ "$$hashKey": "object:3102",
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": null,
- "show": true
+ "show": false
}
],
"yaxis": {
@@ -662,10 +753,10 @@
"h": 8,
"w": 12,
"x": 0,
- "y": 24
+ "y": 26
},
"hiddenSeries": false,
- "id": 12,
+ "id": 13,
"legend": {
"alignAsTable": true,
"avg": false,
@@ -695,18 +786,28 @@
"targets": [
{
"exemplar": true,
- "expr": "sum(rate(client_java_grpc_single_requests_latency_sum{instance=~\"$instance\"}[1m])) by (type) / sum(rate(client_java_grpc_single_requests_latency_count{instance=~\"$instance\"}[1m])) by (type)",
+ "expr": "histogram_quantile(0.99,sum(rate(client_java_grpc_single_requests_latency_bucket{instance=~\"$instance\", type!=\"pdpb.PD/GetRegion\", type!=\"pdpb.PD/GetStore\"}[$__interval])) by (le, type))",
"interval": "",
"legendFormat": "{{ type }}",
"queryType": "randomWalk",
"refId": "A"
}
],
- "thresholds": [],
+ "thresholds": [
+ {
+ "$$hashKey": "object:427",
+ "colorMode": "critical",
+ "fill": true,
+ "line": true,
+ "op": "gt",
+ "value": 0.85,
+ "yaxis": "left"
+ }
+ ],
"timeFrom": null,
"timeRegions": [],
"timeShift": null,
- "title": "gRPC Single Request Avg Latency",
+ "title": "TiKV Request Duration (Single Run) 99th",
"tooltip": {
"shared": true,
"sort": 0,
@@ -722,20 +823,22 @@
},
"yaxes": [
{
+ "$$hashKey": "object:3335",
"format": "s",
"label": null,
- "logBase": 1,
+ "logBase": 2,
"max": null,
"min": null,
"show": true
},
{
+ "$$hashKey": "object:3336",
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": null,
- "show": true
+ "show": false
}
],
"yaxis": {
@@ -759,10 +862,10 @@
"h": 8,
"w": 12,
"x": 12,
- "y": 24
+ "y": 26
},
"hiddenSeries": false,
- "id": 13,
+ "id": 12,
"legend": {
"alignAsTable": true,
"avg": false,
@@ -792,18 +895,28 @@
"targets": [
{
"exemplar": true,
- "expr": "histogram_quantile(0.99, sum(rate(client_java_grpc_single_requests_latency_bucket{instance=~\"$instance\"}[1m])) by (le, type))",
+ "expr": "sum(rate(client_java_grpc_single_requests_latency_sum{instance=~\"$instance\", type!=\"pdpb.PD/GetRegion\", type!=\"pdpb.PD/GetStore\"}[$__interval])) by (type) / sum(rate(client_java_grpc_single_requests_latency_count{instance=~\"$instance\", type!=\"pdpb.PD/GetRegion\", type!=\"pdpb.PD/GetStore\"}[$__interval])) by (type)",
"interval": "",
"legendFormat": "{{ type }}",
"queryType": "randomWalk",
"refId": "A"
}
],
- "thresholds": [],
+ "thresholds": [
+ {
+ "$$hashKey": "object:382",
+ "colorMode": "critical",
+ "fill": true,
+ "line": true,
+ "op": "gt",
+ "value": 0.85,
+ "yaxis": "left"
+ }
+ ],
"timeFrom": null,
"timeRegions": [],
"timeShift": null,
- "title": "gRPC Single Request 99th Latency",
+ "title": "TiKV Request Duration (Single Run) Avg",
"tooltip": {
"shared": true,
"sort": 0,
@@ -819,20 +932,22 @@
},
"yaxes": [
{
+ "$$hashKey": "object:3257",
"format": "s",
"label": null,
- "logBase": 1,
+ "logBase": 2,
"max": null,
"min": null,
"show": true
},
{
+ "$$hashKey": "object:3258",
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": null,
- "show": true
+ "show": false
}
],
"yaxis": {
@@ -856,10 +971,10 @@
"h": 8,
"w": 12,
"x": 0,
- "y": 32
+ "y": 34
},
"hiddenSeries": false,
- "id": 14,
+ "id": 21,
"legend": {
"alignAsTable": true,
"avg": false,
@@ -889,26 +1004,153 @@
"targets": [
{
"exemplar": true,
- "expr": "sum(rate(client_java_get_region_by_requests_latency_sum{instance=~\"$instance\"}[1m])) / sum(rate(client_java_get_region_by_requests_latency_count{instance=~\"$instance\"}[1m]))",
+ "expr": "sum(rate(client_java_seek_leader_store_duration_sum{instance=~\"$instance\"}[$__interval])) by (le) / sum(rate(client_java_seek_leader_store_duration_count{instance=~\"$instance\"}[$__interval])) by (le)",
"interval": "",
- "legendFormat": "avg",
+ "legendFormat": "seek-leader-store-avg",
"queryType": "randomWalk",
"refId": "A"
},
{
"exemplar": true,
- "expr": "histogram_quantile(0.99, sum(rate(client_java_get_region_by_requests_latency_bucket{instance=~\"$instance\"}[1m])) by (le))",
+ "expr": "histogram_quantile(0.99,sum(rate(client_java_seek_leader_store_duration_bucket{instance=~\"$instance\"}[$__interval])) by (le, type))",
"hide": false,
"interval": "",
- "legendFormat": "99th",
- "refId": "B"
+ "legendFormat": "seek-leader-store-99",
+ "refId": "C"
+ }
+ ],
+ "thresholds": [
+ {
+ "$$hashKey": "object:441",
+ "colorMode": "critical",
+ "fill": true,
+ "line": true,
+ "op": "gt",
+ "value": 0.3,
+ "yaxis": "left"
+ }
+ ],
+ "timeFrom": null,
+ "timeRegions": [],
+ "timeShift": null,
+ "title": "Backoff: Seek Leader Store Duration (Single Run)",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "$$hashKey": "object:252",
+ "format": "s",
+ "label": null,
+ "logBase": 2,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "$$hashKey": "object:253",
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": false
+ }
+ ],
+ "yaxis": {
+ "align": false,
+ "alignLevel": null
+ }
+ },
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_TEST-CLUSTER}",
+ "fieldConfig": {
+ "defaults": {},
+ "overrides": []
+ },
+ "fill": 1,
+ "fillGradient": 0,
+ "gridPos": {
+ "h": 8,
+ "w": 12,
+ "x": 12,
+ "y": 34
+ },
+ "hiddenSeries": false,
+ "id": 47,
+ "legend": {
+ "alignAsTable": true,
+ "avg": false,
+ "current": true,
+ "max": true,
+ "min": false,
+ "rightSide": true,
+ "show": true,
+ "total": false,
+ "values": true
+ },
+ "lines": true,
+ "linewidth": 1,
+ "nullPointMode": "null",
+ "options": {
+ "alertThreshold": true
+ },
+ "percentage": false,
+ "pluginVersion": "7.5.7",
+ "pointradius": 2,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "exemplar": true,
+ "expr": "sum(rate(client_java_seek_proxy_store_duration_sum{instance=~\"$instance\"}[$__interval])) by (le) / sum(rate(client_java_seek_proxy_store_duration_count{instance=~\"$instance\"}[$__interval])) by (le)",
+ "interval": "",
+ "legendFormat": "seek-proxy-store-avg",
+ "queryType": "randomWalk",
+ "refId": "A"
+ },
+ {
+ "exemplar": true,
+ "expr": "histogram_quantile(0.99,sum(rate(client_java_seek_proxy_store_duration_bucket{instance=~\"$instance\"}[$__interval])) by (le, type))",
+ "hide": false,
+ "interval": "",
+ "legendFormat": "seek-proxy-store-99",
+ "refId": "C"
+ }
+ ],
+ "thresholds": [
+ {
+ "$$hashKey": "object:441",
+ "colorMode": "critical",
+ "fill": true,
+ "line": true,
+ "op": "gt",
+ "value": 0.2,
+ "yaxis": "left"
}
],
- "thresholds": [],
"timeFrom": null,
"timeRegions": [],
"timeShift": null,
- "title": "Get Region Latency",
+ "title": "Backoff: Seek Proxy Store Duration (Single Run)",
"tooltip": {
"shared": true,
"sort": 0,
@@ -924,14 +1166,1090 @@
},
"yaxes": [
{
+ "$$hashKey": "object:252",
"format": "s",
"label": null,
+ "logBase": 2,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "$$hashKey": "object:253",
+ "format": "short",
+ "label": null,
"logBase": 1,
"max": null,
"min": null,
+ "show": false
+ }
+ ],
+ "yaxis": {
+ "align": false,
+ "alignLevel": null
+ }
+ },
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_TEST-CLUSTER}",
+ "fieldConfig": {
+ "defaults": {},
+ "overrides": []
+ },
+ "fill": 1,
+ "fillGradient": 0,
+ "gridPos": {
+ "h": 8,
+ "w": 12,
+ "x": 0,
+ "y": 42
+ },
+ "hiddenSeries": false,
+ "id": 17,
+ "legend": {
+ "alignAsTable": true,
+ "avg": false,
+ "current": true,
+ "max": true,
+ "min": false,
+ "rightSide": true,
+ "show": true,
+ "sort": "max",
+ "sortDesc": true,
+ "total": false,
+ "values": true
+ },
+ "lines": true,
+ "linewidth": 1,
+ "nullPointMode": "null",
+ "options": {
+ "alertThreshold": true
+ },
+ "percentage": false,
+ "pluginVersion": "7.5.7",
+ "pointradius": 2,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "exemplar": true,
+ "expr": "sum(rate(client_java_backoff_duration_sum{instance=~\"$instance\", type!=\"BoPDRPC\"}[$__interval])) by (le, type)",
+ "hide": false,
+ "interval": "",
+ "legendFormat": "{{type}}-total",
+ "refId": "B"
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeRegions": [],
+ "timeShift": null,
+ "title": "Backoff Duration (Single Run + Inter-Run) Total",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "$$hashKey": "object:221",
+ "format": "s",
+ "label": null,
+ "logBase": 2,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "$$hashKey": "object:222",
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": false
+ }
+ ],
+ "yaxis": {
+ "align": false,
+ "alignLevel": null
+ }
+ },
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_TEST-CLUSTER}",
+ "fieldConfig": {
+ "defaults": {},
+ "overrides": []
+ },
+ "fill": 1,
+ "fillGradient": 0,
+ "gridPos": {
+ "h": 8,
+ "w": 12,
+ "x": 12,
+ "y": 42
+ },
+ "hiddenSeries": false,
+ "id": 19,
+ "legend": {
+ "alignAsTable": true,
+ "avg": false,
+ "current": true,
+ "max": true,
+ "min": false,
+ "rightSide": true,
+ "show": true,
+ "total": false,
+ "values": true
+ },
+ "lines": true,
+ "linewidth": 1,
+ "nullPointMode": "null",
+ "options": {
+ "alertThreshold": true
+ },
+ "percentage": false,
+ "pluginVersion": "7.5.7",
+ "pointradius": 2,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "exemplar": true,
+ "expr": "sum(rate(client_java_backoff_duration_count{instance=~\"$instance\"}[$__interval])) by (le, type)",
+ "interval": "",
+ "legendFormat": "{{type}}-count",
+ "queryType": "randomWalk",
+ "refId": "A"
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeRegions": [],
+ "timeShift": null,
+ "title": "Backoff OPS",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "$$hashKey": "object:82",
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "$$hashKey": "object:83",
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ],
+ "yaxis": {
+ "align": false,
+ "alignLevel": null
+ }
+ },
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_TEST-CLUSTER}",
+ "fieldConfig": {
+ "defaults": {},
+ "overrides": []
+ },
+ "fill": 1,
+ "fillGradient": 0,
+ "gridPos": {
+ "h": 8,
+ "w": 12,
+ "x": 0,
+ "y": 50
+ },
+ "hiddenSeries": false,
+ "id": 46,
+ "legend": {
+ "alignAsTable": true,
+ "avg": false,
+ "current": true,
+ "max": true,
+ "min": false,
+ "rightSide": true,
+ "show": true,
+ "sort": "max",
+ "sortDesc": true,
+ "total": false,
+ "values": true
+ },
+ "lines": true,
+ "linewidth": 1,
+ "nullPointMode": "null",
+ "options": {
+ "alertThreshold": true
+ },
+ "percentage": false,
+ "pluginVersion": "7.5.7",
+ "pointradius": 2,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "exemplar": true,
+ "expr": "sum(rate(client_java_backoff_duration_sum{instance=~\"$instance\", type!=\"BoPDRPC\"}[$__interval])) by (le, type) / sum(rate(client_java_backoff_duration_count{instance=~\"$instance\", type!=\"BoPDRPC\"}[$__interval])) by (le, type)",
+ "interval": "",
+ "legendFormat": "{{type}}-avg",
+ "queryType": "randomWalk",
+ "refId": "A"
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeRegions": [],
+ "timeShift": null,
+ "title": "Backoff Duration (Single Run + Inter-Run) Avg",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "$$hashKey": "object:221",
+ "format": "s",
+ "label": null,
+ "logBase": 2,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "$$hashKey": "object:222",
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": false
+ }
+ ],
+ "yaxis": {
+ "align": false,
+ "alignLevel": null
+ }
+ },
+ {
+ "collapsed": false,
+ "datasource": null,
+ "gridPos": {
+ "h": 1,
+ "w": 24,
+ "x": 0,
+ "y": 58
+ },
+ "id": 40,
+ "panels": [],
+ "title": "Causes of PD Request Latency",
+ "type": "row"
+ },
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_TEST-CLUSTER}",
+ "fieldConfig": {
+ "defaults": {},
+ "overrides": []
+ },
+ "fill": 1,
+ "fillGradient": 0,
+ "gridPos": {
+ "h": 8,
+ "w": 12,
+ "x": 0,
+ "y": 59
+ },
+ "hiddenSeries": false,
+ "id": 14,
+ "legend": {
+ "alignAsTable": true,
+ "avg": false,
+ "current": true,
+ "max": true,
+ "min": false,
+ "rightSide": true,
+ "show": true,
+ "total": false,
+ "values": true
+ },
+ "lines": true,
+ "linewidth": 1,
+ "nullPointMode": "null",
+ "options": {
+ "alertThreshold": true
+ },
+ "percentage": false,
+ "pluginVersion": "7.5.7",
+ "pointradius": 2,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "exemplar": true,
+ "expr": "sum(rate(client_java_get_region_by_requests_latency_sum{instance=~\"$instance\"}[$__interval])) / sum(rate(client_java_get_region_by_requests_latency_count{instance=~\"$instance\"}[$__interval]))",
+ "interval": "",
+ "legendFormat": "avg",
+ "queryType": "randomWalk",
+ "refId": "A"
+ },
+ {
+ "exemplar": true,
+ "expr": "histogram_quantile(0.99, sum(rate(client_java_get_region_by_requests_latency_bucket{instance=~\"$instance\"}[$__interval])) by (le))",
+ "hide": false,
+ "interval": "",
+ "legendFormat": "99th",
+ "refId": "B"
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeRegions": [],
+ "timeShift": null,
+ "title": "Get Region from Cache/PD Duration",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "$$hashKey": "object:3413",
+ "format": "s",
+ "label": null,
+ "logBase": 2,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "$$hashKey": "object:3414",
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": false
+ }
+ ],
+ "yaxis": {
+ "align": false,
+ "alignLevel": null
+ }
+ },
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_TEST-CLUSTER}",
+ "fieldConfig": {
+ "defaults": {},
+ "overrides": []
+ },
+ "fill": 1,
+ "fillGradient": 0,
+ "gridPos": {
+ "h": 8,
+ "w": 12,
+ "x": 12,
+ "y": 59
+ },
+ "hiddenSeries": false,
+ "id": 37,
+ "legend": {
+ "alignAsTable": true,
+ "avg": true,
+ "current": false,
+ "max": false,
+ "min": true,
+ "rightSide": true,
+ "show": true,
+ "total": false,
+ "values": true
+ },
+ "lines": true,
+ "linewidth": 1,
+ "nullPointMode": "null",
+ "options": {
+ "alertThreshold": true
+ },
+ "percentage": false,
+ "pluginVersion": "7.5.7",
+ "pointradius": 2,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "exemplar": true,
+ "expr": "1 - sum(rate(client_java_grpc_single_requests_latency_count{instance=~\"$instance\", type=\"pdpb.PD/GetRegion\"}[$__interval])) / sum(rate(client_java_get_region_by_requests_latency_count{instance=~\"$instance\"}[$__interval]))",
+ "interval": "",
+ "legendFormat": "hit ratio",
+ "queryType": "randomWalk",
+ "refId": "A"
+ }
+ ],
+ "thresholds": [
+ {
+ "$$hashKey": "object:640",
+ "colorMode": "critical",
+ "fill": true,
+ "line": true,
+ "op": "gt",
+ "yaxis": "left"
+ }
+ ],
+ "timeFrom": null,
+ "timeRegions": [],
+ "timeShift": null,
+ "title": "Region Cache Hit Ratio",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "$$hashKey": "object:277",
+ "format": "percentunit",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "$$hashKey": "object:278",
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": false
+ }
+ ],
+ "yaxis": {
+ "align": false,
+ "alignLevel": null
+ }
+ },
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_TEST-CLUSTER}",
+ "fieldConfig": {
+ "defaults": {},
+ "overrides": []
+ },
+ "fill": 1,
+ "fillGradient": 0,
+ "gridPos": {
+ "h": 8,
+ "w": 12,
+ "x": 0,
+ "y": 67
+ },
+ "hiddenSeries": false,
+ "id": 38,
+ "legend": {
+ "alignAsTable": true,
+ "avg": false,
+ "current": true,
+ "max": true,
+ "min": false,
+ "rightSide": true,
+ "show": true,
+ "total": false,
+ "values": true
+ },
+ "lines": true,
+ "linewidth": 1,
+ "nullPointMode": "null",
+ "options": {
+ "alertThreshold": true
+ },
+ "percentage": false,
+ "pluginVersion": "7.5.7",
+ "pointradius": 2,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "exemplar": true,
+ "expr": "histogram_quantile(0.99,sum(rate(client_java_grpc_single_requests_latency_bucket{instance=~\"$instance\", type=\"pdpb.PD/GetStore\"}[$__interval])) by (le, type))",
+ "interval": "",
+ "legendFormat": "{{ type }}-99th",
+ "queryType": "randomWalk",
+ "refId": "A"
+ },
+ {
+ "exemplar": true,
+ "expr": "histogram_quantile(0.99,sum(rate(client_java_grpc_single_requests_latency_bucket{instance=~\"$instance\", type=\"pdpb.PD/GetRegion\"}[$__interval])) by (le, type))",
+ "hide": false,
+ "interval": "",
+ "legendFormat": "{{ type }}-99th",
+ "refId": "B"
+ },
+ {
+ "exemplar": true,
+ "expr": "histogram_quantile(0.99,sum(rate(client_java_grpc_single_requests_latency_bucket{instance=~\"$instance\", type=\"pdpb.PD/GetMembers\"}[$__interval])) by (le, type))",
+ "hide": false,
+ "interval": "",
+ "legendFormat": "{{ type }}-99th",
+ "refId": "C"
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeRegions": [],
+ "timeShift": null,
+ "title": "PD Request Duration 99th",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "$$hashKey": "object:3335",
+ "format": "s",
+ "label": null,
+ "logBase": 2,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "$$hashKey": "object:3336",
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": false
+ }
+ ],
+ "yaxis": {
+ "align": false,
+ "alignLevel": null
+ }
+ },
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_TEST-CLUSTER}",
+ "fieldConfig": {
+ "defaults": {},
+ "overrides": []
+ },
+ "fill": 1,
+ "fillGradient": 0,
+ "gridPos": {
+ "h": 8,
+ "w": 12,
+ "x": 12,
+ "y": 67
+ },
+ "hiddenSeries": false,
+ "id": 43,
+ "legend": {
+ "alignAsTable": true,
+ "avg": false,
+ "current": true,
+ "max": true,
+ "min": false,
+ "rightSide": true,
+ "show": true,
+ "total": false,
+ "values": true
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "nullPointMode": "null",
+ "options": {
+ "alertThreshold": true
+ },
+ "percentage": false,
+ "pluginVersion": "7.5.7",
+ "pointradius": 2,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "exemplar": true,
+ "expr": "sum(rate(client_java_grpc_single_requests_latency_count{instance=~\"$instance\", type=\"pdpb.PD/GetRegion\"}[$__interval])) by (type)",
+ "hide": false,
+ "interval": "",
+ "legendFormat": "{{type}}",
+ "refId": "B"
+ },
+ {
+ "exemplar": true,
+ "expr": "sum(rate(client_java_grpc_single_requests_latency_count{instance=~\"$instance\", type=\"pdpb.PD/GetStore\"}[$__interval])) by (type)",
+ "hide": false,
+ "interval": "",
+ "legendFormat": "{{type}}",
+ "refId": "A"
+ },
+ {
+ "exemplar": true,
+ "expr": "sum(rate(client_java_grpc_single_requests_latency_count{instance=~\"$instance\", type=\"pdpb.PD/GetMembers\"}[$__interval])) by (type)",
+ "hide": false,
+ "interval": "",
+ "legendFormat": "{{type}}",
+ "refId": "C"
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeRegions": [],
+ "timeShift": null,
+ "title": "PD Request OPS",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "$$hashKey": "object:2785",
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "$$hashKey": "object:2786",
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ],
+ "yaxis": {
+ "align": false,
+ "alignLevel": null
+ }
+ },
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_TEST-CLUSTER}",
+ "fieldConfig": {
+ "defaults": {},
+ "overrides": []
+ },
+ "fill": 1,
+ "fillGradient": 0,
+ "gridPos": {
+ "h": 8,
+ "w": 12,
+ "x": 0,
+ "y": 75
+ },
+ "hiddenSeries": false,
+ "id": 44,
+ "legend": {
+ "alignAsTable": true,
+ "avg": false,
+ "current": true,
+ "max": true,
+ "min": false,
+ "rightSide": true,
+ "show": true,
+ "sort": "max",
+ "sortDesc": true,
+ "total": false,
+ "values": true
+ },
+ "lines": true,
+ "linewidth": 1,
+ "nullPointMode": "null",
+ "options": {
+ "alertThreshold": true
+ },
+ "percentage": false,
+ "pluginVersion": "7.5.7",
+ "pointradius": 2,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "exemplar": true,
+ "expr": "sum(rate(client_java_backoff_duration_sum{instance=~\"$instance\", type=\"BoPDRPC\"}[$__interval])) by (le, type) / sum(rate(client_java_backoff_duration_count{instance=~\"$instance\", type=\"BoPDRPC\"}[$__interval])) by (le, type)",
+ "interval": "",
+ "legendFormat": "{{type}}-avg",
+ "queryType": "randomWalk",
+ "refId": "A"
+ },
+ {
+ "exemplar": true,
+ "expr": "sum(rate(client_java_backoff_duration_sum{instance=~\"$instance\", type=\"BoPDRPC\"}[$__interval])) by (le, type)",
+ "hide": false,
+ "interval": "",
+ "legendFormat": "{{type}}-sum",
+ "refId": "B"
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeRegions": [],
+ "timeShift": null,
+ "title": "Backoff Duration",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "$$hashKey": "object:221",
+ "format": "s",
+ "label": null,
+ "logBase": 2,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "$$hashKey": "object:222",
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": false
+ }
+ ],
+ "yaxis": {
+ "align": false,
+ "alignLevel": null
+ }
+ },
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_TEST-CLUSTER}",
+ "fieldConfig": {
+ "defaults": {},
+ "overrides": []
+ },
+ "fill": 1,
+ "fillGradient": 0,
+ "gridPos": {
+ "h": 8,
+ "w": 12,
+ "x": 12,
+ "y": 75
+ },
+ "hiddenSeries": false,
+ "id": 45,
+ "legend": {
+ "alignAsTable": true,
+ "avg": false,
+ "current": true,
+ "max": true,
+ "min": false,
+ "rightSide": true,
+ "show": true,
+ "total": false,
+ "values": true
+ },
+ "lines": true,
+ "linewidth": 1,
+ "nullPointMode": "null",
+ "options": {
+ "alertThreshold": true
+ },
+ "percentage": false,
+ "pluginVersion": "7.5.7",
+ "pointradius": 2,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "exemplar": true,
+ "expr": "sum(rate(client_java_backoff_duration_count{instance=~\"$instance\", type=\"BoPDRPC\"}[$__interval])) by (le, type)",
+ "interval": "",
+ "legendFormat": "{{type}}",
+ "queryType": "randomWalk",
+ "refId": "A"
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeRegions": [],
+ "timeShift": null,
+ "title": "Backoff OPS",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "$$hashKey": "object:82",
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "$$hashKey": "object:83",
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ],
+ "yaxis": {
+ "align": false,
+ "alignLevel": null
+ }
+ },
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_TEST-CLUSTER}",
+ "fieldConfig": {
+ "defaults": {},
+ "overrides": []
+ },
+ "fill": 1,
+ "fillGradient": 0,
+ "gridPos": {
+ "h": 8,
+ "w": 12,
+ "x": 0,
+ "y": 83
+ },
+ "hiddenSeries": false,
+ "id": 49,
+ "legend": {
+ "alignAsTable": true,
+ "avg": false,
+ "current": true,
+ "max": true,
+ "min": false,
+ "rightSide": true,
+ "show": true,
+ "total": false,
+ "values": true
+ },
+ "lines": true,
+ "linewidth": 1,
+ "nullPointMode": "null",
+ "options": {
+ "alertThreshold": true
+ },
+ "percentage": false,
+ "pluginVersion": "7.5.7",
+ "pointradius": 2,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "exemplar": true,
+ "expr": "histogram_quantile(0.99, sum(rate(client_java_pd_seek_leader_duration_bucket{instance=\"$instance\"}[5s])))",
+ "interval": "",
+ "legendFormat": "seek-pd-leader-99",
+ "queryType": "randomWalk",
+ "refId": "A"
+ },
+ {
+ "exemplar": true,
+ "expr": "histogram_quantile(0.99, sum(rate(client_java_pd_seek_proxy_duration_bucket{instance=\"$instance\"}[5s])))",
+ "hide": false,
+ "interval": "",
+ "legendFormat": "seek-pd-proxy-99",
+ "refId": "B"
+ }
+ ],
+ "thresholds": [],
+ "timeRegions": [],
+ "title": "PD seek leader/proxy duration",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "$$hashKey": "object:226",
+ "format": "s",
+ "label": null,
+ "logBase": 2,
+ "max": null,
+ "min": null,
"show": true
},
{
+ "$$hashKey": "object:227",
"format": "short",
"label": null,
"logBase": 1,
@@ -1032,6 +2350,6 @@
},
"timezone": "browser",
"title": "Client-Java-Summary",
- "uid": "000000911",
- "version": 15
+ "uid": "000000912",
+ "version": 16
}
\ No newline at end of file
diff --git a/pom.xml b/pom.xml
index 4aa8c968931..794a9f3e849 100644
--- a/pom.xml
+++ b/pom.xml
@@ -65,8 +65,9 @@
3.5.1
1.2.17
1.7.16
- 1.24.0
- 2.0.25.Final
+ 1.38.0
+ 2.0.34.Final
+ 2.8.5
1.6.6
2.12.3
3.0.1
@@ -151,6 +152,11 @@
${grpc.version}
test
+
+ com.google.code.gson
+ gson
+ ${gson.version}
+
com.fasterxml.jackson.core
jackson-annotations
diff --git a/src/main/java/org/tikv/common/AbstractGRPCClient.java b/src/main/java/org/tikv/common/AbstractGRPCClient.java
index 813b98629aa..a3c4ed5efff 100644
--- a/src/main/java/org/tikv/common/AbstractGRPCClient.java
+++ b/src/main/java/org/tikv/common/AbstractGRPCClient.java
@@ -23,6 +23,7 @@
import io.grpc.health.v1.HealthCheckRequest;
import io.grpc.health.v1.HealthCheckResponse;
import io.grpc.health.v1.HealthGrpc;
+import io.grpc.stub.AbstractFutureStub;
import io.grpc.stub.AbstractStub;
import io.grpc.stub.ClientCalls;
import io.grpc.stub.StreamObserver;
@@ -38,14 +39,15 @@
import org.tikv.common.util.ChannelFactory;
public abstract class AbstractGRPCClient<
- BlockingStubT extends AbstractStub, StubT extends AbstractStub>
+ BlockingStubT extends AbstractStub,
+ FutureStubT extends AbstractFutureStub>
implements AutoCloseable {
protected final Logger logger = LoggerFactory.getLogger(this.getClass());
protected final ChannelFactory channelFactory;
protected TiConfiguration conf;
protected long timeout;
protected BlockingStubT blockingStub;
- protected StubT asyncStub;
+ protected FutureStubT asyncStub;
protected AbstractGRPCClient(TiConfiguration conf, ChannelFactory channelFactory) {
this.conf = conf;
@@ -57,7 +59,7 @@ protected AbstractGRPCClient(
TiConfiguration conf,
ChannelFactory channelFactory,
BlockingStubT blockingStub,
- StubT asyncStub) {
+ FutureStubT asyncStub) {
this.conf = conf;
this.timeout = conf.getTimeout();
this.channelFactory = channelFactory;
@@ -88,7 +90,8 @@ public RespT callWithRetry(
return ClientCalls.blockingUnaryCall(
stub.getChannel(), method, stub.getCallOptions(), requestFactory.get());
},
- method.getFullMethodName());
+ method.getFullMethodName(),
+ backOffer);
if (logger.isTraceEnabled()) {
logger.trace(String.format("leaving %s...", method.getFullMethodName()));
@@ -109,14 +112,15 @@ protected void callAsyncWithRetry(
.create(handler)
.callWithRetry(
() -> {
- StubT stub = getAsyncStub();
+ FutureStubT stub = getAsyncStub();
ClientCalls.asyncUnaryCall(
stub.getChannel().newCall(method, stub.getCallOptions()),
requestFactory.get(),
responseObserver);
return null;
},
- method.getFullMethodName());
+ method.getFullMethodName(),
+ backOffer);
logger.debug(String.format("leaving %s...", method.getFullMethodName()));
}
@@ -133,11 +137,12 @@ StreamObserver callBidiStreamingWithRetry(
.create(handler)
.callWithRetry(
() -> {
- StubT stub = getAsyncStub();
+ FutureStubT stub = getAsyncStub();
return asyncBidiStreamingCall(
stub.getChannel().newCall(method, stub.getCallOptions()), responseObserver);
},
- method.getFullMethodName());
+ method.getFullMethodName(),
+ backOffer);
logger.debug(String.format("leaving %s...", method.getFullMethodName()));
return observer;
}
@@ -160,7 +165,8 @@ public StreamingResponse callServerStreamingWithRetry(
blockingServerStreamingCall(
stub.getChannel(), method, stub.getCallOptions(), requestFactory.get()));
},
- method.getFullMethodName());
+ method.getFullMethodName(),
+ backOffer);
logger.debug(String.format("leaving %s...", method.getFullMethodName()));
return response;
}
@@ -175,7 +181,7 @@ public long getTimeout() {
protected abstract BlockingStubT getBlockingStub();
- protected abstract StubT getAsyncStub();
+ protected abstract FutureStubT getAsyncStub();
protected boolean checkHealth(String addressStr, HostMapping hostMapping) {
ManagedChannel channel = channelFactory.getChannel(addressStr, hostMapping);
diff --git a/src/main/java/org/tikv/common/ConfigUtils.java b/src/main/java/org/tikv/common/ConfigUtils.java
index 35086a87ba0..8f1c3e9d361 100644
--- a/src/main/java/org/tikv/common/ConfigUtils.java
+++ b/src/main/java/org/tikv/common/ConfigUtils.java
@@ -70,6 +70,22 @@ public class ConfigUtils {
public static final String TIKV_SCATTER_WAIT_SECONDS = "tikv.scatter_wait_seconds";
public static final String TIKV_RAWKV_DEFAULT_BACKOFF_IN_MS = "tikv.rawkv.default_backoff_in_ms";
+ public static final String TIKV_RAWKV_READ_TIMEOUT_IN_MS = "tikv.rawkv.read_timeout_in_ms";
+ public static final String TIKV_RAWKV_WRITE_TIMEOUT_IN_MS = "tikv.rawkv.write_timeout_in_ms";
+ public static final String TIKV_RAWKV_BATCH_READ_TIMEOUT_IN_MS =
+ "tikv.rawkv.batch_read_timeout_in_ms";
+ public static final String TIKV_RAWKV_BATCH_WRITE_TIMEOUT_IN_MS =
+ "tikv.rawkv.batch_write_timeout_in_ms";
+ public static final String TIKV_RAWKV_SCAN_TIMEOUT_IN_MS = "tikv.rawkv.scan_timeout_in_ms";
+ public static final String TIKV_RAWKV_CLEAN_TIMEOUT_IN_MS = "tikv.rawkv.clean_timeout_in_ms";
+ public static final String TIKV_BO_REGION_MISS_BASE_IN_MS = "tikv.bo_region_miss_base_in_ms";
+ public static final String TIKV_RAWKV_READ_SLOWLOG_IN_MS = "tikv.rawkv.read_slowlog_in_ms";
+ public static final String TIKV_RAWKV_WRITE_SLOWLOG_IN_MS = "tikv.rawkv.write_slowlog_in_ms";
+ public static final String TIKV_RAWKV_BATCH_READ_SLOWLOG_IN_MS =
+ "tikv.rawkv.batch_read_slowlog_in_ms";
+ public static final String TIKV_RAWKV_BATCH_WRITE_SLOWLOG_IN_MS =
+ "tikv.rawkv.batch_write_slowlog_in_ms";
+ public static final String TIKV_RAWKV_SCAN_SLOWLOG_IN_MS = "tikv.rawkv.scan_slowlog_in_ms";
public static final String TIKV_TLS_ENABLE = "tikv.tls_enable";
public static final String TIKV_TRUST_CERT_COLLECTION = "tikv.trust_cert_collection";
@@ -117,6 +133,16 @@ public class ConfigUtils {
public static final int DEF_TIKV_SCATTER_WAIT_SECONDS = 300;
public static final int DEF_TIKV_RAWKV_DEFAULT_BACKOFF_IN_MS = BackOffer.RAWKV_MAX_BACKOFF;
+ public static final int DEF_TIKV_RAWKV_READ_TIMEOUT_IN_MS = 2000;
+ public static final int DEF_TIKV_RAWKV_WRITE_TIMEOUT_IN_MS = 2000;
+ public static final int DEF_TIKV_RAWKV_BATCH_READ_TIMEOUT_IN_MS = 2000;
+ public static final int DEF_TIKV_RAWKV_BATCH_WRITE_TIMEOUT_IN_MS = 2000;
+ public static final int DEF_TIKV_RAWKV_SCAN_TIMEOUT_IN_MS = 10000;
+ public static final int DEF_TIKV_RAWKV_CLEAN_TIMEOUT_IN_MS = 600000;
+
+ public static final int DEF_TIKV_BO_REGION_MISS_BASE_IN_MS = 20;
+ public static final String DEF_TIKV_RAWKV_SCAN_SLOWLOG_IN_MS = "5000";
+
public static final String NORMAL_COMMAND_PRIORITY = "NORMAL";
public static final String LOW_COMMAND_PRIORITY = "LOW";
public static final String HIGH_COMMAND_PRIORITY = "HIGH";
diff --git a/src/main/java/org/tikv/common/MetricsServer.java b/src/main/java/org/tikv/common/MetricsServer.java
index 2e3ae1e355e..42e39076bd0 100644
--- a/src/main/java/org/tikv/common/MetricsServer.java
+++ b/src/main/java/org/tikv/common/MetricsServer.java
@@ -14,15 +14,10 @@
*/
package org.tikv.common;
-import io.prometheus.client.CollectorRegistry;
import io.prometheus.client.exporter.HTTPServer;
-import java.net.InetSocketAddress;
+import io.prometheus.client.hotspot.DefaultExports;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
-import org.tikv.common.policy.RetryPolicy;
-import org.tikv.common.region.RegionManager;
-import org.tikv.common.region.RegionStoreClient;
-import org.tikv.raw.RawKVClient;
public class MetricsServer {
private static final Logger logger = LoggerFactory.getLogger(MetricsServer.class);
@@ -57,16 +52,9 @@ public static MetricsServer getInstance(TiConfiguration conf) {
private MetricsServer(int port) {
try {
- CollectorRegistry collectorRegistry = new CollectorRegistry();
- collectorRegistry.register(RawKVClient.RAW_REQUEST_LATENCY);
- collectorRegistry.register(RawKVClient.RAW_REQUEST_FAILURE);
- collectorRegistry.register(RawKVClient.RAW_REQUEST_SUCCESS);
- collectorRegistry.register(RegionStoreClient.GRPC_RAW_REQUEST_LATENCY);
- collectorRegistry.register(RetryPolicy.GRPC_SINGLE_REQUEST_LATENCY);
- collectorRegistry.register(RegionManager.GET_REGION_BY_KEY_REQUEST_LATENCY);
- collectorRegistry.register(PDClient.PD_GET_REGION_BY_KEY_REQUEST_LATENCY);
this.port = port;
- this.server = new HTTPServer(new InetSocketAddress(port), collectorRegistry, true);
+ DefaultExports.initialize();
+ this.server = new HTTPServer(port, true);
logger.info("http server is up " + this.server.getPort());
} catch (Exception e) {
logger.error("http server not up");
diff --git a/src/main/java/org/tikv/common/PDClient.java b/src/main/java/org/tikv/common/PDClient.java
index 784819a0d47..f5d8b0f4005 100644
--- a/src/main/java/org/tikv/common/PDClient.java
+++ b/src/main/java/org/tikv/common/PDClient.java
@@ -39,6 +39,8 @@
import java.net.URI;
import java.net.URL;
import java.nio.charset.StandardCharsets;
+import java.util.ArrayList;
+import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Optional;
@@ -76,7 +78,7 @@
import org.tikv.kvproto.Metapb.Store;
import org.tikv.kvproto.PDGrpc;
import org.tikv.kvproto.PDGrpc.PDBlockingStub;
-import org.tikv.kvproto.PDGrpc.PDStub;
+import org.tikv.kvproto.PDGrpc.PDFutureStub;
import org.tikv.kvproto.Pdpb;
import org.tikv.kvproto.Pdpb.Error;
import org.tikv.kvproto.Pdpb.ErrorType;
@@ -99,7 +101,7 @@
import org.tikv.kvproto.Pdpb.TsoRequest;
import org.tikv.kvproto.Pdpb.TsoResponse;
-public class PDClient extends AbstractGRPCClient
+public class PDClient extends AbstractGRPCClient
implements ReadOnlyPDClient {
private static final String TIFLASH_TABLE_SYNC_PROGRESS_PATH = "/tiflash/table/sync";
private static final long MIN_TRY_UPDATE_DURATION = 50;
@@ -442,7 +444,6 @@ private synchronized boolean createLeaderClientWrapper(String leaderUrlStr) {
new PDClientWrapper(leaderUrlStr, leaderUrlStr, clientChannel, System.nanoTime());
timeout = conf.getTimeout();
} catch (IllegalArgumentException e) {
- logger.error("Error updating leader. " + leaderUrlStr, e);
return false;
}
logger.info(String.format("Switched to new leader: %s", pdClientWrapper));
@@ -462,7 +463,6 @@ synchronized boolean createFollowerClientWrapper(String followerUrlStr, String l
pdClientWrapper = new PDClientWrapper(leaderUrls, followerUrlStr, channel, System.nanoTime());
timeout = conf.getForwardTimeout();
} catch (IllegalArgumentException e) {
- logger.error("Error updating follower. " + followerUrlStr, e);
return false;
}
logger.info(String.format("Switched to new leader by follower forward: %s", pdClientWrapper));
@@ -624,7 +624,7 @@ protected PDBlockingStub getBlockingStub() {
}
@Override
- protected PDStub getAsyncStub() {
+ protected PDFutureStub getAsyncStub() {
if (pdClientWrapper == null) {
throw new GrpcException("PDClient may not be initialized");
}
@@ -632,8 +632,11 @@ protected PDStub getAsyncStub() {
}
private void initCluster() {
+ logger.info("init cluster: start");
GetMembersResponse resp = null;
- List pdAddrs = getConf().getPdAddrs();
+ List pdAddrs = new ArrayList<>(getConf().getPdAddrs());
+ // shuffle PD addresses so that clients call getMembers from different PD
+ Collections.shuffle(pdAddrs);
this.pdAddrs = pdAddrs;
this.etcdClient =
Client.builder()
@@ -645,19 +648,26 @@ private void initCluster() {
.setDaemon(true)
.build()))
.build();
+ logger.info("init host mapping: start");
this.hostMapping =
Optional.ofNullable(getConf().getHostMapping())
.orElseGet(() -> new DefaultHostMapping(this.etcdClient, conf.getNetworkMappingName()));
+ logger.info("init host mapping: end");
// The first request may cost too much latency
long originTimeout = this.timeout;
this.timeout = conf.getPdFirstGetMemberTimeout();
for (URI u : pdAddrs) {
+ logger.info("get members with pd " + u + ": start");
resp = getMembers(u);
+ logger.info("get members with pd " + u + ": end");
if (resp != null) {
break;
}
- logger.info("Could not get leader member with pd: " + u);
}
+ if (resp == null) {
+ logger.error("Could not get leader member with: " + pdAddrs);
+ }
+
this.timeout = originTimeout;
checkNotNull(resp, "Failed to init client for PD cluster.");
long clusterId = resp.getHeader().getClusterId();
@@ -673,7 +683,9 @@ private void initCluster() {
String leaderUrlStr = resp.getLeader().getClientUrls(0);
leaderUrlStr = uriToAddr(addrToUri(leaderUrlStr));
+ logger.info("createLeaderClientWrapper with leader " + leaderUrlStr + ": start");
createLeaderClientWrapper(leaderUrlStr);
+ logger.info("createLeaderClientWrapper with leader " + leaderUrlStr + ": end");
service =
Executors.newSingleThreadScheduledExecutor(
new ThreadFactoryBuilder()
@@ -702,12 +714,13 @@ private void initCluster() {
tiflashReplicaService.scheduleAtFixedRate(
this::updateTiFlashReplicaStatus, 10, 10, TimeUnit.SECONDS);
}
+ logger.info("init cluster: finish");
}
static class PDClientWrapper {
private final String leaderInfo;
private final PDBlockingStub blockingStub;
- private final PDStub asyncStub;
+ private final PDFutureStub asyncStub;
private final long createTime;
private final String storeAddress;
@@ -718,10 +731,10 @@ static class PDClientWrapper {
header.put(TiConfiguration.PD_FORWARD_META_DATA_KEY, addrToUri(leaderInfo).toString());
this.blockingStub =
MetadataUtils.attachHeaders(PDGrpc.newBlockingStub(clientChannel), header);
- this.asyncStub = MetadataUtils.attachHeaders(PDGrpc.newStub(clientChannel), header);
+ this.asyncStub = MetadataUtils.attachHeaders(PDGrpc.newFutureStub(clientChannel), header);
} else {
this.blockingStub = PDGrpc.newBlockingStub(clientChannel);
- this.asyncStub = PDGrpc.newStub(clientChannel);
+ this.asyncStub = PDGrpc.newFutureStub(clientChannel);
}
this.leaderInfo = leaderInfo;
this.storeAddress = storeAddress;
@@ -740,7 +753,7 @@ PDBlockingStub getBlockingStub() {
return blockingStub;
}
- PDStub getAsyncStub() {
+ PDFutureStub getAsyncStub() {
return asyncStub;
}
diff --git a/src/main/java/org/tikv/common/TiConfiguration.java b/src/main/java/org/tikv/common/TiConfiguration.java
index 4c1073631de..aad8df6d956 100644
--- a/src/main/java/org/tikv/common/TiConfiguration.java
+++ b/src/main/java/org/tikv/common/TiConfiguration.java
@@ -35,9 +35,9 @@ public class TiConfiguration implements Serializable {
private static final Logger logger = LoggerFactory.getLogger(TiConfiguration.class);
private static final ConcurrentHashMap settings = new ConcurrentHashMap<>();
- public static final Metadata.Key FORWARD_META_DATA_KEY =
+ public static final Metadata.Key FORWARD_META_DATA_KEY =
Metadata.Key.of("tikv-forwarded-host", Metadata.ASCII_STRING_MARSHALLER);
- public static final Metadata.Key PD_FORWARD_META_DATA_KEY =
+ public static final Metadata.Key PD_FORWARD_META_DATA_KEY =
Metadata.Key.of("pd-forwarded-host", Metadata.ASCII_STRING_MARSHALLER);
static {
@@ -122,6 +122,14 @@ private static void loadFromDefaultProperties() {
setIfMissing(TIKV_GRPC_KEEPALIVE_TIMEOUT, DEF_TIKV_GRPC_KEEPALIVE_TIMEOUT);
setIfMissing(TIKV_TLS_ENABLE, DEF_TIKV_TLS_ENABLE);
setIfMissing(TIFLASH_ENABLE, DEF_TIFLASH_ENABLE);
+ setIfMissing(TIKV_RAWKV_READ_TIMEOUT_IN_MS, DEF_TIKV_RAWKV_READ_TIMEOUT_IN_MS);
+ setIfMissing(TIKV_RAWKV_WRITE_TIMEOUT_IN_MS, DEF_TIKV_RAWKV_WRITE_TIMEOUT_IN_MS);
+ setIfMissing(TIKV_RAWKV_BATCH_READ_TIMEOUT_IN_MS, DEF_TIKV_RAWKV_BATCH_READ_TIMEOUT_IN_MS);
+ setIfMissing(TIKV_RAWKV_BATCH_WRITE_TIMEOUT_IN_MS, DEF_TIKV_RAWKV_BATCH_WRITE_TIMEOUT_IN_MS);
+ setIfMissing(TIKV_RAWKV_SCAN_TIMEOUT_IN_MS, DEF_TIKV_RAWKV_SCAN_TIMEOUT_IN_MS);
+ setIfMissing(TIKV_RAWKV_CLEAN_TIMEOUT_IN_MS, DEF_TIKV_RAWKV_CLEAN_TIMEOUT_IN_MS);
+ setIfMissing(TIKV_BO_REGION_MISS_BASE_IN_MS, DEF_TIKV_BO_REGION_MISS_BASE_IN_MS);
+ setIfMissing(TIKV_RAWKV_SCAN_SLOWLOG_IN_MS, DEF_TIKV_RAWKV_SCAN_SLOWLOG_IN_MS);
}
public static void listAll() {
@@ -168,10 +176,14 @@ private static String get(String key) {
return option.get();
}
- private static int getInt(String key) {
+ public static int getInt(String key) {
return Integer.parseInt(get(key));
}
+ public static Optional getIntOption(String key) {
+ return getOption(key).map(Integer::parseInt);
+ }
+
private static int getInt(String key, int defaultValue) {
try {
return getOption(key).map(Integer::parseInt).orElse(defaultValue);
@@ -322,6 +334,19 @@ private static ReplicaRead getReplicaRead(String key) {
private int scatterWaitSeconds = getInt(TIKV_SCATTER_WAIT_SECONDS);
private int rawKVDefaultBackoffInMS = getInt(TIKV_RAWKV_DEFAULT_BACKOFF_IN_MS);
+ private int rawKVReadTimeoutInMS = getInt(TIKV_RAWKV_READ_TIMEOUT_IN_MS);
+ private int rawKVWriteTimeoutInMS = getInt(TIKV_RAWKV_WRITE_TIMEOUT_IN_MS);
+ private int rawKVBatchReadTimeoutInMS = getInt(TIKV_RAWKV_BATCH_READ_TIMEOUT_IN_MS);
+ private int rawKVBatchWriteTimeoutInMS = getInt(TIKV_RAWKV_BATCH_WRITE_TIMEOUT_IN_MS);
+ private int rawKVScanTimeoutInMS = getInt(TIKV_RAWKV_SCAN_TIMEOUT_IN_MS);
+ private int rawKVCleanTimeoutInMS = getInt(TIKV_RAWKV_CLEAN_TIMEOUT_IN_MS);
+ private Optional rawKVReadSlowLogInMS = getIntOption(TIKV_RAWKV_READ_SLOWLOG_IN_MS);
+ private Optional rawKVWriteSlowLogInMS = getIntOption(TIKV_RAWKV_WRITE_SLOWLOG_IN_MS);
+ private Optional rawKVBatchReadSlowLogInMS =
+ getIntOption(TIKV_RAWKV_BATCH_READ_SLOWLOG_IN_MS);
+ private Optional rawKVBatchWriteSlowLogInMS =
+ getIntOption(TIKV_RAWKV_BATCH_WRITE_SLOWLOG_IN_MS);
+ private int rawKVScanSlowLogInMS = getInt(TIKV_RAWKV_SCAN_SLOWLOG_IN_MS);
private boolean tlsEnable = getBoolean(TIKV_TLS_ENABLE);
private String trustCertCollectionFile = getOption(TIKV_TRUST_CERT_COLLECTION).orElse(null);
@@ -768,4 +793,92 @@ public String getKeyFile() {
public void setKeyFile(String keyFile) {
this.keyFile = keyFile;
}
+
+ public int getRawKVReadTimeoutInMS() {
+ return rawKVReadTimeoutInMS;
+ }
+
+ public void setRawKVReadTimeoutInMS(int rawKVReadTimeoutInMS) {
+ this.rawKVReadTimeoutInMS = rawKVReadTimeoutInMS;
+ }
+
+ public int getRawKVWriteTimeoutInMS() {
+ return rawKVWriteTimeoutInMS;
+ }
+
+ public void setRawKVWriteTimeoutInMS(int rawKVWriteTimeoutInMS) {
+ this.rawKVWriteTimeoutInMS = rawKVWriteTimeoutInMS;
+ }
+
+ public int getRawKVBatchReadTimeoutInMS() {
+ return rawKVBatchReadTimeoutInMS;
+ }
+
+ public void setRawKVBatchReadTimeoutInMS(int rawKVBatchReadTimeoutInMS) {
+ this.rawKVBatchReadTimeoutInMS = rawKVBatchReadTimeoutInMS;
+ }
+
+ public int getRawKVBatchWriteTimeoutInMS() {
+ return rawKVBatchWriteTimeoutInMS;
+ }
+
+ public void setRawKVBatchWriteTimeoutInMS(int rawKVBatchWriteTimeoutInMS) {
+ this.rawKVBatchWriteTimeoutInMS = rawKVBatchWriteTimeoutInMS;
+ }
+
+ public int getRawKVScanTimeoutInMS() {
+ return rawKVScanTimeoutInMS;
+ }
+
+ public void setRawKVScanTimeoutInMS(int rawKVScanTimeoutInMS) {
+ this.rawKVScanTimeoutInMS = rawKVScanTimeoutInMS;
+ }
+
+ public int getRawKVCleanTimeoutInMS() {
+ return rawKVCleanTimeoutInMS;
+ }
+
+ public void setRawKVCleanTimeoutInMS(int rawKVCleanTimeoutInMS) {
+ this.rawKVCleanTimeoutInMS = rawKVCleanTimeoutInMS;
+ }
+
+ public Integer getRawKVReadSlowLogInMS() {
+ return rawKVReadSlowLogInMS.orElse((int) (getTimeout() * 2));
+ }
+
+ public void setRawKVReadSlowLogInMS(Integer rawKVReadSlowLogInMS) {
+ this.rawKVReadSlowLogInMS = Optional.of(rawKVReadSlowLogInMS);
+ }
+
+ public Integer getRawKVWriteSlowLogInMS() {
+ return rawKVWriteSlowLogInMS.orElse((int) (getTimeout() * 2));
+ }
+
+ public void setRawKVWriteSlowLogInMS(Integer rawKVWriteSlowLogInMS) {
+ this.rawKVWriteSlowLogInMS = Optional.of(rawKVWriteSlowLogInMS);
+ }
+
+ public Integer getRawKVBatchReadSlowLogInMS() {
+ return rawKVBatchReadSlowLogInMS.orElse((int) (getTimeout() * 2));
+ }
+
+ public void setRawKVBatchReadSlowLogInMS(Integer rawKVBatchReadSlowLogInMS) {
+ this.rawKVBatchReadSlowLogInMS = Optional.of(rawKVBatchReadSlowLogInMS);
+ }
+
+ public Integer getRawKVBatchWriteSlowLogInMS() {
+ return rawKVBatchWriteSlowLogInMS.orElse((int) (getTimeout() * 2));
+ }
+
+ public void setRawKVBatchWriteSlowLogInMS(Integer rawKVBatchWriteSlowLogInMS) {
+ this.rawKVBatchWriteSlowLogInMS = Optional.of(rawKVBatchWriteSlowLogInMS);
+ }
+
+ public int getRawKVScanSlowLogInMS() {
+ return rawKVScanSlowLogInMS;
+ }
+
+ public void setRawKVScanSlowLogInMS(int rawKVScanSlowLogInMS) {
+ this.rawKVScanSlowLogInMS = rawKVScanSlowLogInMS;
+ }
}
diff --git a/src/main/java/org/tikv/common/exception/InvalidStoreException.java b/src/main/java/org/tikv/common/exception/InvalidStoreException.java
new file mode 100644
index 00000000000..e97b4c677ee
--- /dev/null
+++ b/src/main/java/org/tikv/common/exception/InvalidStoreException.java
@@ -0,0 +1,23 @@
+/*
+ * Copyright 2021 PingCAP, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.tikv.common.exception;
+
+public class InvalidStoreException extends TiKVException {
+
+ public InvalidStoreException(long storeId) {
+ super(String.format("Invalid storeId: %d", storeId));
+ }
+}
diff --git a/src/main/java/org/tikv/common/importer/ImporterStoreClient.java b/src/main/java/org/tikv/common/importer/ImporterStoreClient.java
index 87c57853608..f3aa929e117 100644
--- a/src/main/java/org/tikv/common/importer/ImporterStoreClient.java
+++ b/src/main/java/org/tikv/common/importer/ImporterStoreClient.java
@@ -41,17 +41,22 @@
import org.tikv.kvproto.Kvrpcpb;
public class ImporterStoreClient
- extends AbstractGRPCClient
+ extends AbstractGRPCClient<
+ ImportSSTGrpc.ImportSSTBlockingStub, ImportSSTGrpc.ImportSSTFutureStub>
implements StreamObserver {
private static final Logger logger = LoggerFactory.getLogger(ImporterStoreClient.class);
+ private final ImportSSTGrpc.ImportSSTStub stub;
+
protected ImporterStoreClient(
TiConfiguration conf,
ChannelFactory channelFactory,
ImportSSTGrpc.ImportSSTBlockingStub blockingStub,
- ImportSSTGrpc.ImportSSTStub asyncStub) {
+ ImportSSTGrpc.ImportSSTFutureStub asyncStub,
+ ImportSSTGrpc.ImportSSTStub stub) {
super(conf, channelFactory, blockingStub, asyncStub);
+ this.stub = stub;
}
private StreamObserver streamObserverRequest;
@@ -108,11 +113,11 @@ public void startWrite() {
if (conf.isRawKVMode()) {
streamObserverRequest =
(StreamObserver)
- getAsyncStub().rawWrite((StreamObserver) this);
+ getStub().rawWrite((StreamObserver) this);
} else {
streamObserverRequest =
(StreamObserver)
- getAsyncStub().write((StreamObserver) this);
+ getStub().write((StreamObserver) this);
}
}
@@ -174,10 +179,14 @@ protected ImportSSTGrpc.ImportSSTBlockingStub getBlockingStub() {
}
@Override
- protected ImportSSTGrpc.ImportSSTStub getAsyncStub() {
+ protected ImportSSTGrpc.ImportSSTFutureStub getAsyncStub() {
return asyncStub.withDeadlineAfter(conf.getIngestTimeout(), TimeUnit.MILLISECONDS);
}
+ protected ImportSSTGrpc.ImportSSTStub getStub() {
+ return stub.withDeadlineAfter(conf.getIngestTimeout(), TimeUnit.MILLISECONDS);
+ }
+
@Override
public void close() throws Exception {}
@@ -209,10 +218,11 @@ public synchronized ImporterStoreClient build(TiStore store) throws GrpcExceptio
ManagedChannel channel = channelFactory.getChannel(addressStr, pdClient.getHostMapping());
ImportSSTGrpc.ImportSSTBlockingStub blockingStub = ImportSSTGrpc.newBlockingStub(channel);
- ImportSSTGrpc.ImportSSTStub asyncStub = ImportSSTGrpc.newStub(channel);
+ ImportSSTGrpc.ImportSSTFutureStub asyncStub = ImportSSTGrpc.newFutureStub(channel);
+ ImportSSTGrpc.ImportSSTStub stub = ImportSSTGrpc.newStub(channel);
return new ImporterStoreClient(
- conf, channelFactory, blockingStub, asyncStub);
+ conf, channelFactory, blockingStub, asyncStub, stub);
}
}
}
diff --git a/src/main/java/org/tikv/common/log/SlowLog.java b/src/main/java/org/tikv/common/log/SlowLog.java
new file mode 100644
index 00000000000..e172a003462
--- /dev/null
+++ b/src/main/java/org/tikv/common/log/SlowLog.java
@@ -0,0 +1,26 @@
+/*
+ *
+ * Copyright 2021 PingCAP, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ */
+
+package org.tikv.common.log;
+
+public interface SlowLog {
+ void addProperty(String key, String value);
+
+ SlowLogSpan start(String name);
+
+ void log();
+}
diff --git a/src/main/java/org/tikv/common/log/SlowLogEmptyImpl.java b/src/main/java/org/tikv/common/log/SlowLogEmptyImpl.java
new file mode 100644
index 00000000000..37e68237db5
--- /dev/null
+++ b/src/main/java/org/tikv/common/log/SlowLogEmptyImpl.java
@@ -0,0 +1,35 @@
+/*
+ *
+ * Copyright 2021 PingCAP, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ */
+
+package org.tikv.common.log;
+
+public class SlowLogEmptyImpl implements SlowLog {
+ public static final SlowLogEmptyImpl INSTANCE = new SlowLogEmptyImpl();
+
+ private SlowLogEmptyImpl() {}
+
+ @Override
+ public void addProperty(String key, String value) {}
+
+ @Override
+ public SlowLogSpan start(String name) {
+ return SlowLogSpanEmptyImpl.INSTANCE;
+ }
+
+ @Override
+ public void log() {}
+}
diff --git a/src/main/java/org/tikv/common/log/SlowLogImpl.java b/src/main/java/org/tikv/common/log/SlowLogImpl.java
new file mode 100644
index 00000000000..76ed4c922bf
--- /dev/null
+++ b/src/main/java/org/tikv/common/log/SlowLogImpl.java
@@ -0,0 +1,93 @@
+/*
+ *
+ * Copyright 2021 PingCAP, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ */
+
+package org.tikv.common.log;
+
+import com.google.gson.JsonArray;
+import com.google.gson.JsonObject;
+import java.text.SimpleDateFormat;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+public class SlowLogImpl implements SlowLog {
+ private static final Logger logger = LoggerFactory.getLogger(SlowLogImpl.class);
+
+ private static final int MAX_SPAN_SIZE = 1024;
+
+ public static final SimpleDateFormat DATE_FORMAT = new SimpleDateFormat("HH:mm:ss.SSS");
+
+ private final List slowLogSpans = new ArrayList<>();
+
+ private final long startMS;
+ private final long slowThresholdMS;
+
+ /** Key-Value pairs which will be logged, e.g. function name, key, region, etc. */
+ private final Map properties;
+
+ public SlowLogImpl(long slowThresholdMS, Map properties) {
+ this.startMS = System.currentTimeMillis();
+ this.slowThresholdMS = slowThresholdMS;
+ this.properties = new HashMap<>(properties);
+ }
+
+ @Override
+ public void addProperty(String key, String value) {
+ this.properties.put(key, value);
+ }
+
+ @Override
+ public synchronized SlowLogSpan start(String name) {
+ SlowLogSpan slowLogSpan = new SlowLogSpanImpl(name);
+ if (slowLogSpans.size() < MAX_SPAN_SIZE) {
+ slowLogSpans.add(slowLogSpan);
+ }
+ slowLogSpan.start();
+ return slowLogSpan;
+ }
+
+ @Override
+ public void log() {
+ long currentMS = System.currentTimeMillis();
+ if (slowThresholdMS >= 0 && currentMS - startMS > slowThresholdMS) {
+ logger.warn("SlowLog:" + getSlowLogString(currentMS));
+ }
+ }
+
+ private String getSlowLogString(long currentMS) {
+ JsonObject jsonObject = new JsonObject();
+
+ jsonObject.addProperty("start", DATE_FORMAT.format(startMS));
+ jsonObject.addProperty("end", DATE_FORMAT.format(currentMS));
+ jsonObject.addProperty("duration", (currentMS - startMS) + "ms");
+
+ for (Map.Entry entry : properties.entrySet()) {
+ jsonObject.addProperty(entry.getKey(), entry.getValue());
+ }
+
+ JsonArray jsonArray = new JsonArray();
+ for (SlowLogSpan slowLogSpan : slowLogSpans) {
+ jsonArray.add(slowLogSpan.toJsonElement());
+ }
+ jsonObject.add("spans", jsonArray);
+
+ return jsonObject.toString();
+ }
+}
diff --git a/src/main/java/org/tikv/common/log/SlowLogSpan.java b/src/main/java/org/tikv/common/log/SlowLogSpan.java
new file mode 100644
index 00000000000..a56e9df1885
--- /dev/null
+++ b/src/main/java/org/tikv/common/log/SlowLogSpan.java
@@ -0,0 +1,28 @@
+/*
+ *
+ * Copyright 2021 PingCAP, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ */
+
+package org.tikv.common.log;
+
+import com.google.gson.JsonElement;
+
+public interface SlowLogSpan {
+ void start();
+
+ void end();
+
+ JsonElement toJsonElement();
+}
diff --git a/src/main/java/org/tikv/common/log/SlowLogSpanEmptyImpl.java b/src/main/java/org/tikv/common/log/SlowLogSpanEmptyImpl.java
new file mode 100644
index 00000000000..11231d1c685
--- /dev/null
+++ b/src/main/java/org/tikv/common/log/SlowLogSpanEmptyImpl.java
@@ -0,0 +1,39 @@
+/*
+ *
+ * Copyright 2021 PingCAP, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ */
+
+package org.tikv.common.log;
+
+import com.google.gson.JsonElement;
+import com.google.gson.JsonObject;
+
+public class SlowLogSpanEmptyImpl implements SlowLogSpan {
+
+ public static final SlowLogSpanEmptyImpl INSTANCE = new SlowLogSpanEmptyImpl();
+
+ private SlowLogSpanEmptyImpl() {}
+
+ @Override
+ public void start() {}
+
+ @Override
+ public void end() {}
+
+ @Override
+ public JsonElement toJsonElement() {
+ return new JsonObject();
+ }
+}
diff --git a/src/main/java/org/tikv/common/log/SlowLogSpanImpl.java b/src/main/java/org/tikv/common/log/SlowLogSpanImpl.java
new file mode 100644
index 00000000000..960baeb41dc
--- /dev/null
+++ b/src/main/java/org/tikv/common/log/SlowLogSpanImpl.java
@@ -0,0 +1,77 @@
+/*
+ *
+ * Copyright 2021 PingCAP, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ */
+
+package org.tikv.common.log;
+
+import static org.tikv.common.log.SlowLogImpl.DATE_FORMAT;
+
+import com.google.gson.JsonElement;
+import com.google.gson.JsonObject;
+
+public class SlowLogSpanImpl implements SlowLogSpan {
+ private final String name;
+ private long startMS;
+ private long endMS;
+
+ public SlowLogSpanImpl(String name) {
+ this.name = name;
+ this.startMS = 0;
+ this.endMS = 0;
+ }
+
+ @Override
+ public void start() {
+ this.startMS = System.currentTimeMillis();
+ }
+
+ @Override
+ public void end() {
+ this.endMS = System.currentTimeMillis();
+ }
+
+ @Override
+ public JsonElement toJsonElement() {
+ JsonObject jsonObject = new JsonObject();
+ jsonObject.addProperty("name", name);
+ jsonObject.addProperty("start", getStartString());
+ jsonObject.addProperty("end", getEndString());
+ jsonObject.addProperty("duration", getDurationString());
+
+ return jsonObject;
+ }
+
+ private String getStartString() {
+ if (startMS == 0) {
+ return "N/A";
+ }
+ return DATE_FORMAT.format(startMS);
+ }
+
+ private String getEndString() {
+ if (endMS == 0) {
+ return "N/A";
+ }
+ return DATE_FORMAT.format(endMS);
+ }
+
+ private String getDurationString() {
+ if (startMS == 0 || endMS == 0) {
+ return "N/A";
+ }
+ return (endMS - startMS) + "ms";
+ }
+}
diff --git a/src/main/java/org/tikv/common/operation/KVErrorHandler.java b/src/main/java/org/tikv/common/operation/KVErrorHandler.java
index dfbe24c5a65..3321b560740 100644
--- a/src/main/java/org/tikv/common/operation/KVErrorHandler.java
+++ b/src/main/java/org/tikv/common/operation/KVErrorHandler.java
@@ -94,8 +94,6 @@ public boolean handleResponseError(BackOffer backOffer, RespT resp) {
Errorpb.Error error = regionHandler.getRegionError(resp);
if (error != null) {
return regionHandler.handleRegionError(backOffer, error);
- } else {
- regionHandler.tryUpdateRegionStore();
}
// Key error handling logic
diff --git a/src/main/java/org/tikv/common/operation/PDErrorHandler.java b/src/main/java/org/tikv/common/operation/PDErrorHandler.java
index 0d280da5eef..421755164ee 100644
--- a/src/main/java/org/tikv/common/operation/PDErrorHandler.java
+++ b/src/main/java/org/tikv/common/operation/PDErrorHandler.java
@@ -19,6 +19,7 @@
import static org.tikv.common.pd.PDError.buildFromPdpbError;
+import io.grpc.StatusRuntimeException;
import java.util.function.Function;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@@ -74,6 +75,10 @@ public boolean handleResponseError(BackOffer backOffer, RespT resp) {
@Override
public boolean handleRequestError(BackOffer backOffer, Exception e) {
+ // store id is not found
+ if (e instanceof StatusRuntimeException && e.getMessage().contains("invalid store ID")) {
+ return false;
+ }
backOffer.doBackOff(BackOffFunction.BackOffFuncType.BoPDRPC, e);
client.updateLeaderOrforwardFollower();
return true;
diff --git a/src/main/java/org/tikv/common/operation/RegionErrorHandler.java b/src/main/java/org/tikv/common/operation/RegionErrorHandler.java
index 452aad13d06..6b3e5115127 100644
--- a/src/main/java/org/tikv/common/operation/RegionErrorHandler.java
+++ b/src/main/java/org/tikv/common/operation/RegionErrorHandler.java
@@ -3,17 +3,21 @@
import com.google.protobuf.ByteString;
import io.grpc.Status;
import io.grpc.StatusRuntimeException;
+import java.util.ArrayList;
+import java.util.List;
import java.util.function.Function;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.tikv.common.codec.KeyUtils;
import org.tikv.common.exception.GrpcException;
+import org.tikv.common.exception.TiKVException;
import org.tikv.common.region.RegionErrorReceiver;
import org.tikv.common.region.RegionManager;
import org.tikv.common.region.TiRegion;
import org.tikv.common.util.BackOffFunction;
import org.tikv.common.util.BackOffer;
import org.tikv.kvproto.Errorpb;
+import org.tikv.kvproto.Metapb;
public class RegionErrorHandler implements ErrorHandler {
private static final Logger logger = LoggerFactory.getLogger(RegionErrorHandler.class);
@@ -42,16 +46,10 @@ public boolean handleResponseError(BackOffer backOffer, RespT resp) {
Errorpb.Error error = getRegionError(resp);
if (error != null) {
return handleRegionError(backOffer, error);
- } else {
- tryUpdateRegionStore();
}
return false;
}
- public void tryUpdateRegionStore() {
- recv.tryUpdateRegionStore();
- }
-
public boolean handleRegionError(BackOffer backOffer, Errorpb.Error error) {
if (error.hasNotLeader()) {
// this error is reported from raftstore:
@@ -114,11 +112,9 @@ public boolean handleRegionError(BackOffer backOffer, Errorpb.Error error) {
// throwing it out.
return false;
} else if (error.hasEpochNotMatch()) {
- // this error is reported from raftstore:
- // region has outdated version,please try later.
- logger.warn(String.format("Stale Epoch encountered for region [%s]", recv.getRegion()));
- this.regionManager.onRegionStale(recv.getRegion());
- return false;
+ logger.warn(
+ String.format("tikv reports `EpochNotMatch` retry later, region: %s", recv.getRegion()));
+ return onRegionEpochNotMatch(backOffer, error.getEpochNotMatch().getCurrentRegionsList());
} else if (error.hasServerIsBusy()) {
// this error is reported from kv:
// will occur when write pressure is high. Please try later.
@@ -170,17 +166,70 @@ public boolean handleRegionError(BackOffer backOffer, Errorpb.Error error) {
return false;
}
+ // ref: https://github.com/tikv/client-go/blob/tidb-5.2/internal/locate/region_request.go#L985
+ // OnRegionEpochNotMatch removes the old region and inserts new regions into the cache.
+ // It returns whether retries the request because it's possible the region epoch is ahead of
+ // TiKV's due to slow appling.
+ private boolean onRegionEpochNotMatch(BackOffer backOffer, List currentRegions) {
+ if (currentRegions.size() == 0) {
+ this.regionManager.onRegionStale(recv.getRegion());
+ return false;
+ }
+
+ // Find whether the region epoch in `ctx` is ahead of TiKV's. If so, backoff.
+ for (Metapb.Region meta : currentRegions) {
+ if (meta.getId() == recv.getRegion().getId()
+ && (meta.getRegionEpoch().getConfVer() < recv.getRegion().getVerID().getConfVer()
+ || meta.getRegionEpoch().getVersion() < recv.getRegion().getVerID().getVer())) {
+ String errorMsg =
+ String.format(
+ "region epoch is ahead of tikv, region: %s, currentRegions: %s",
+ recv.getRegion(), currentRegions);
+ logger.info(errorMsg);
+ backOffer.doBackOff(
+ BackOffFunction.BackOffFuncType.BoRegionMiss, new TiKVException(errorMsg));
+ return true;
+ }
+ }
+
+ boolean needInvalidateOld = true;
+ List newRegions = new ArrayList<>(currentRegions.size());
+ // If the region epoch is not ahead of TiKV's, replace region meta in region cache.
+ for (Metapb.Region meta : currentRegions) {
+ TiRegion region = regionManager.createRegion(meta, backOffer);
+ newRegions.add(region);
+ if (recv.getRegion().getVerID() == region.getVerID()) {
+ needInvalidateOld = false;
+ }
+ }
+
+ if (needInvalidateOld) {
+ this.regionManager.onRegionStale(recv.getRegion());
+ }
+
+ for (TiRegion region : newRegions) {
+ regionManager.insertRegionToCache(region);
+ }
+
+ return false;
+ }
+
@Override
public boolean handleRequestError(BackOffer backOffer, Exception e) {
- if (recv.onStoreUnreachable()) {
+ if (recv.onStoreUnreachable(backOffer.getSlowLog())) {
+ if (!backOffer.canRetryAfterSleep(BackOffFunction.BackOffFuncType.BoTiKVRPC)) {
+ regionManager.onRequestFail(recv.getRegion());
+ throw new GrpcException("retry is exhausted.", e);
+ }
return true;
}
logger.warn("request failed because of: " + e.getMessage());
- backOffer.doBackOff(
- BackOffFunction.BackOffFuncType.BoTiKVRPC,
- new GrpcException(
- "send tikv request error: " + e.getMessage() + ", try next peer later", e));
+ if (!backOffer.canRetryAfterSleep(BackOffFunction.BackOffFuncType.BoTiKVRPC)) {
+ regionManager.onRequestFail(recv.getRegion());
+ throw new GrpcException(
+ "send tikv request error: " + e.getMessage() + ", try next peer later", e);
+ }
// TiKV maybe down, so do not retry in `callWithRetry`
// should re-fetch the new leader from PD and send request to it
return false;
diff --git a/src/main/java/org/tikv/common/operation/iterator/RawScanIterator.java b/src/main/java/org/tikv/common/operation/iterator/RawScanIterator.java
index 9c2fbb724e1..5cdfe90d954 100644
--- a/src/main/java/org/tikv/common/operation/iterator/RawScanIterator.java
+++ b/src/main/java/org/tikv/common/operation/iterator/RawScanIterator.java
@@ -25,10 +25,10 @@
import org.tikv.common.region.TiRegion;
import org.tikv.common.util.BackOffFunction;
import org.tikv.common.util.BackOffer;
-import org.tikv.common.util.ConcreteBackOffer;
import org.tikv.kvproto.Kvrpcpb;
public class RawScanIterator extends ScanIterator {
+ private final BackOffer scanBackOffer;
public RawScanIterator(
TiConfiguration conf,
@@ -36,15 +36,19 @@ public RawScanIterator(
ByteString startKey,
ByteString endKey,
int limit,
- boolean keyOnly) {
+ boolean keyOnly,
+ BackOffer scanBackOffer) {
super(conf, builder, startKey, endKey, limit, keyOnly);
+
+ this.scanBackOffer = scanBackOffer;
}
+ @Override
TiRegion loadCurrentRegionToCache() throws GrpcException {
- BackOffer backOffer = ConcreteBackOffer.newScannerNextMaxBackOff();
+ BackOffer backOffer = scanBackOffer;
while (true) {
- try (RegionStoreClient client = builder.build(startKey)) {
- client.setTimeout(conf.getScanTimeout());
+ try (RegionStoreClient client = builder.build(startKey, backOffer)) {
+ client.setTimeout(conf.getRawKVScanTimeoutInMS());
TiRegion region = client.getRegion();
if (limit <= 0) {
currentCache = null;
diff --git a/src/main/java/org/tikv/common/policy/RetryPolicy.java b/src/main/java/org/tikv/common/policy/RetryPolicy.java
index 082b424345d..a7db3837a9e 100644
--- a/src/main/java/org/tikv/common/policy/RetryPolicy.java
+++ b/src/main/java/org/tikv/common/policy/RetryPolicy.java
@@ -21,6 +21,7 @@
import io.prometheus.client.Histogram;
import java.util.concurrent.Callable;
import org.tikv.common.exception.GrpcException;
+import org.tikv.common.log.SlowLogSpan;
import org.tikv.common.operation.ErrorHandler;
import org.tikv.common.util.BackOffer;
import org.tikv.common.util.ConcreteBackOffer;
@@ -33,6 +34,12 @@ public abstract class RetryPolicy {
.help("grpc request latency.")
.labelNames("type")
.register();
+ public static final Histogram CALL_WITH_RETRY_DURATION =
+ Histogram.build()
+ .name("client_java_call_with_retry_duration")
+ .help("callWithRetry duration.")
+ .labelNames("type")
+ .register();
public static final Counter GRPC_REQUEST_RETRY_NUM =
Counter.build()
.name("client_java_grpc_requests_retry_num")
@@ -61,38 +68,49 @@ private void rethrowNotRecoverableException(Exception e) {
}
}
- public RespT callWithRetry(Callable proc, String methodName) {
- while (true) {
- RespT result = null;
- try {
- // add single request duration histogram
- Histogram.Timer requestTimer = GRPC_SINGLE_REQUEST_LATENCY.labels(methodName).startTimer();
+ public RespT callWithRetry(Callable proc, String methodName, BackOffer backOffer) {
+ Histogram.Timer callWithRetryTimer = CALL_WITH_RETRY_DURATION.labels(methodName).startTimer();
+ SlowLogSpan callWithRetrySlowLogSpan =
+ backOffer.getSlowLog().start("callWithRetry " + methodName);
+ try {
+ while (true) {
+ RespT result = null;
try {
- result = proc.call();
- } finally {
- requestTimer.observeDuration();
+ // add single request duration histogram
+ Histogram.Timer requestTimer =
+ GRPC_SINGLE_REQUEST_LATENCY.labels(methodName).startTimer();
+ SlowLogSpan slowLogSpan = backOffer.getSlowLog().start("gRPC " + methodName);
+ try {
+ result = proc.call();
+ } finally {
+ slowLogSpan.end();
+ requestTimer.observeDuration();
+ }
+ } catch (Exception e) {
+ rethrowNotRecoverableException(e);
+ // Handle request call error
+ boolean retry = handler.handleRequestError(backOffer, e);
+ if (retry) {
+ GRPC_REQUEST_RETRY_NUM.labels(methodName).inc();
+ continue;
+ } else {
+ return result;
+ }
}
- } catch (Exception e) {
- rethrowNotRecoverableException(e);
- // Handle request call error
- boolean retry = handler.handleRequestError(backOffer, e);
- if (retry) {
- GRPC_REQUEST_RETRY_NUM.labels(methodName).inc();
- continue;
- } else {
- return result;
- }
- }
- // Handle response error
- if (handler != null) {
- boolean retry = handler.handleResponseError(backOffer, result);
- if (retry) {
- GRPC_REQUEST_RETRY_NUM.labels(methodName).inc();
- continue;
+ // Handle response error
+ if (handler != null) {
+ boolean retry = handler.handleResponseError(backOffer, result);
+ if (retry) {
+ GRPC_REQUEST_RETRY_NUM.labels(methodName).inc();
+ continue;
+ }
}
+ return result;
}
- return result;
+ } finally {
+ callWithRetryTimer.observeDuration();
+ callWithRetrySlowLogSpan.end();
}
}
diff --git a/src/main/java/org/tikv/common/region/AbstractRegionStoreClient.java b/src/main/java/org/tikv/common/region/AbstractRegionStoreClient.java
index c47bb854746..e34a0ff352d 100644
--- a/src/main/java/org/tikv/common/region/AbstractRegionStoreClient.java
+++ b/src/main/java/org/tikv/common/region/AbstractRegionStoreClient.java
@@ -20,34 +20,49 @@
import static com.google.common.base.Preconditions.checkArgument;
import static com.google.common.base.Preconditions.checkNotNull;
+import com.google.common.util.concurrent.ListenableFuture;
+import com.google.protobuf.ByteString;
import io.grpc.ManagedChannel;
import io.grpc.Metadata;
import io.grpc.stub.MetadataUtils;
+import io.prometheus.client.Histogram;
+import java.util.LinkedList;
import java.util.List;
import java.util.Set;
-import java.util.concurrent.TimeUnit;
+import java.util.concurrent.*;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.tikv.common.AbstractGRPCClient;
import org.tikv.common.TiConfiguration;
import org.tikv.common.exception.GrpcException;
+import org.tikv.common.log.SlowLog;
+import org.tikv.common.log.SlowLogEmptyImpl;
+import org.tikv.common.log.SlowLogSpan;
import org.tikv.common.util.ChannelFactory;
import org.tikv.kvproto.Kvrpcpb;
import org.tikv.kvproto.Metapb;
import org.tikv.kvproto.TikvGrpc;
public abstract class AbstractRegionStoreClient
- extends AbstractGRPCClient
+ extends AbstractGRPCClient
implements RegionErrorReceiver {
private static final Logger logger = LoggerFactory.getLogger(AbstractRegionStoreClient.class);
+ public static final Histogram SEEK_LEADER_STORE_DURATION =
+ Histogram.build()
+ .name("client_java_seek_leader_store_duration")
+ .help("seek leader store duration.")
+ .register();
+
+ public static final Histogram SEEK_PROXY_STORE_DURATION =
+ Histogram.build()
+ .name("client_java_seek_proxy_store_duration")
+ .help("seek proxy store duration.")
+ .register();
+
protected final RegionManager regionManager;
protected TiRegion region;
- protected TiStore targetStore;
- protected TiStore originStore;
- private long retryForwardTimes;
- private long retryLeaderTimes;
- private Metapb.Peer candidateLeader;
+ protected TiStore store;
protected AbstractRegionStoreClient(
TiConfiguration conf,
@@ -55,7 +70,7 @@ protected AbstractRegionStoreClient(
TiStore store,
ChannelFactory channelFactory,
TikvGrpc.TikvBlockingStub blockingStub,
- TikvGrpc.TikvStub asyncStub,
+ TikvGrpc.TikvFutureStub asyncStub,
RegionManager regionManager) {
super(conf, channelFactory, blockingStub, asyncStub);
checkNotNull(region, "Region is empty");
@@ -63,15 +78,13 @@ protected AbstractRegionStoreClient(
checkArgument(region.getLeader() != null, "Leader Peer is null");
this.region = region;
this.regionManager = regionManager;
- this.targetStore = store;
- this.originStore = null;
- this.candidateLeader = null;
- this.retryForwardTimes = 0;
- this.retryLeaderTimes = 0;
- if (this.targetStore.getProxyStore() != null) {
+ this.store = store;
+ if (this.store.getProxyStore() != null) {
this.timeout = conf.getForwardTimeout();
- } else if (!this.targetStore.isReachable() && !this.targetStore.canForwardFirst()) {
- onStoreUnreachable();
+ } else if (!this.store.isReachable()) {
+ // cannot get Deadline or SlowLog instance here
+ // use SlowLogEmptyImpl instead to skip slow log record
+ onStoreUnreachable(SlowLogEmptyImpl.INSTANCE);
}
}
@@ -86,7 +99,7 @@ protected TikvGrpc.TikvBlockingStub getBlockingStub() {
}
@Override
- protected TikvGrpc.TikvStub getAsyncStub() {
+ protected TikvGrpc.TikvFutureStub getAsyncStub() {
return asyncStub.withDeadlineAfter(getTimeout(), TimeUnit.MILLISECONDS);
}
@@ -110,204 +123,252 @@ public boolean onNotLeader(TiRegion newRegion) {
return false;
}
- // If we try one peer but find the leader has not changed, we do not need try other peers.
- if (candidateLeader != null
- && region.getLeader().getStoreId() == newRegion.getLeader().getStoreId()) {
- retryLeaderTimes = newRegion.getFollowerList().size();
- originStore = null;
+ // If we try one peer but find the leader has not changed, we do not need to try other peers.
+ if (region.getLeader().getStoreId() == newRegion.getLeader().getStoreId()) {
+ store = null;
}
- candidateLeader = null;
region = newRegion;
- targetStore = regionManager.getStoreById(region.getLeader().getStoreId());
+ store = regionManager.getStoreById(region.getLeader().getStoreId());
updateClientStub();
return true;
}
@Override
- public boolean onStoreUnreachable() {
- if (!targetStore.isValid()) {
- logger.warn(
- String.format("store [%d] has been invalid", region.getId(), targetStore.getId()));
- targetStore = regionManager.getStoreById(targetStore.getId());
+ public boolean onStoreUnreachable(SlowLog slowLog) {
+ if (!store.isValid()) {
+ logger.warn(String.format("store [%d] has been invalid", store.getId()));
+ store = regionManager.getStoreById(store.getId());
updateClientStub();
return true;
}
- if (targetStore.getProxyStore() == null) {
- if (targetStore.isReachable()) {
- return true;
- }
+ // seek an available leader store to send request
+ Boolean result = seekLeaderStore(slowLog);
+ if (result != null) {
+ return result;
}
-
- // If this store has failed to forward request too many times, we shall try other peer at first
- // so that we can
- // reduce the latency cost by fail requests.
- if (targetStore.canForwardFirst()) {
- if (conf.getEnableGrpcForward() && retryForwardTimes <= region.getFollowerList().size()) {
- return retryOtherStoreByProxyForward();
- }
- if (retryOtherStoreLeader()) {
- return true;
- }
- } else {
- if (retryOtherStoreLeader()) {
- return true;
- }
- if (conf.getEnableGrpcForward() && retryForwardTimes <= region.getFollowerList().size()) {
- return retryOtherStoreByProxyForward();
- }
- return true;
+ if (conf.getEnableGrpcForward()) {
+ // seek an available proxy store to forward request
+ return seekProxyStore(slowLog);
}
-
- logger.warn(
- String.format(
- "retry time exceed for region[%d], invalid this region[%d]",
- region.getId(), targetStore.getId()));
- regionManager.onRequestFail(region);
return false;
}
protected Kvrpcpb.Context makeContext(TiStoreType storeType) {
- if (candidateLeader != null && storeType == TiStoreType.TiKV) {
- return region.getReplicaContext(candidateLeader, java.util.Collections.emptySet());
- } else {
- return region.getReplicaContext(java.util.Collections.emptySet(), storeType);
- }
+ return region.getReplicaContext(java.util.Collections.emptySet(), storeType);
}
protected Kvrpcpb.Context makeContext(Set resolvedLocks, TiStoreType storeType) {
- if (candidateLeader != null && storeType == TiStoreType.TiKV) {
- return region.getReplicaContext(candidateLeader, resolvedLocks);
- } else {
- return region.getReplicaContext(resolvedLocks, storeType);
- }
+ return region.getReplicaContext(resolvedLocks, storeType);
}
- @Override
- public void tryUpdateRegionStore() {
- if (originStore != null) {
- if (originStore.getId() == targetStore.getId()) {
- logger.warn(
- String.format(
- "update store [%s] by proxy-store [%s]",
- targetStore.getStore().getAddress(), targetStore.getProxyStore().getAddress()));
- // We do not need to mark the store can-forward, because if one store has grpc forward
- // successfully, it will
- // create a new store object, which is can-forward.
- regionManager.updateStore(originStore, targetStore);
- } else {
- // If we try to forward request to leader by follower failed, it means that the store of old
- // leader may be
- // unavailable but the new leader has not been report to PD. So we can ban this store for a
- // short time to
- // avoid too many request try forward rather than try other peer.
- originStore.forwardFail();
- }
+ private void updateClientStub() {
+ String addressStr = store.getStore().getAddress();
+ long deadline = timeout;
+ if (store.getProxyStore() != null) {
+ addressStr = store.getProxyStore().getAddress();
+ deadline = conf.getForwardTimeout();
}
- if (candidateLeader != null) {
- logger.warn(
- String.format(
- "update leader to store [%d] for region[%d]",
- candidateLeader.getStoreId(), region.getId()));
- this.regionManager.updateLeader(region, candidateLeader.getStoreId());
+ ManagedChannel channel =
+ channelFactory.getChannel(addressStr, regionManager.getPDClient().getHostMapping());
+ blockingStub =
+ TikvGrpc.newBlockingStub(channel).withDeadlineAfter(deadline, TimeUnit.MILLISECONDS);
+ asyncStub = TikvGrpc.newFutureStub(channel).withDeadlineAfter(deadline, TimeUnit.MILLISECONDS);
+ if (store.getProxyStore() != null) {
+ Metadata header = new Metadata();
+ header.put(TiConfiguration.FORWARD_META_DATA_KEY, store.getStore().getAddress());
+ blockingStub = MetadataUtils.attachHeaders(blockingStub, header);
+ asyncStub = MetadataUtils.attachHeaders(asyncStub, header);
}
}
- private boolean retryOtherStoreLeader() {
- List peers = region.getFollowerList();
- if (retryLeaderTimes >= peers.size()) {
- return false;
- }
- retryLeaderTimes += 1;
- boolean hasVisitedStore = false;
- for (Metapb.Peer cur : peers) {
- if (candidateLeader == null || hasVisitedStore) {
- TiStore store = regionManager.getStoreById(cur.getStoreId());
- if (store != null && store.isReachable()) {
- targetStore = store;
- candidateLeader = cur;
- logger.warn(
+ private Boolean seekLeaderStore(SlowLog slowLog) {
+ Histogram.Timer switchLeaderDurationTimer = SEEK_LEADER_STORE_DURATION.startTimer();
+ SlowLogSpan slowLogSpan = slowLog.start("seekLeaderStore");
+ try {
+ List peers = region.getFollowerList();
+ if (peers.isEmpty()) {
+ // no followers available, retry
+ logger.warn(String.format("no followers of region[%d] available, retry", region.getId()));
+ regionManager.onRequestFail(region);
+ return false;
+ }
+
+ logger.info(String.format("try switch leader: region[%d]", region.getId()));
+
+ Metapb.Peer peer = switchLeaderStore();
+ if (peer != null) {
+ // we found a leader
+ TiStore currentLeaderStore = regionManager.getStoreById(peer.getStoreId());
+ if (currentLeaderStore.isReachable()) {
+ logger.info(
String.format(
- "try store [%d],peer[%d] for region[%d], which may be new leader",
- targetStore.getId(), candidateLeader.getId(), region.getId()));
+ "update leader using switchLeader logic from store[%d] to store[%d]",
+ region.getLeader().getStoreId(), peer.getStoreId()));
+ // update region cache
+ region = regionManager.updateLeader(region, peer.getStoreId());
+ // switch to leader store
+ store = currentLeaderStore;
updateClientStub();
return true;
- } else {
- continue;
}
- } else if (candidateLeader.getId() == cur.getId()) {
- hasVisitedStore = true;
+ } else {
+ // no leader found, some response does not return normally, there may be network partition.
+ logger.warn(
+ String.format(
+ "leader for region[%d] is not found, it is possible that network partition occurred",
+ region.getId()));
}
+ } finally {
+ switchLeaderDurationTimer.observeDuration();
+ slowLogSpan.end();
}
- candidateLeader = null;
- retryLeaderTimes = peers.size();
- return false;
+ return null;
}
- private void updateClientStub() {
- String addressStr = targetStore.getStore().getAddress();
- if (targetStore.getProxyStore() != null) {
- addressStr = targetStore.getProxyStore().getAddress();
- }
- ManagedChannel channel =
- channelFactory.getChannel(addressStr, regionManager.getPDClient().getHostMapping());
- blockingStub = TikvGrpc.newBlockingStub(channel);
- asyncStub = TikvGrpc.newStub(channel);
- if (targetStore.getProxyStore() != null) {
- Metadata header = new Metadata();
- header.put(TiConfiguration.FORWARD_META_DATA_KEY, targetStore.getStore().getAddress());
- blockingStub = MetadataUtils.attachHeaders(blockingStub, header);
- asyncStub = MetadataUtils.attachHeaders(asyncStub, header);
+ private boolean seekProxyStore(SlowLog slowLog) {
+ SlowLogSpan slowLogSpan = slowLog.start("seekProxyStore");
+ Histogram.Timer grpcForwardDurationTimer = SEEK_PROXY_STORE_DURATION.startTimer();
+ try {
+ logger.info(String.format("try grpc forward: region[%d]", region.getId()));
+ // when current leader cannot be reached
+ TiStore storeWithProxy = switchProxyStore();
+ if (storeWithProxy == null) {
+ // no store available, retry
+ logger.warn(String.format("No store available, retry: region[%d]", region.getId()));
+ return false;
+ }
+ // use proxy store to forward requests
+ regionManager.updateStore(store, storeWithProxy);
+ store = storeWithProxy;
+ updateClientStub();
+ return true;
+ } finally {
+ grpcForwardDurationTimer.observeDuration();
+ slowLogSpan.end();
}
}
- private boolean retryOtherStoreByProxyForward() {
- TiStore proxyStore = switchProxyStore();
- if (proxyStore == null) {
- logger.warn(
- String.format(
- "no forward store can be selected for store [%s] and region[%d]",
- targetStore.getStore().getAddress(), region.getId()));
- return false;
+ // first: leader peer, second: true if any responses returned with grpc error
+ private Metapb.Peer switchLeaderStore() {
+ List responses = new LinkedList<>();
+ for (Metapb.Peer peer : region.getFollowerList()) {
+ ByteString key = region.getStartKey();
+ TiStore peerStore = regionManager.getStoreById(peer.getStoreId());
+ ManagedChannel channel =
+ channelFactory.getChannel(
+ peerStore.getAddress(), regionManager.getPDClient().getHostMapping());
+ TikvGrpc.TikvFutureStub stub =
+ TikvGrpc.newFutureStub(channel).withDeadlineAfter(timeout, TimeUnit.MILLISECONDS);
+ Kvrpcpb.RawGetRequest rawGetRequest =
+ Kvrpcpb.RawGetRequest.newBuilder()
+ .setContext(region.getReplicaContext(peer))
+ .setKey(key)
+ .build();
+ ListenableFuture task = stub.rawGet(rawGetRequest);
+ responses.add(new SwitchLeaderTask(task, peer));
}
- if (originStore == null) {
- originStore = targetStore;
- if (this.targetStore.getProxyStore() != null) {
- this.timeout = conf.getForwardTimeout();
+ while (true) {
+ try {
+ Thread.sleep(2);
+ } catch (InterruptedException e) {
+ throw new GrpcException(e);
+ }
+ List unfinished = new LinkedList<>();
+ for (SwitchLeaderTask task : responses) {
+ if (!task.task.isDone()) {
+ unfinished.add(task);
+ continue;
+ }
+ try {
+ Kvrpcpb.RawGetResponse resp = task.task.get();
+ if (resp != null) {
+ if (!resp.hasRegionError()) {
+ // the peer is leader
+ logger.info(
+ String.format("rawGet response indicates peer[%d] is leader", task.peer.getId()));
+ return task.peer;
+ }
+ }
+ } catch (Exception ignored) {
+ }
}
+ if (unfinished.isEmpty()) {
+ return null;
+ }
+ responses = unfinished;
}
- targetStore = proxyStore;
- retryForwardTimes += 1;
- updateClientStub();
- logger.warn(
- String.format(
- "forward request to store [%s] by store [%s] for region[%d]",
- targetStore.getStore().getAddress(),
- targetStore.getProxyStore().getAddress(),
- region.getId()));
- return true;
}
private TiStore switchProxyStore() {
- boolean hasVisitedStore = false;
- List peers = region.getFollowerList();
- if (peers.isEmpty()) {
- return null;
- }
- Metapb.Store proxyStore = targetStore.getProxyStore();
- if (proxyStore == null || peers.get(peers.size() - 1).getStoreId() == proxyStore.getId()) {
- hasVisitedStore = true;
+ long forwardTimeout = conf.getForwardTimeout();
+ List responses = new LinkedList<>();
+ for (Metapb.Peer peer : region.getFollowerList()) {
+ ByteString key = region.getStartKey();
+ TiStore peerStore = regionManager.getStoreById(peer.getStoreId());
+ ManagedChannel channel =
+ channelFactory.getChannel(
+ peerStore.getAddress(), regionManager.getPDClient().getHostMapping());
+ TikvGrpc.TikvFutureStub stub =
+ TikvGrpc.newFutureStub(channel).withDeadlineAfter(forwardTimeout, TimeUnit.MILLISECONDS);
+ Metadata header = new Metadata();
+ header.put(TiConfiguration.FORWARD_META_DATA_KEY, store.getStore().getAddress());
+ Kvrpcpb.RawGetRequest rawGetRequest =
+ Kvrpcpb.RawGetRequest.newBuilder()
+ .setContext(region.getReplicaContext(peer))
+ .setKey(key)
+ .build();
+ ListenableFuture task =
+ MetadataUtils.attachHeaders(stub, header).rawGet(rawGetRequest);
+ responses.add(new ForwardCheckTask(task, peerStore.getStore()));
}
- for (Metapb.Peer peer : peers) {
- if (hasVisitedStore) {
- TiStore store = regionManager.getStoreById(peer.getStoreId());
- if (store.isReachable()) {
- return targetStore.withProxy(store.getStore());
+ while (true) {
+ try {
+ Thread.sleep(2);
+ } catch (InterruptedException e) {
+ throw new GrpcException(e);
+ }
+ List unfinished = new LinkedList<>();
+ for (ForwardCheckTask task : responses) {
+ if (!task.task.isDone()) {
+ unfinished.add(task);
+ continue;
+ }
+ try {
+ // any answer will do
+ Kvrpcpb.RawGetResponse resp = task.task.get();
+ logger.info(
+ String.format(
+ "rawGetResponse indicates forward from [%s] to [%s]",
+ task.store.getAddress(), store.getAddress()));
+ return store.withProxy(task.store);
+ } catch (Exception ignored) {
}
- } else if (peer.getStoreId() == proxyStore.getId()) {
- hasVisitedStore = true;
}
+ if (unfinished.isEmpty()) {
+ return null;
+ }
+ responses = unfinished;
+ }
+ }
+
+ private static class SwitchLeaderTask {
+ private final ListenableFuture task;
+ private final Metapb.Peer peer;
+
+ private SwitchLeaderTask(ListenableFuture task, Metapb.Peer peer) {
+ this.task = task;
+ this.peer = peer;
+ }
+ }
+
+ private static class ForwardCheckTask {
+ private final ListenableFuture task;
+ private final Metapb.Store store;
+
+ private ForwardCheckTask(ListenableFuture task, Metapb.Store store) {
+ this.task = task;
+ this.store = store;
}
- return null;
}
}
diff --git a/src/main/java/org/tikv/common/region/RegionCache.java b/src/main/java/org/tikv/common/region/RegionCache.java
index 77f41010a67..1b8f457da05 100644
--- a/src/main/java/org/tikv/common/region/RegionCache.java
+++ b/src/main/java/org/tikv/common/region/RegionCache.java
@@ -104,6 +104,18 @@ public synchronized void invalidateRegion(TiRegion region) {
}
}
+ public synchronized void insertRegionToCache(TiRegion region) {
+ try {
+ TiRegion oldRegion = regionCache.get(region.getId());
+ if (oldRegion != null) {
+ keyToRegionIdCache.remove(makeRange(oldRegion.getStartKey(), oldRegion.getEndKey()));
+ }
+ regionCache.put(region.getId(), region);
+ keyToRegionIdCache.put(makeRange(region.getStartKey(), region.getEndKey()), region.getId());
+ } catch (Exception ignore) {
+ }
+ }
+
public synchronized boolean updateRegion(TiRegion expected, TiRegion region) {
try {
if (logger.isDebugEnabled()) {
diff --git a/src/main/java/org/tikv/common/region/RegionErrorReceiver.java b/src/main/java/org/tikv/common/region/RegionErrorReceiver.java
index 0a5bcabf037..307538d4a82 100644
--- a/src/main/java/org/tikv/common/region/RegionErrorReceiver.java
+++ b/src/main/java/org/tikv/common/region/RegionErrorReceiver.java
@@ -17,13 +17,13 @@
package org.tikv.common.region;
+import org.tikv.common.log.SlowLog;
+
public interface RegionErrorReceiver {
boolean onNotLeader(TiRegion region);
/// return whether we need to retry this request.
- boolean onStoreUnreachable();
-
- void tryUpdateRegionStore();
+ boolean onStoreUnreachable(SlowLog slowLog);
TiRegion getRegion();
}
diff --git a/src/main/java/org/tikv/common/region/RegionManager.java b/src/main/java/org/tikv/common/region/RegionManager.java
index 1556f5d2a95..8bb4f8186cc 100644
--- a/src/main/java/org/tikv/common/region/RegionManager.java
+++ b/src/main/java/org/tikv/common/region/RegionManager.java
@@ -31,6 +31,7 @@
import org.tikv.common.ReadOnlyPDClient;
import org.tikv.common.TiConfiguration;
import org.tikv.common.exception.GrpcException;
+import org.tikv.common.exception.InvalidStoreException;
import org.tikv.common.exception.TiClientInternalException;
import org.tikv.common.util.BackOffer;
import org.tikv.common.util.ChannelFactory;
@@ -158,9 +159,6 @@ public Pair getRegionStorePairByKey(
if (storeType == TiStoreType.TiKV) {
Peer peer = region.getCurrentReplica();
store = getStoreById(peer.getStoreId(), backOffer);
- if (store == null) {
- cache.clearAll();
- }
} else {
outerLoop:
for (Peer peer : region.getLearnerList()) {
@@ -174,19 +172,19 @@ public Pair getRegionStorePairByKey(
}
}
if (store == null) {
- // clear the region cache so we may get the learner peer next time
+ // clear the region cache, so we may get the learner peer next time
cache.invalidateRegion(region);
}
}
-
- if (store == null) {
- throw new TiClientInternalException(
- "Cannot find valid store on " + storeType + " for region " + region);
- }
-
return Pair.create(region, store);
}
+ public TiRegion createRegion(Metapb.Region region, BackOffer backOffer) {
+ List peers = region.getPeersList();
+ List stores = getRegionStore(peers, backOffer);
+ return new TiRegion(conf, region, null, peers, stores);
+ }
+
private TiRegion createRegion(Metapb.Region region, Metapb.Peer leader, BackOffer backOffer) {
List peers = region.getPeersList();
List stores = getRegionStore(peers, backOffer);
@@ -194,16 +192,26 @@ private TiRegion createRegion(Metapb.Region region, Metapb.Peer leader, BackOffe
}
private List getRegionStore(List peers, BackOffer backOffer) {
- return peers.stream().map(p -> getStoreById(p.getStoreId())).collect(Collectors.toList());
+ return peers
+ .stream()
+ .map(p -> getStoreById(p.getStoreId(), backOffer))
+ .collect(Collectors.toList());
}
- public TiStore getStoreById(long id, BackOffer backOffer) {
+ private TiStore getStoreByIdWithBackOff(long id, BackOffer backOffer) {
try {
TiStore store = cache.getStoreById(id);
if (store == null) {
store = new TiStore(pdClient.getStore(backOffer, id));
}
+ // if we did not get store info from pd, remove store from cache
+ if (store.getStore() == null) {
+ logger.warn(String.format("failed to get store %d from pd", id));
+ return null;
+ }
+ // if the store is already tombstone, remove store from cache
if (store.getStore().getState().equals(StoreState.Tombstone)) {
+ logger.warn(String.format("store %d is tombstone", id));
return null;
}
if (cache.putStore(id, store) && storeChecker != null) {
@@ -219,6 +227,16 @@ public TiStore getStoreById(long id) {
return getStoreById(id, defaultBackOff());
}
+ public TiStore getStoreById(long id, BackOffer backOffer) {
+ TiStore store = getStoreByIdWithBackOff(id, backOffer);
+ if (store == null) {
+ logger.warn(String.format("failed to fetch store %d, the store may be missing", id));
+ cache.clearAll();
+ throw new InvalidStoreException(id);
+ }
+ return store;
+ }
+
public void onRegionStale(TiRegion region) {
cache.invalidateRegion(region);
}
@@ -265,6 +283,10 @@ public void invalidateRegion(TiRegion region) {
cache.invalidateRegion(region);
}
+ public void insertRegionToCache(TiRegion region) {
+ cache.insertRegionToCache(region);
+ }
+
private BackOffer defaultBackOff() {
return ConcreteBackOffer.newCustomBackOff(conf.getRawKVDefaultBackoffInMS());
}
diff --git a/src/main/java/org/tikv/common/region/RegionStoreClient.java b/src/main/java/org/tikv/common/region/RegionStoreClient.java
index db54e581327..4b6452f8ac8 100644
--- a/src/main/java/org/tikv/common/region/RegionStoreClient.java
+++ b/src/main/java/org/tikv/common/region/RegionStoreClient.java
@@ -48,7 +48,7 @@
import org.tikv.kvproto.Metapb;
import org.tikv.kvproto.TikvGrpc;
import org.tikv.kvproto.TikvGrpc.TikvBlockingStub;
-import org.tikv.kvproto.TikvGrpc.TikvStub;
+import org.tikv.kvproto.TikvGrpc.TikvFutureStub;
import org.tikv.txn.AbstractLockResolverClient;
import org.tikv.txn.Lock;
import org.tikv.txn.ResolveLockResult;
@@ -93,7 +93,7 @@ private RegionStoreClient(
TiStoreType storeType,
ChannelFactory channelFactory,
TikvBlockingStub blockingStub,
- TikvStub asyncStub,
+ TikvFutureStub asyncStub,
RegionManager regionManager,
PDClient pdClient,
RegionStoreClient.RegionStoreClientBuilder clientBuilder) {
@@ -124,7 +124,7 @@ private RegionStoreClient(
ManagedChannel channel = channelFactory.getChannel(addressStr, pdClient.getHostMapping());
TikvBlockingStub tikvBlockingStub = TikvGrpc.newBlockingStub(channel);
- TikvStub tikvAsyncStub = TikvGrpc.newStub(channel);
+ TikvGrpc.TikvFutureStub tikvAsyncStub = TikvGrpc.newFutureStub(channel);
this.lockResolverClient =
AbstractLockResolverClient.getInstance(
@@ -1264,7 +1264,7 @@ public RegionStoreClient build(TiRegion region, TiStore store, TiStoreType store
ManagedChannel channel = null;
TikvBlockingStub blockingStub = null;
- TikvStub asyncStub = null;
+ TikvFutureStub asyncStub = null;
if (conf.getEnableGrpcForward() && store.getProxyStore() != null && !store.isReachable()) {
addressStr = store.getProxyStore().getAddress();
@@ -1273,11 +1273,11 @@ public RegionStoreClient build(TiRegion region, TiStore store, TiStoreType store
Metadata header = new Metadata();
header.put(TiConfiguration.FORWARD_META_DATA_KEY, store.getStore().getAddress());
blockingStub = MetadataUtils.attachHeaders(TikvGrpc.newBlockingStub(channel), header);
- asyncStub = MetadataUtils.attachHeaders(TikvGrpc.newStub(channel), header);
+ asyncStub = MetadataUtils.attachHeaders(TikvGrpc.newFutureStub(channel), header);
} else {
channel = channelFactory.getChannel(addressStr, pdClient.getHostMapping());
blockingStub = TikvGrpc.newBlockingStub(channel);
- asyncStub = TikvGrpc.newStub(channel);
+ asyncStub = TikvGrpc.newFutureStub(channel);
}
return new RegionStoreClient(
diff --git a/src/main/java/org/tikv/common/region/StoreHealthyChecker.java b/src/main/java/org/tikv/common/region/StoreHealthyChecker.java
index c87cefd7b03..da8fdc3657d 100644
--- a/src/main/java/org/tikv/common/region/StoreHealthyChecker.java
+++ b/src/main/java/org/tikv/common/region/StoreHealthyChecker.java
@@ -110,14 +110,9 @@ public void run() {
}
} else {
if (!store.isReachable()) {
- logger.warn(
- String.format(
- "store [%s] recovers to be reachable and canforward", store.getAddress()));
+ logger.warn(String.format("store [%s] recovers to be reachable", store.getAddress()));
store.markReachable();
}
- if (!store.canForwardFirst()) {
- store.makrCanForward();
- }
}
} else if (store.isReachable()) {
unreachableStore.add(store);
diff --git a/src/main/java/org/tikv/common/region/TiRegion.java b/src/main/java/org/tikv/common/region/TiRegion.java
index 2f34f1064d2..0a1d262b573 100644
--- a/src/main/java/org/tikv/common/region/TiRegion.java
+++ b/src/main/java/org/tikv/common/region/TiRegion.java
@@ -82,6 +82,10 @@ public TiRegion(
replicaIdx = 0;
}
+ public TiConfiguration getConf() {
+ return conf;
+ }
+
public Peer getLeader() {
return leader;
}
@@ -155,6 +159,10 @@ public Kvrpcpb.Context getReplicaContext(Peer currentPeer, Set resolvedLoc
return getContext(currentPeer, resolvedLocks, false);
}
+ public Kvrpcpb.Context getReplicaContext(Peer currentPeer) {
+ return getContext(currentPeer, java.util.Collections.emptySet(), false);
+ }
+
private Kvrpcpb.Context getContext(
Peer currentPeer, Set resolvedLocks, boolean replicaRead) {
@@ -271,6 +279,18 @@ public class RegionVerID {
this.ver = ver;
}
+ public long getId() {
+ return id;
+ }
+
+ public long getConfVer() {
+ return confVer;
+ }
+
+ public long getVer() {
+ return ver;
+ }
+
@Override
public boolean equals(Object other) {
if (this == other) {
diff --git a/src/main/java/org/tikv/common/region/TiStore.java b/src/main/java/org/tikv/common/region/TiStore.java
index 8beacaa0210..c246490df91 100644
--- a/src/main/java/org/tikv/common/region/TiStore.java
+++ b/src/main/java/org/tikv/common/region/TiStore.java
@@ -2,25 +2,19 @@
import java.io.Serializable;
import java.util.concurrent.atomic.AtomicBoolean;
-import java.util.concurrent.atomic.AtomicLong;
import org.tikv.kvproto.Metapb;
public class TiStore implements Serializable {
- private static final long MAX_FAIL_FORWARD_TIMES = 4;
private final Metapb.Store store;
private final Metapb.Store proxyStore;
private final AtomicBoolean reachable;
private final AtomicBoolean valid;
- private final AtomicLong failForwardCount;
- private final AtomicBoolean canForward;
public TiStore(Metapb.Store store) {
this.store = store;
this.reachable = new AtomicBoolean(true);
this.valid = new AtomicBoolean(true);
- this.canForward = new AtomicBoolean(true);
this.proxyStore = null;
- this.failForwardCount = new AtomicLong(0);
}
private TiStore(Metapb.Store store, Metapb.Store proxyStore) {
@@ -31,9 +25,7 @@ private TiStore(Metapb.Store store, Metapb.Store proxyStore) {
this.reachable = new AtomicBoolean(true);
}
this.valid = new AtomicBoolean(true);
- this.canForward = new AtomicBoolean(true);
this.proxyStore = proxyStore;
- this.failForwardCount = new AtomicLong(0);
}
@java.lang.Override
@@ -82,23 +74,6 @@ public void markInvalid() {
this.valid.set(false);
}
- public void forwardFail() {
- if (this.canForward.get()) {
- if (this.failForwardCount.addAndGet(1) >= MAX_FAIL_FORWARD_TIMES) {
- this.canForward.set(false);
- }
- }
- }
-
- public void makrCanForward() {
- this.failForwardCount.set(0);
- this.canForward.set(true);
- }
-
- public boolean canForwardFirst() {
- return this.canForward.get();
- }
-
public Metapb.Store getStore() {
return this.store;
}
diff --git a/src/main/java/org/tikv/common/util/BackOffFunction.java b/src/main/java/org/tikv/common/util/BackOffFunction.java
index 702fdd47d63..b80e9e8ec5e 100644
--- a/src/main/java/org/tikv/common/util/BackOffFunction.java
+++ b/src/main/java/org/tikv/common/util/BackOffFunction.java
@@ -1,7 +1,6 @@
package org.tikv.common.util;
import java.util.concurrent.ThreadLocalRandom;
-import org.tikv.common.exception.GrpcException;
public class BackOffFunction {
private final int base;
@@ -25,7 +24,7 @@ public static BackOffFunction create(int base, int cap, BackOffer.BackOffStrateg
* Do back off in exponential with optional jitters according to different back off strategies.
* See http://www.awsarchitectureblog.com/2015/03/backoff.html
*/
- long doBackOff(long maxSleepMs) {
+ long getSleepMs(long maxSleepMs) {
long sleep = 0;
long v = expo(base, cap, attempts);
switch (strategy) {
@@ -47,11 +46,6 @@ long doBackOff(long maxSleepMs) {
sleep = maxSleepMs;
}
- try {
- Thread.sleep(sleep);
- } catch (InterruptedException e) {
- throw new GrpcException(e);
- }
attempts++;
lastSleep = sleep;
return lastSleep;
diff --git a/src/main/java/org/tikv/common/util/BackOffer.java b/src/main/java/org/tikv/common/util/BackOffer.java
index 7d7513635f7..77b05183267 100644
--- a/src/main/java/org/tikv/common/util/BackOffer.java
+++ b/src/main/java/org/tikv/common/util/BackOffer.java
@@ -17,6 +17,8 @@
package org.tikv.common.util;
+import org.tikv.common.log.SlowLog;
+
public interface BackOffer {
// Back off types.
int seconds = 1000;
@@ -37,6 +39,12 @@ public interface BackOffer {
* max back off time exceeded and throw an exception to the caller.
*/
void doBackOff(BackOffFunction.BackOffFuncType funcType, Exception err);
+ /**
+ * canRetryAfterSleep sleeps a while base on the BackOffType and records the error message. Will
+ * stop until max back off time exceeded and throw an exception to the caller. It will return
+ * false if the total sleep time has exceed some limit condition.
+ */
+ boolean canRetryAfterSleep(BackOffFunction.BackOffFuncType funcType);
/**
* BackoffWithMaxSleep sleeps a while base on the backoffType and records the error message and
@@ -56,4 +64,6 @@ enum BackOffStrategy {
// DecorrJitter increases the maximum jitter based on the last random value.
DecorrJitter
}
+
+ SlowLog getSlowLog();
}
diff --git a/src/main/java/org/tikv/common/util/ChannelFactory.java b/src/main/java/org/tikv/common/util/ChannelFactory.java
index a517473089f..65729a49723 100644
--- a/src/main/java/org/tikv/common/util/ChannelFactory.java
+++ b/src/main/java/org/tikv/common/util/ChannelFactory.java
@@ -100,7 +100,7 @@ public ManagedChannel getChannel(String addressStr, HostMapping hostMapping) {
.idleTimeout(60, TimeUnit.SECONDS);
if (sslContextBuilder == null) {
- return builder.usePlaintext(true).build();
+ return builder.usePlaintext().build();
} else {
SslContext sslContext = null;
try {
diff --git a/src/main/java/org/tikv/common/util/ClientUtils.java b/src/main/java/org/tikv/common/util/ClientUtils.java
index 22783d7cf34..88ac4143397 100644
--- a/src/main/java/org/tikv/common/util/ClientUtils.java
+++ b/src/main/java/org/tikv/common/util/ClientUtils.java
@@ -19,6 +19,7 @@
import java.util.*;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.ExecutorCompletionService;
+import java.util.concurrent.Future;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.TimeoutException;
import org.tikv.common.exception.TiKVException;
@@ -170,10 +171,14 @@ public static void getTasks(
ExecutorCompletionService> completionService,
Queue> taskQueue,
List batches,
- int backOff) {
+ long backOff) {
try {
for (int i = 0; i < batches.size(); i++) {
- List task = completionService.take().get(backOff, TimeUnit.MILLISECONDS);
+ Future> future = completionService.poll(backOff, TimeUnit.MILLISECONDS);
+ if (future == null) {
+ throw new TiKVException("TimeOut Exceeded for current operation.");
+ }
+ List task = future.get();
if (!task.isEmpty()) {
taskQueue.offer(task);
}
@@ -181,8 +186,6 @@ public static void getTasks(
} catch (InterruptedException e) {
Thread.currentThread().interrupt();
throw new TiKVException("Current thread interrupted.", e);
- } catch (TimeoutException e) {
- throw new TiKVException("TimeOut Exceeded for current operation. ", e);
} catch (ExecutionException e) {
throw new TiKVException("Execution exception met.", e);
}
@@ -192,11 +195,16 @@ public static List getTasksWithOutput(
ExecutorCompletionService, List>> completionService,
Queue> taskQueue,
List batches,
- int backOff) {
+ long backOff) {
try {
List result = new ArrayList<>();
for (int i = 0; i < batches.size(); i++) {
- Pair, List> task = completionService.take().get(backOff, TimeUnit.MILLISECONDS);
+ Future, List>> future =
+ completionService.poll(backOff, TimeUnit.MILLISECONDS);
+ if (future == null) {
+ throw new TiKVException("TimeOut Exceeded for current operation.");
+ }
+ Pair, List> task = future.get();
if (!task.first.isEmpty()) {
taskQueue.offer(task.first);
} else {
@@ -207,8 +215,6 @@ public static List getTasksWithOutput(
} catch (InterruptedException e) {
Thread.currentThread().interrupt();
throw new TiKVException("Current thread interrupted.", e);
- } catch (TimeoutException e) {
- throw new TiKVException("TimeOut Exceeded for current operation. ", e);
} catch (ExecutionException e) {
throw new TiKVException("Execution exception met.", e);
}
diff --git a/src/main/java/org/tikv/common/util/ConcreteBackOffer.java b/src/main/java/org/tikv/common/util/ConcreteBackOffer.java
index a7af12b85c1..89c79c30783 100644
--- a/src/main/java/org/tikv/common/util/ConcreteBackOffer.java
+++ b/src/main/java/org/tikv/common/util/ConcreteBackOffer.java
@@ -17,14 +17,21 @@
package org.tikv.common.util;
+import static org.tikv.common.ConfigUtils.TIKV_BO_REGION_MISS_BASE_IN_MS;
+
import com.google.common.base.Preconditions;
+import io.prometheus.client.Histogram;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
+import org.tikv.common.TiConfiguration;
import org.tikv.common.exception.GrpcException;
+import org.tikv.common.log.SlowLog;
+import org.tikv.common.log.SlowLogEmptyImpl;
+import org.tikv.common.log.SlowLogSpan;
public class ConcreteBackOffer implements BackOffer {
private static final Logger logger = LoggerFactory.getLogger(ConcreteBackOffer.class);
@@ -32,12 +39,26 @@ public class ConcreteBackOffer implements BackOffer {
private final Map backOffFunctionMap;
private final List errors;
private int totalSleep;
-
- private ConcreteBackOffer(int maxSleep) {
+ private final long deadline;
+ private final SlowLog slowLog;
+
+ public static final Histogram BACKOFF_DURATION =
+ Histogram.build()
+ .name("client_java_backoff_duration")
+ .help("backoff duration.")
+ .labelNames("type")
+ .register();
+
+ private ConcreteBackOffer(int maxSleep, long deadline, SlowLog slowLog) {
+ Preconditions.checkArgument(
+ maxSleep == 0 || deadline == 0, "Max sleep time should be 0 or Deadline should be 0.");
Preconditions.checkArgument(maxSleep >= 0, "Max sleep time cannot be less than 0.");
+ Preconditions.checkArgument(deadline >= 0, "Deadline cannot be less than 0.");
this.maxSleep = maxSleep;
this.errors = new ArrayList<>();
this.backOffFunctionMap = new HashMap<>();
+ this.deadline = deadline;
+ this.slowLog = slowLog;
}
private ConcreteBackOffer(ConcreteBackOffer source) {
@@ -45,34 +66,41 @@ private ConcreteBackOffer(ConcreteBackOffer source) {
this.totalSleep = source.totalSleep;
this.errors = source.errors;
this.backOffFunctionMap = source.backOffFunctionMap;
+ this.deadline = source.deadline;
+ this.slowLog = source.slowLog;
+ }
+
+ public static ConcreteBackOffer newDeadlineBackOff(int timeoutInMs, SlowLog slowLog) {
+ long deadline = System.currentTimeMillis() + timeoutInMs;
+ return new ConcreteBackOffer(0, deadline, slowLog);
}
public static ConcreteBackOffer newCustomBackOff(int maxSleep) {
- return new ConcreteBackOffer(maxSleep);
+ return new ConcreteBackOffer(maxSleep, 0, SlowLogEmptyImpl.INSTANCE);
}
public static ConcreteBackOffer newScannerNextMaxBackOff() {
- return new ConcreteBackOffer(SCANNER_NEXT_MAX_BACKOFF);
+ return new ConcreteBackOffer(SCANNER_NEXT_MAX_BACKOFF, 0, SlowLogEmptyImpl.INSTANCE);
}
public static ConcreteBackOffer newBatchGetMaxBackOff() {
- return new ConcreteBackOffer(BATCH_GET_MAX_BACKOFF);
+ return new ConcreteBackOffer(BATCH_GET_MAX_BACKOFF, 0, SlowLogEmptyImpl.INSTANCE);
}
public static ConcreteBackOffer newCopNextMaxBackOff() {
- return new ConcreteBackOffer(COP_NEXT_MAX_BACKOFF);
+ return new ConcreteBackOffer(COP_NEXT_MAX_BACKOFF, 0, SlowLogEmptyImpl.INSTANCE);
}
public static ConcreteBackOffer newGetBackOff() {
- return new ConcreteBackOffer(GET_MAX_BACKOFF);
+ return new ConcreteBackOffer(GET_MAX_BACKOFF, 0, SlowLogEmptyImpl.INSTANCE);
}
public static ConcreteBackOffer newRawKVBackOff() {
- return new ConcreteBackOffer(RAWKV_MAX_BACKOFF);
+ return new ConcreteBackOffer(RAWKV_MAX_BACKOFF, 0, SlowLogEmptyImpl.INSTANCE);
}
public static ConcreteBackOffer newTsoBackOff() {
- return new ConcreteBackOffer(TSO_MAX_BACKOFF);
+ return new ConcreteBackOffer(TSO_MAX_BACKOFF, 0, SlowLogEmptyImpl.INSTANCE);
}
public static ConcreteBackOffer create(BackOffer source) {
@@ -96,7 +124,11 @@ private BackOffFunction createBackOffFunc(BackOffFunction.BackOffFuncType funcTy
backOffFunction = BackOffFunction.create(2000, 10000, BackOffStrategy.EqualJitter);
break;
case BoRegionMiss:
- backOffFunction = BackOffFunction.create(100, 500, BackOffStrategy.NoJitter);
+ backOffFunction =
+ BackOffFunction.create(
+ TiConfiguration.getInt(TIKV_BO_REGION_MISS_BASE_IN_MS),
+ 500,
+ BackOffStrategy.NoJitter);
break;
case BoTxnLock:
backOffFunction = BackOffFunction.create(200, 3000, BackOffStrategy.EqualJitter);
@@ -105,7 +137,7 @@ private BackOffFunction createBackOffFunc(BackOffFunction.BackOffFuncType funcTy
backOffFunction = BackOffFunction.create(100, 600, BackOffStrategy.EqualJitter);
break;
case BoTiKVRPC:
- backOffFunction = BackOffFunction.create(100, 400, BackOffStrategy.EqualJitter);
+ backOffFunction = BackOffFunction.create(10, 400, BackOffStrategy.EqualJitter);
break;
case BoTxnNotFound:
backOffFunction = BackOffFunction.create(2, 500, BackOffStrategy.NoJitter);
@@ -120,32 +152,72 @@ public void doBackOff(BackOffFunction.BackOffFuncType funcType, Exception err) {
}
@Override
- public void doBackOffWithMaxSleep(
- BackOffFunction.BackOffFuncType funcType, long maxSleepMs, Exception err) {
+ public boolean canRetryAfterSleep(BackOffFunction.BackOffFuncType funcType) {
+ return canRetryAfterSleep(funcType, -1);
+ }
+
+ public boolean canRetryAfterSleep(BackOffFunction.BackOffFuncType funcType, long maxSleepMs) {
+ SlowLogSpan slowLogSpan = getSlowLog().start("backoff " + funcType.name());
+ Histogram.Timer backOffTimer = BACKOFF_DURATION.labels(funcType.name()).startTimer();
BackOffFunction backOffFunction =
backOffFunctionMap.computeIfAbsent(funcType, this::createBackOffFunc);
- // Back off will be done here
- totalSleep += backOffFunction.doBackOff(maxSleepMs);
+ // Back off will not be done here
+ long sleep = backOffFunction.getSleepMs(maxSleepMs);
+ totalSleep += sleep;
+ // Check deadline
+ if (deadline > 0) {
+ long currentMs = System.currentTimeMillis();
+ if (currentMs + sleep >= deadline) {
+ logger.warn(String.format("Deadline %d is exceeded, errors:", deadline));
+ return false;
+ }
+ }
+
+ try {
+ Thread.sleep(sleep);
+ } catch (InterruptedException e) {
+ throw new GrpcException(e);
+ } finally {
+ slowLogSpan.end();
+ backOffTimer.observeDuration();
+ }
+ if (maxSleep > 0 && totalSleep >= maxSleep) {
+ logger.warn(String.format("BackOffer.maxSleep %dms is exceeded, errors:", maxSleep));
+ return false;
+ }
+ return true;
+ }
+
+ @Override
+ public void doBackOffWithMaxSleep(
+ BackOffFunction.BackOffFuncType funcType, long maxSleepMs, Exception err) {
logger.debug(
String.format(
"%s, retry later(totalSleep %dms, maxSleep %dms)",
err.getMessage(), totalSleep, maxSleep));
errors.add(err);
- if (maxSleep > 0 && totalSleep >= maxSleep) {
- StringBuilder errMsg =
- new StringBuilder(
- String.format("BackOffer.maxSleep %dms is exceeded, errors:", maxSleep));
- for (int i = 0; i < errors.size(); i++) {
- Exception curErr = errors.get(i);
- // Print only last 3 errors for non-DEBUG log levels.
- if (logger.isDebugEnabled() || i >= errors.size() - 3) {
- errMsg.append("\n").append(i).append(".").append(curErr.toString());
- }
+ if (!canRetryAfterSleep(funcType, maxSleepMs)) {
+ logThrowError(err);
+ }
+ }
+
+ private void logThrowError(Exception err) {
+ StringBuilder errMsg = new StringBuilder();
+ for (int i = 0; i < errors.size(); i++) {
+ Exception curErr = errors.get(i);
+ // Print only last 3 errors for non-DEBUG log levels.
+ if (logger.isDebugEnabled() || i >= errors.size() - 3) {
+ errMsg.append("\n").append(i).append(".").append(curErr.toString());
}
- logger.warn(errMsg.toString());
- // Use the last backoff type to generate an exception
- throw new GrpcException("retry is exhausted.", err);
}
+ logger.warn(errMsg.toString());
+ // Use the last backoff type to generate an exception
+ throw new GrpcException("retry is exhausted.", err);
+ }
+
+ @Override
+ public SlowLog getSlowLog() {
+ return slowLog;
}
}
diff --git a/src/main/java/org/tikv/raw/RawKVClient.java b/src/main/java/org/tikv/raw/RawKVClient.java
index 2b30dbef9c7..a5f3dd6f3bb 100644
--- a/src/main/java/org/tikv/raw/RawKVClient.java
+++ b/src/main/java/org/tikv/raw/RawKVClient.java
@@ -27,12 +27,16 @@
import org.slf4j.LoggerFactory;
import org.tikv.common.TiConfiguration;
import org.tikv.common.TiSession;
+import org.tikv.common.codec.KeyUtils;
import org.tikv.common.exception.GrpcException;
import org.tikv.common.exception.RawCASConflictException;
import org.tikv.common.exception.TiKVException;
import org.tikv.common.importer.ImporterClient;
import org.tikv.common.importer.SwitchTiKVModeClient;
import org.tikv.common.key.Key;
+import org.tikv.common.log.SlowLog;
+import org.tikv.common.log.SlowLogEmptyImpl;
+import org.tikv.common.log.SlowLogImpl;
import org.tikv.common.operation.iterator.RawScanIterator;
import org.tikv.common.region.RegionStoreClient;
import org.tikv.common.region.RegionStoreClient.RegionStoreClientBuilder;
@@ -122,16 +126,27 @@ public void put(ByteString key, ByteString value) {
public void put(ByteString key, ByteString value, long ttl) {
String label = "client_raw_put";
Histogram.Timer requestTimer = RAW_REQUEST_LATENCY.labels(label).startTimer();
+ SlowLog slowLog =
+ new SlowLogImpl(
+ conf.getRawKVWriteSlowLogInMS(),
+ new HashMap(2) {
+ {
+ put("func", "put");
+ put("key", KeyUtils.formatBytesUTF8(key));
+ }
+ });
+ ConcreteBackOffer backOffer =
+ ConcreteBackOffer.newDeadlineBackOff(conf.getRawKVWriteTimeoutInMS(), slowLog);
try {
- BackOffer backOffer = defaultBackOff();
while (true) {
- RegionStoreClient client = clientBuilder.build(key, backOffer);
- try {
+ try (RegionStoreClient client = clientBuilder.build(key, backOffer)) {
+ slowLog.addProperty("region", client.getRegion().toString());
client.rawPut(backOffer, key, value, ttl, atomicForCAS);
RAW_REQUEST_SUCCESS.labels(label).inc();
return;
} catch (final TiKVException e) {
backOffer.doBackOff(BackOffFunction.BackOffFuncType.BoRegionMiss, e);
+ logger.warn("Retry for put error", e);
}
}
} catch (Exception e) {
@@ -139,6 +154,7 @@ public void put(ByteString key, ByteString value, long ttl) {
throw e;
} finally {
requestTimer.observeDuration();
+ slowLog.log();
}
}
@@ -208,16 +224,27 @@ public void compareAndSet(
String label = "client_raw_compare_and_set";
Histogram.Timer requestTimer = RAW_REQUEST_LATENCY.labels(label).startTimer();
+ SlowLog slowLog =
+ new SlowLogImpl(
+ conf.getRawKVWriteSlowLogInMS(),
+ new HashMap(2) {
+ {
+ put("func", "putIfAbsent");
+ put("key", KeyUtils.formatBytesUTF8(key));
+ }
+ });
+ ConcreteBackOffer backOffer =
+ ConcreteBackOffer.newDeadlineBackOff(conf.getRawKVWriteTimeoutInMS(), slowLog);
try {
- BackOffer backOffer = defaultBackOff();
while (true) {
- RegionStoreClient client = clientBuilder.build(key, backOffer);
- try {
+ try (RegionStoreClient client = clientBuilder.build(key, backOffer)) {
+ slowLog.addProperty("region", client.getRegion().toString());
client.rawCompareAndSet(backOffer, key, prevValue, value, ttl);
RAW_REQUEST_SUCCESS.labels(label).inc();
return;
} catch (final TiKVException e) {
backOffer.doBackOff(BackOffFunction.BackOffFuncType.BoRegionMiss, e);
+ logger.warn("Retry for putIfAbsent error", e);
}
}
} catch (Exception e) {
@@ -225,6 +252,7 @@ public void compareAndSet(
throw e;
} finally {
requestTimer.observeDuration();
+ slowLog.log();
}
}
@@ -246,14 +274,27 @@ public void batchPut(Map kvPairs) {
public void batchPut(Map kvPairs, long ttl) {
String label = "client_raw_batch_put";
Histogram.Timer requestTimer = RAW_REQUEST_LATENCY.labels(label).startTimer();
+ SlowLog slowLog =
+ new SlowLogImpl(
+ conf.getRawKVBatchWriteSlowLogInMS(),
+ new HashMap(2) {
+ {
+ put("func", "batchPut");
+ put("keySize", String.valueOf(kvPairs.size()));
+ }
+ });
+ ConcreteBackOffer backOffer =
+ ConcreteBackOffer.newDeadlineBackOff(conf.getRawKVBatchWriteTimeoutInMS(), slowLog);
try {
- doSendBatchPut(defaultBackOff(), kvPairs, ttl);
+ long deadline = System.currentTimeMillis() + conf.getRawKVBatchWriteTimeoutInMS();
+ doSendBatchPut(backOffer, kvPairs, ttl, deadline);
RAW_REQUEST_SUCCESS.labels(label).inc();
} catch (Exception e) {
RAW_REQUEST_FAILURE.labels(label).inc();
throw e;
} finally {
requestTimer.observeDuration();
+ slowLog.log();
}
}
@@ -266,16 +307,28 @@ public void batchPut(Map kvPairs, long ttl) {
public Optional get(ByteString key) {
String label = "client_raw_get";
Histogram.Timer requestTimer = RAW_REQUEST_LATENCY.labels(label).startTimer();
+ SlowLog slowLog =
+ new SlowLogImpl(
+ conf.getRawKVReadSlowLogInMS(),
+ new HashMap(2) {
+ {
+ put("func", "get");
+ put("key", KeyUtils.formatBytesUTF8(key));
+ }
+ });
+
+ ConcreteBackOffer backOffer =
+ ConcreteBackOffer.newDeadlineBackOff(conf.getRawKVReadTimeoutInMS(), slowLog);
try {
- BackOffer backOffer = defaultBackOff();
while (true) {
- RegionStoreClient client = clientBuilder.build(key, backOffer);
- try {
- Optional result = client.rawGet(defaultBackOff(), key);
+ try (RegionStoreClient client = clientBuilder.build(key, backOffer)) {
+ slowLog.addProperty("region", client.getRegion().toString());
+ Optional result = client.rawGet(backOffer, key);
RAW_REQUEST_SUCCESS.labels(label).inc();
return result;
} catch (final TiKVException e) {
backOffer.doBackOff(BackOffFunction.BackOffFuncType.BoRegionMiss, e);
+ logger.warn("Retry for get error", e);
}
}
} catch (Exception e) {
@@ -283,6 +336,7 @@ public Optional get(ByteString key) {
throw e;
} finally {
requestTimer.observeDuration();
+ slowLog.log();
}
}
@@ -295,9 +349,20 @@ public Optional get(ByteString key) {
public List batchGet(List keys) {
String label = "client_raw_batch_get";
Histogram.Timer requestTimer = RAW_REQUEST_LATENCY.labels(label).startTimer();
+ SlowLog slowLog =
+ new SlowLogImpl(
+ conf.getRawKVBatchReadSlowLogInMS(),
+ new HashMap(2) {
+ {
+ put("func", "batchGet");
+ put("keySize", String.valueOf(keys.size()));
+ }
+ });
+ ConcreteBackOffer backOffer =
+ ConcreteBackOffer.newDeadlineBackOff(conf.getRawKVBatchReadTimeoutInMS(), slowLog);
try {
- BackOffer backOffer = defaultBackOff();
- List result = doSendBatchGet(backOffer, keys);
+ long deadline = System.currentTimeMillis() + conf.getRawKVBatchReadTimeoutInMS();
+ List result = doSendBatchGet(backOffer, keys, deadline);
RAW_REQUEST_SUCCESS.labels(label).inc();
return result;
} catch (Exception e) {
@@ -305,6 +370,7 @@ public List batchGet(List keys) {
throw e;
} finally {
requestTimer.observeDuration();
+ slowLog.log();
}
}
@@ -316,9 +382,20 @@ public List batchGet(List keys) {
public void batchDelete(List keys) {
String label = "client_raw_batch_delete";
Histogram.Timer requestTimer = RAW_REQUEST_LATENCY.labels(label).startTimer();
+ SlowLog slowLog =
+ new SlowLogImpl(
+ conf.getRawKVBatchWriteSlowLogInMS(),
+ new HashMap(2) {
+ {
+ put("func", "batchDelete");
+ put("keySize", String.valueOf(keys.size()));
+ }
+ });
+ ConcreteBackOffer backOffer =
+ ConcreteBackOffer.newDeadlineBackOff(conf.getRawKVBatchWriteTimeoutInMS(), slowLog);
try {
- BackOffer backOffer = defaultBackOff();
- doSendBatchDelete(backOffer, keys);
+ long deadline = System.currentTimeMillis() + conf.getRawKVBatchWriteTimeoutInMS();
+ doSendBatchDelete(backOffer, keys, deadline);
RAW_REQUEST_SUCCESS.labels(label).inc();
return;
} catch (Exception e) {
@@ -326,6 +403,7 @@ public void batchDelete(List keys) {
throw e;
} finally {
requestTimer.observeDuration();
+ slowLog.log();
}
}
@@ -339,16 +417,27 @@ public void batchDelete(List keys) {
public Optional getKeyTTL(ByteString key) {
String label = "client_raw_get_key_ttl";
Histogram.Timer requestTimer = RAW_REQUEST_LATENCY.labels(label).startTimer();
+ SlowLog slowLog =
+ new SlowLogImpl(
+ conf.getRawKVReadSlowLogInMS(),
+ new HashMap(2) {
+ {
+ put("func", "getKeyTTL");
+ put("key", KeyUtils.formatBytesUTF8(key));
+ }
+ });
+ ConcreteBackOffer backOffer =
+ ConcreteBackOffer.newDeadlineBackOff(conf.getRawKVReadTimeoutInMS(), slowLog);
try {
- BackOffer backOffer = defaultBackOff();
while (true) {
- RegionStoreClient client = clientBuilder.build(key, backOffer);
- try {
- Optional result = client.rawGetKeyTTL(defaultBackOff(), key);
+ try (RegionStoreClient client = clientBuilder.build(key, backOffer)) {
+ slowLog.addProperty("region", client.getRegion().toString());
+ Optional result = client.rawGetKeyTTL(backOffer, key);
RAW_REQUEST_SUCCESS.labels(label).inc();
return result;
} catch (final TiKVException e) {
backOffer.doBackOff(BackOffFunction.BackOffFuncType.BoRegionMiss, e);
+ logger.warn("Retry for getKeyTTL error", e);
}
}
} catch (Exception e) {
@@ -356,6 +445,7 @@ public Optional getKeyTTL(ByteString key) {
throw e;
} finally {
requestTimer.observeDuration();
+ slowLog.log();
}
}
@@ -405,6 +495,8 @@ public List> batchScanKeys(
public List> batchScan(List ranges) {
String label = "client_raw_batch_scan";
Histogram.Timer requestTimer = RAW_REQUEST_LATENCY.labels(label).startTimer();
+ long deadline = System.currentTimeMillis() + conf.getRawKVScanTimeoutInMS();
+ List>>> futureList = new ArrayList<>();
try {
if (ranges.isEmpty()) {
return new ArrayList<>();
@@ -414,7 +506,7 @@ public List> batchScan(List ranges) {
int num = 0;
for (ScanOption scanOption : ranges) {
int i = num;
- completionService.submit(() -> Pair.create(i, scan(scanOption)));
+ futureList.add(completionService.submit(() -> Pair.create(i, scan(scanOption))));
++num;
}
List> scanResults = new ArrayList<>();
@@ -423,14 +515,16 @@ public List> batchScan(List ranges) {
}
for (int i = 0; i < num; i++) {
try {
- Pair> scanResult =
- completionService.take().get(BackOffer.RAWKV_MAX_BACKOFF, TimeUnit.SECONDS);
+ Future>> future =
+ completionService.poll(deadline - System.currentTimeMillis(), TimeUnit.MILLISECONDS);
+ if (future == null) {
+ throw new TiKVException("TimeOut Exceeded for current operation.");
+ }
+ Pair> scanResult = future.get();
scanResults.set(scanResult.first, scanResult.second);
} catch (InterruptedException e) {
Thread.currentThread().interrupt();
throw new TiKVException("Current thread interrupted.", e);
- } catch (TimeoutException e) {
- throw new TiKVException("TimeOut Exceeded for current operation. ", e);
} catch (ExecutionException e) {
throw new TiKVException("Execution exception met.", e);
}
@@ -439,6 +533,9 @@ public List> batchScan(List ranges) {
return scanResults;
} catch (Exception e) {
RAW_REQUEST_FAILURE.labels(label).inc();
+ for (Future>> future : futureList) {
+ future.cancel(true);
+ }
throw e;
} finally {
requestTimer.observeDuration();
@@ -469,9 +566,23 @@ public List scan(ByteString startKey, ByteString endKey, int limit) {
public List scan(ByteString startKey, ByteString endKey, int limit, boolean keyOnly) {
String label = "client_raw_scan";
Histogram.Timer requestTimer = RAW_REQUEST_LATENCY.labels(label).startTimer();
+ SlowLog slowLog =
+ new SlowLogImpl(
+ conf.getRawKVScanSlowLogInMS(),
+ new HashMap(5) {
+ {
+ put("func", "scan");
+ put("startKey", KeyUtils.formatBytesUTF8(startKey));
+ put("endKey", KeyUtils.formatBytesUTF8(endKey));
+ put("limit", String.valueOf(limit));
+ put("keyOnly", String.valueOf(keyOnly));
+ }
+ });
+ ConcreteBackOffer backOffer =
+ ConcreteBackOffer.newDeadlineBackOff(conf.getRawKVScanTimeoutInMS(), slowLog);
try {
Iterator iterator =
- rawScanIterator(conf, clientBuilder, startKey, endKey, limit, keyOnly);
+ rawScanIterator(conf, clientBuilder, startKey, endKey, limit, keyOnly, backOffer);
List result = new ArrayList<>();
iterator.forEachRemaining(result::add);
RAW_REQUEST_SUCCESS.labels(label).inc();
@@ -481,6 +592,7 @@ public List scan(ByteString startKey, ByteString endKey, int limit, bool
throw e;
} finally {
requestTimer.observeDuration();
+ slowLog.log();
}
}
@@ -529,17 +641,37 @@ public List scan(ByteString startKey, ByteString endKey) {
public List scan(ByteString startKey, ByteString endKey, boolean keyOnly) {
String label = "client_raw_scan_without_limit";
Histogram.Timer requestTimer = RAW_REQUEST_LATENCY.labels(label).startTimer();
+ SlowLog slowLog =
+ new SlowLogImpl(
+ conf.getRawKVScanSlowLogInMS(),
+ new HashMap(4) {
+ {
+ put("func", "scan");
+ put("startKey", KeyUtils.formatBytesUTF8(startKey));
+ put("endKey", KeyUtils.formatBytesUTF8(endKey));
+ put("keyOnly", String.valueOf(keyOnly));
+ }
+ });
+ ConcreteBackOffer backOffer =
+ ConcreteBackOffer.newDeadlineBackOff(conf.getRawKVScanTimeoutInMS(), slowLog);
try {
+ ByteString newStartKey = startKey;
List result = new ArrayList<>();
while (true) {
Iterator iterator =
rawScanIterator(
- conf, clientBuilder, startKey, endKey, conf.getScanBatchSize(), keyOnly);
+ conf,
+ clientBuilder,
+ newStartKey,
+ endKey,
+ conf.getScanBatchSize(),
+ keyOnly,
+ backOffer);
if (!iterator.hasNext()) {
break;
}
iterator.forEachRemaining(result::add);
- startKey = Key.toRawKey(result.get(result.size() - 1).getKey()).next().toByteString();
+ newStartKey = Key.toRawKey(result.get(result.size() - 1).getKey()).next().toByteString();
}
RAW_REQUEST_SUCCESS.labels(label).inc();
return result;
@@ -548,6 +680,7 @@ public List scan(ByteString startKey, ByteString endKey, boolean keyOnly
throw e;
} finally {
requestTimer.observeDuration();
+ slowLog.log();
}
}
@@ -587,16 +720,27 @@ public List scanPrefix(ByteString prefixKey, boolean keyOnly) {
public void delete(ByteString key) {
String label = "client_raw_delete";
Histogram.Timer requestTimer = RAW_REQUEST_LATENCY.labels(label).startTimer();
+ SlowLog slowLog =
+ new SlowLogImpl(
+ conf.getRawKVWriteSlowLogInMS(),
+ new HashMap(2) {
+ {
+ put("func", "delete");
+ put("key", KeyUtils.formatBytesUTF8(key));
+ }
+ });
+ ConcreteBackOffer backOffer =
+ ConcreteBackOffer.newDeadlineBackOff(conf.getRawKVWriteTimeoutInMS(), slowLog);
try {
- BackOffer backOffer = defaultBackOff();
while (true) {
- RegionStoreClient client = clientBuilder.build(key, backOffer);
- try {
- client.rawDelete(defaultBackOff(), key, atomicForCAS);
+ try (RegionStoreClient client = clientBuilder.build(key, backOffer)) {
+ slowLog.addProperty("region", client.getRegion().toString());
+ client.rawDelete(backOffer, key, atomicForCAS);
RAW_REQUEST_SUCCESS.labels(label).inc();
return;
} catch (final TiKVException e) {
backOffer.doBackOff(BackOffFunction.BackOffFuncType.BoRegionMiss, e);
+ logger.warn("Retry for delete error", e);
}
}
} catch (Exception e) {
@@ -604,6 +748,7 @@ public void delete(ByteString key) {
throw e;
} finally {
requestTimer.observeDuration();
+ slowLog.log();
}
}
@@ -619,9 +764,12 @@ public void delete(ByteString key) {
public synchronized void deleteRange(ByteString startKey, ByteString endKey) {
String label = "client_raw_delete_range";
Histogram.Timer requestTimer = RAW_REQUEST_LATENCY.labels(label).startTimer();
+ ConcreteBackOffer backOffer =
+ ConcreteBackOffer.newDeadlineBackOff(
+ conf.getRawKVCleanTimeoutInMS(), SlowLogEmptyImpl.INSTANCE);
try {
- BackOffer backOffer = defaultBackOff();
- doSendDeleteRange(backOffer, startKey, endKey);
+ long deadline = System.currentTimeMillis() + conf.getRawKVCleanTimeoutInMS();
+ doSendDeleteRange(backOffer, startKey, endKey, deadline);
RAW_REQUEST_SUCCESS.labels(label).inc();
} catch (Exception e) {
RAW_REQUEST_FAILURE.labels(label).inc();
@@ -730,10 +878,13 @@ private void doIngest(TiRegion region, List> sorted
importerClient.write(sortedList.iterator());
}
- private void doSendBatchPut(BackOffer backOffer, Map kvPairs, long ttl) {
+ private void doSendBatchPut(
+ BackOffer backOffer, Map kvPairs, long ttl, long deadline) {
ExecutorCompletionService> completionService =
new ExecutorCompletionService<>(batchPutThreadPool);
+ List>> futureList = new ArrayList<>();
+
Map> groupKeys =
groupKeysByRegion(clientBuilder.getRegionManager(), kvPairs.keySet(), backOffer);
List batches = new ArrayList<>();
@@ -756,20 +907,28 @@ private void doSendBatchPut(BackOffer backOffer, Map kvP
for (Batch batch : task) {
completionService.submit(
() -> doSendBatchPutInBatchesWithRetry(batch.getBackOffer(), batch, ttl));
+
+ try {
+ getTasks(completionService, taskQueue, task, deadline - System.currentTimeMillis());
+ } catch (Exception e) {
+ for (Future> future : futureList) {
+ future.cancel(true);
+ }
+ throw e;
+ }
}
- getTasks(completionService, taskQueue, task, BackOffer.RAWKV_MAX_BACKOFF);
}
}
private List doSendBatchPutInBatchesWithRetry(BackOffer backOffer, Batch batch, long ttl) {
- try (RegionStoreClient client = clientBuilder.build(batch.getRegion())) {
- client.setTimeout(conf.getScanTimeout());
+ try (RegionStoreClient client = clientBuilder.build(batch.getRegion(), backOffer)) {
+ client.setTimeout(conf.getRawKVBatchWriteTimeoutInMS());
client.rawBatchPut(backOffer, batch, ttl, atomicForCAS);
return new ArrayList<>();
} catch (final TiKVException e) {
// TODO: any elegant way to re-split the ranges if fails?
backOffer.doBackOff(BackOffFunction.BackOffFuncType.BoRegionMiss, e);
- logger.debug("ReSplitting ranges for BatchPutRequest");
+ logger.warn("ReSplitting ranges for BatchPutRequest", e);
// retry
return doSendBatchPutWithRefetchRegion(backOffer, batch);
}
@@ -794,10 +953,12 @@ private List doSendBatchPutWithRefetchRegion(BackOffer backOffer, Batch b
return retryBatches;
}
- private List doSendBatchGet(BackOffer backOffer, List keys) {
+ private List doSendBatchGet(BackOffer backOffer, List keys, long deadline) {
ExecutorCompletionService, List>> completionService =
new ExecutorCompletionService<>(batchGetThreadPool);
+ List, List>>> futureList = new ArrayList<>();
+
List batches =
getBatches(backOffer, keys, RAW_BATCH_GET_SIZE, MAX_RAW_BATCH_LIMIT, this.clientBuilder);
@@ -808,11 +969,20 @@ private List doSendBatchGet(BackOffer backOffer, List keys)
while (!taskQueue.isEmpty()) {
List task = taskQueue.poll();
for (Batch batch : task) {
- completionService.submit(
- () -> doSendBatchGetInBatchesWithRetry(batch.getBackOffer(), batch));
+ futureList.add(
+ completionService.submit(
+ () -> doSendBatchGetInBatchesWithRetry(batch.getBackOffer(), batch)));
+ }
+ try {
+ result.addAll(
+ getTasksWithOutput(
+ completionService, taskQueue, task, deadline - System.currentTimeMillis()));
+ } catch (Exception e) {
+ for (Future, List>> future : futureList) {
+ future.cancel(true);
+ }
+ throw e;
}
- result.addAll(
- getTasksWithOutput(completionService, taskQueue, task, BackOffer.RAWKV_MAX_BACKOFF));
}
return result;
@@ -820,14 +990,14 @@ private List doSendBatchGet(BackOffer backOffer, List keys)
private Pair, List> doSendBatchGetInBatchesWithRetry(
BackOffer backOffer, Batch batch) {
- RegionStoreClient client = clientBuilder.build(batch.getRegion(), backOffer);
- try {
+
+ try (RegionStoreClient client = clientBuilder.build(batch.getRegion(), backOffer)) {
List partialResult = client.rawBatchGet(backOffer, batch.getKeys());
return Pair.create(new ArrayList<>(), partialResult);
} catch (final TiKVException e) {
backOffer.doBackOff(BackOffFunction.BackOffFuncType.BoRegionMiss, e);
clientBuilder.getRegionManager().invalidateRegion(batch.getRegion());
- logger.debug("ReSplitting ranges for BatchGetRequest", e);
+ logger.warn("ReSplitting ranges for BatchGetRequest", e);
// retry
return Pair.create(doSendBatchGetWithRefetchRegion(backOffer, batch), new ArrayList<>());
@@ -839,10 +1009,12 @@ private List doSendBatchGetWithRefetchRegion(BackOffer backOffer, Batch b
backOffer, batch.getKeys(), RAW_BATCH_GET_SIZE, MAX_RAW_BATCH_LIMIT, clientBuilder);
}
- private void doSendBatchDelete(BackOffer backOffer, List keys) {
+ private void doSendBatchDelete(BackOffer backOffer, List keys, long deadline) {
ExecutorCompletionService> completionService =
new ExecutorCompletionService<>(batchDeleteThreadPool);
+ List>> futureList = new ArrayList<>();
+
List batches =
getBatches(backOffer, keys, RAW_BATCH_DELETE_SIZE, MAX_RAW_BATCH_LIMIT, this.clientBuilder);
@@ -852,22 +1024,29 @@ private void doSendBatchDelete(BackOffer backOffer, List keys) {
while (!taskQueue.isEmpty()) {
List task = taskQueue.poll();
for (Batch batch : task) {
- completionService.submit(
- () -> doSendBatchDeleteInBatchesWithRetry(batch.getBackOffer(), batch));
+ futureList.add(
+ completionService.submit(
+ () -> doSendBatchDeleteInBatchesWithRetry(batch.getBackOffer(), batch)));
+ }
+ try {
+ getTasks(completionService, taskQueue, task, deadline - System.currentTimeMillis());
+ } catch (Exception e) {
+ for (Future> future : futureList) {
+ future.cancel(true);
+ }
+ throw e;
}
- getTasks(completionService, taskQueue, task, BackOffer.RAWKV_MAX_BACKOFF);
}
}
private List doSendBatchDeleteInBatchesWithRetry(BackOffer backOffer, Batch batch) {
- RegionStoreClient client = clientBuilder.build(batch.getRegion(), backOffer);
- try {
+ try (RegionStoreClient client = clientBuilder.build(batch.getRegion(), backOffer)) {
client.rawBatchDelete(backOffer, batch.getKeys(), atomicForCAS);
return new ArrayList<>();
} catch (final TiKVException e) {
backOffer.doBackOff(BackOffFunction.BackOffFuncType.BoRegionMiss, e);
clientBuilder.getRegionManager().invalidateRegion(batch.getRegion());
- logger.debug("ReSplitting ranges for BatchGetRequest", e);
+ logger.warn("ReSplitting ranges for BatchGetRequest", e);
// retry
return doSendBatchDeleteWithRefetchRegion(backOffer, batch);
@@ -886,10 +1065,13 @@ private ByteString calcKeyByCondition(boolean condition, ByteString key1, ByteSt
return key2;
}
- private void doSendDeleteRange(BackOffer backOffer, ByteString startKey, ByteString endKey) {
+ private void doSendDeleteRange(
+ BackOffer backOffer, ByteString startKey, ByteString endKey, long deadline) {
ExecutorCompletionService> completionService =
new ExecutorCompletionService<>(deleteRangeThreadPool);
+ List>> futureList = new ArrayList<>();
+
List regions = fetchRegionsFromRange(backOffer, startKey, endKey);
List ranges = new ArrayList<>();
for (int i = 0; i < regions.size(); i++) {
@@ -903,9 +1085,18 @@ private void doSendDeleteRange(BackOffer backOffer, ByteString startKey, ByteStr
while (!taskQueue.isEmpty()) {
List task = taskQueue.poll();
for (DeleteRange range : task) {
- completionService.submit(() -> doSendDeleteRangeWithRetry(range.getBackOffer(), range));
+ futureList.add(
+ completionService.submit(
+ () -> doSendDeleteRangeWithRetry(range.getBackOffer(), range)));
+ }
+ try {
+ getTasks(completionService, taskQueue, task, deadline - System.currentTimeMillis());
+ } catch (Exception e) {
+ for (Future> future : futureList) {
+ future.cancel(true);
+ }
+ throw e;
}
- getTasks(completionService, taskQueue, task, BackOffer.RAWKV_MAX_BACKOFF);
}
}
@@ -917,7 +1108,7 @@ private List doSendDeleteRangeWithRetry(BackOffer backOffer, Delete
} catch (final TiKVException e) {
backOffer.doBackOff(BackOffFunction.BackOffFuncType.BoRegionMiss, e);
clientBuilder.getRegionManager().invalidateRegion(range.getRegion());
- logger.debug("ReSplitting ranges for BatchDeleteRangeRequest", e);
+ logger.warn("ReSplitting ranges for BatchDeleteRangeRequest", e);
// retry
return doSendDeleteRangeWithRefetchRegion(backOffer, range);
@@ -970,15 +1161,12 @@ private Iterator rawScanIterator(
ByteString startKey,
ByteString endKey,
int limit,
- boolean keyOnly) {
+ boolean keyOnly,
+ BackOffer backOffer) {
if (limit > MAX_RAW_SCAN_LIMIT) {
throw ERR_MAX_SCAN_LIMIT_EXCEEDED;
}
- return new RawScanIterator(conf, builder, startKey, endKey, limit, keyOnly);
- }
-
- private BackOffer defaultBackOff() {
- return ConcreteBackOffer.newCustomBackOff(conf.getRawKVDefaultBackoffInMS());
+ return new RawScanIterator(conf, builder, startKey, endKey, limit, keyOnly, backOffer);
}
/**
@@ -1031,7 +1219,7 @@ public Iterator scan0(
Histogram.Timer requestTimer = RAW_REQUEST_LATENCY.labels(label).startTimer();
try {
Iterator iterator =
- rawScanIterator(conf, clientBuilder, startKey, endKey, limit, keyOnly);
+ rawScanIterator(conf, clientBuilder, startKey, endKey, limit, keyOnly, defaultBackOff());
RAW_REQUEST_SUCCESS.labels(label).inc();
return iterator;
} catch (Exception e) {
@@ -1110,7 +1298,13 @@ public TikvIterator(ByteString startKey, ByteString endKey, boolean keyOnly) {
this.iterator =
rawScanIterator(
- conf, clientBuilder, this.startKey, this.endKey, conf.getScanBatchSize(), keyOnly);
+ conf,
+ clientBuilder,
+ this.startKey,
+ this.endKey,
+ conf.getScanBatchSize(),
+ keyOnly,
+ defaultBackOff());
}
@Override
@@ -1123,7 +1317,14 @@ public boolean hasNext() {
}
ByteString startKey = Key.toRawKey(this.last.getKey()).next().toByteString();
this.iterator =
- rawScanIterator(conf, clientBuilder, startKey, endKey, conf.getScanBatchSize(), keyOnly);
+ rawScanIterator(
+ conf,
+ clientBuilder,
+ startKey,
+ endKey,
+ conf.getScanBatchSize(),
+ keyOnly,
+ defaultBackOff());
this.last = null;
return this.iterator.hasNext();
}
@@ -1135,4 +1336,8 @@ public KvPair next() {
return next;
}
}
+
+ private BackOffer defaultBackOff() {
+ return ConcreteBackOffer.newCustomBackOff(conf.getRawKVDefaultBackoffInMS());
+ }
}
diff --git a/src/main/java/org/tikv/txn/AbstractLockResolverClient.java b/src/main/java/org/tikv/txn/AbstractLockResolverClient.java
index 020068e272a..7b4ed1c2125 100644
--- a/src/main/java/org/tikv/txn/AbstractLockResolverClient.java
+++ b/src/main/java/org/tikv/txn/AbstractLockResolverClient.java
@@ -71,7 +71,7 @@ static AbstractLockResolverClient getInstance(
TiRegion region,
TiStore store,
TikvGrpc.TikvBlockingStub blockingStub,
- TikvGrpc.TikvStub asyncStub,
+ TikvGrpc.TikvFutureStub asyncStub,
ChannelFactory channelFactory,
RegionManager regionManager,
PDClient pdClient,
diff --git a/src/main/java/org/tikv/txn/LockResolverClientV2.java b/src/main/java/org/tikv/txn/LockResolverClientV2.java
index 3df5966abda..20033090406 100644
--- a/src/main/java/org/tikv/txn/LockResolverClientV2.java
+++ b/src/main/java/org/tikv/txn/LockResolverClientV2.java
@@ -52,7 +52,7 @@
import org.tikv.kvproto.Kvrpcpb.ResolveLockResponse;
import org.tikv.kvproto.TikvGrpc;
import org.tikv.kvproto.TikvGrpc.TikvBlockingStub;
-import org.tikv.kvproto.TikvGrpc.TikvStub;
+import org.tikv.kvproto.TikvGrpc.TikvFutureStub;
/** Before v3.0.5 TiDB uses the ttl on secondary lock. */
public class LockResolverClientV2 extends AbstractRegionStoreClient
@@ -77,7 +77,7 @@ public LockResolverClientV2(
TiRegion region,
TiStore store,
TikvBlockingStub blockingStub,
- TikvStub asyncStub,
+ TikvFutureStub asyncStub,
ChannelFactory channelFactory,
RegionManager regionManager) {
super(conf, region, store, channelFactory, blockingStub, asyncStub, regionManager);
diff --git a/src/main/java/org/tikv/txn/LockResolverClientV3.java b/src/main/java/org/tikv/txn/LockResolverClientV3.java
index 0b8d3c89a8c..b15fa784376 100644
--- a/src/main/java/org/tikv/txn/LockResolverClientV3.java
+++ b/src/main/java/org/tikv/txn/LockResolverClientV3.java
@@ -49,7 +49,7 @@
import org.tikv.kvproto.Kvrpcpb.CleanupResponse;
import org.tikv.kvproto.TikvGrpc;
import org.tikv.kvproto.TikvGrpc.TikvBlockingStub;
-import org.tikv.kvproto.TikvGrpc.TikvStub;
+import org.tikv.kvproto.TikvGrpc.TikvFutureStub;
/** Since v3.0.5 TiDB ignores the ttl on secondary lock and will use the ttl on primary key. */
public class LockResolverClientV3 extends AbstractRegionStoreClient
@@ -78,7 +78,7 @@ public LockResolverClientV3(
TiRegion region,
TiStore store,
TikvBlockingStub blockingStub,
- TikvStub asyncStub,
+ TikvFutureStub asyncStub,
ChannelFactory channelFactory,
RegionManager regionManager,
PDClient pdClient,
diff --git a/src/main/java/org/tikv/txn/LockResolverClientV4.java b/src/main/java/org/tikv/txn/LockResolverClientV4.java
index 09b28d67b33..cf39ce7009d 100644
--- a/src/main/java/org/tikv/txn/LockResolverClientV4.java
+++ b/src/main/java/org/tikv/txn/LockResolverClientV4.java
@@ -47,7 +47,7 @@
import org.tikv.kvproto.Kvrpcpb;
import org.tikv.kvproto.TikvGrpc;
import org.tikv.kvproto.TikvGrpc.TikvBlockingStub;
-import org.tikv.kvproto.TikvGrpc.TikvStub;
+import org.tikv.kvproto.TikvGrpc.TikvFutureStub;
import org.tikv.txn.exception.TxnNotFoundException;
import org.tikv.txn.exception.WriteConflictException;
@@ -78,7 +78,7 @@ public LockResolverClientV4(
TiRegion region,
TiStore store,
TikvBlockingStub blockingStub,
- TikvStub asyncStub,
+ TikvFutureStub asyncStub,
ChannelFactory channelFactory,
RegionManager regionManager,
PDClient pdClient,
diff --git a/src/test/java/org/tikv/common/RegionManagerTest.java b/src/test/java/org/tikv/common/RegionManagerTest.java
index 7bbcc79b84c..1d472a748f8 100644
--- a/src/test/java/org/tikv/common/RegionManagerTest.java
+++ b/src/test/java/org/tikv/common/RegionManagerTest.java
@@ -16,7 +16,7 @@
package org.tikv.common;
import static org.junit.Assert.assertEquals;
-import static org.junit.Assert.assertNull;
+import static org.junit.Assert.fail;
import com.google.common.collect.RangeMap;
import com.google.common.collect.TreeRangeMap;
@@ -153,6 +153,18 @@ public void getStoreById() {
StoreState.Tombstone,
GrpcUtils.makeStoreLabel("k1", "v1"),
GrpcUtils.makeStoreLabel("k2", "v2"))));
- assertNull(mgr.getStoreById(storeId + 1));
+
+ try {
+ mgr.getStoreById(storeId + 1);
+ fail();
+ } catch (Exception ignored) {
+ }
+
+ mgr.invalidateStore(storeId);
+ try {
+ mgr.getStoreById(storeId);
+ fail();
+ } catch (Exception ignored) {
+ }
}
}
diff --git a/src/test/java/org/tikv/raw/RawKVClientTest.java b/src/test/java/org/tikv/raw/RawKVClientTest.java
index b41109678fe..2caa837a73a 100644
--- a/src/test/java/org/tikv/raw/RawKVClientTest.java
+++ b/src/test/java/org/tikv/raw/RawKVClientTest.java
@@ -18,6 +18,10 @@
import org.tikv.common.codec.KeyUtils;
import org.tikv.common.exception.TiKVException;
import org.tikv.common.key.Key;
+import org.tikv.common.log.SlowLogEmptyImpl;
+import org.tikv.common.util.BackOffFunction;
+import org.tikv.common.util.BackOffer;
+import org.tikv.common.util.ConcreteBackOffer;
import org.tikv.common.util.FastByteComparisons;
import org.tikv.common.util.Pair;
import org.tikv.common.util.ScanOption;
@@ -136,6 +140,46 @@ private String generateType() {
"%s%02d", RandomStringUtils.randomAlphabetic(3).toUpperCase(Locale.ROOT), r.nextInt(10000));
}
+ @Test
+ public void testCustomBackOff() {
+ int timeout = 2000;
+ int sleep = 150;
+ BackOffer backOffer = ConcreteBackOffer.newCustomBackOff(timeout);
+ long s = System.currentTimeMillis();
+ try {
+ while (true) {
+ Thread.sleep(sleep);
+ backOffer.doBackOff(BackOffFunction.BackOffFuncType.BoRegionMiss, new Exception("t"));
+ }
+ } catch (Exception ignored) {
+ } finally {
+ long e = System.currentTimeMillis();
+ long duration = e - s;
+ logger.info("duration = " + duration);
+ assert (duration >= 2900);
+ }
+ }
+
+ @Test
+ public void testDeadlineBackOff() {
+ int timeout = 2000;
+ int sleep = 150;
+ BackOffer backOffer = ConcreteBackOffer.newDeadlineBackOff(timeout, SlowLogEmptyImpl.INSTANCE);
+ long s = System.currentTimeMillis();
+ try {
+ while (true) {
+ Thread.sleep(sleep);
+ backOffer.doBackOff(BackOffFunction.BackOffFuncType.BoRegionMiss, new Exception("t"));
+ }
+ } catch (Exception ignored) {
+ } finally {
+ long e = System.currentTimeMillis();
+ long duration = e - s;
+ logger.info("duration = " + duration);
+ assert (duration <= timeout + sleep);
+ }
+ }
+
@Test
public void batchPutTest() {
ExecutorService executors = Executors.newFixedThreadPool(200);