diff --git a/metrics/grafana/client_java_summary.json b/metrics/grafana/client_java_summary.json index 5a24786b3c6..46895dd20aa 100644 --- a/metrics/grafana/client_java_summary.json +++ b/metrics/grafana/client_java_summary.json @@ -113,7 +113,7 @@ "targets": [ { "exemplar": true, - "expr": "sum(rate(client_java_raw_requests_latency_count{instance=~\"$instance\"}[$__rate_interval])) by (type)", + "expr": "sum(rate(client_java_raw_requests_latency_count{instance=~\"$instance\", cluster=~\"$cluster\"}[$__rate_interval])) by (type)", "format": "time_series", "interval": "", "intervalFactor": 1, @@ -214,7 +214,7 @@ "targets": [ { "exemplar": true, - "expr": "sum(rate(client_java_raw_requests_failure_total{instance=~\"$instance\"}[$__rate_interval])) by (type)", + "expr": "sum(rate(client_java_raw_requests_failure_total{instance=~\"$instance\", cluster=~\"$cluster\"}[$__rate_interval])) by (type)", "format": "time_series", "interval": "", "intervalFactor": 1, @@ -315,7 +315,7 @@ "targets": [ { "exemplar": true, - "expr": "histogram_quantile(0.99, sum(rate(client_java_raw_requests_latency_bucket{instance=~\"$instance\"}[$__rate_interval])) by (le, type))", + "expr": "histogram_quantile(0.99, sum(rate(client_java_raw_requests_latency_bucket{instance=~\"$instance\", cluster=~\"$cluster\"}[$__rate_interval])) by (le, type))", "format": "time_series", "hide": false, "interval": "", @@ -325,7 +325,7 @@ }, { "exemplar": true, - "expr": "histogram_quantile(1, sum(rate(client_java_raw_requests_latency_bucket{instance=~\"$instance\"}[$__rate_interval])) by (le, type))", + "expr": "histogram_quantile(1, sum(rate(client_java_raw_requests_latency_bucket{instance=~\"$instance\", cluster=~\"$cluster\"}[$__rate_interval])) by (le, type))", "hide": false, "interval": "", "legendFormat": "{{type}} - max", @@ -435,7 +435,7 @@ "targets": [ { "exemplar": true, - "expr": "sum(rate(client_java_raw_requests_latency_sum{instance=~\"$instance\"}[$__rate_interval])) by (type) / sum(rate(client_java_raw_requests_latency_count{instance=~\"$instance\"}[$__rate_interval])) by (type)", + "expr": "sum(rate(client_java_raw_requests_latency_sum{instance=~\"$instance\", cluster=~\"$cluster\"}[$__rate_interval])) by (type) / sum(rate(client_java_raw_requests_latency_count{instance=~\"$instance\", cluster=~\"$cluster\"}[$__rate_interval])) by (type)", "format": "time_series", "hide": false, "interval": "", @@ -561,7 +561,7 @@ "targets": [ { "exemplar": true, - "expr": "histogram_quantile(1, sum(rate(client_java_grpc_raw_requests_latency_bucket{instance=~\"$instance\"}[$__rate_interval])) by (le, type))", + "expr": "histogram_quantile(1, sum(rate(client_java_grpc_raw_requests_latency_bucket{instance=~\"$instance\", cluster=~\"$cluster\"}[$__rate_interval])) by (le, type))", "format": "time_series", "instant": false, "interval": "", @@ -572,7 +572,7 @@ }, { "exemplar": true, - "expr": "histogram_quantile(0.99, sum(rate(client_java_grpc_raw_requests_latency_bucket{instance=~\"$instance\"}[$__rate_interval])) by (le, type))", + "expr": "histogram_quantile(0.99, sum(rate(client_java_grpc_raw_requests_latency_bucket{instance=~\"$instance\", cluster=~\"$cluster\"}[$__rate_interval])) by (le, type))", "format": "time_series", "hide": false, "instant": false, @@ -686,7 +686,7 @@ "targets": [ { "exemplar": true, - "expr": "sum(rate(client_java_grpc_raw_requests_latency_sum{instance=~\"$instance\"}[$__rate_interval])) by (type) / sum(rate(client_java_grpc_raw_requests_latency_count{instance=~\"$instance\"}[$__rate_interval])) by (type)", + 
"expr": "sum(rate(client_java_grpc_raw_requests_latency_sum{instance=~\"$instance\", cluster=~\"$cluster\"}[$__rate_interval])) by (type) / sum(rate(client_java_grpc_raw_requests_latency_count{instance=~\"$instance\", cluster=~\"$cluster\"}[$__rate_interval])) by (type)", "format": "time_series", "interval": "", "intervalFactor": 1, @@ -796,7 +796,7 @@ "targets": [ { "exemplar": true, - "expr": "histogram_quantile(1,sum(rate(client_java_grpc_single_requests_latency_bucket{instance=~\"$instance\", type!=\"pdpb.PD/GetRegion\", type!=\"pdpb.PD/GetStore\"}[$__rate_interval])) by (le, type))", + "expr": "histogram_quantile(1,sum(rate(client_java_grpc_single_requests_latency_bucket{instance=~\"$instance\", cluster=~\"$cluster\", type!=\"pdpb.PD/GetRegion\", type!=\"pdpb.PD/GetStore\"}[$__rate_interval])) by (le, type))", "interval": "", "legendFormat": "{{ type }} -- max", "queryType": "randomWalk", @@ -804,7 +804,7 @@ }, { "exemplar": true, - "expr": "histogram_quantile(0.99,sum(rate(client_java_grpc_single_requests_latency_bucket{instance=~\"$instance\", type!=\"pdpb.PD/GetRegion\", type!=\"pdpb.PD/GetStore\"}[$__rate_interval])) by (le, type))", + "expr": "histogram_quantile(0.99,sum(rate(client_java_grpc_single_requests_latency_bucket{instance=~\"$instance\", cluster=~\"$cluster\", type!=\"pdpb.PD/GetRegion\", type!=\"pdpb.PD/GetStore\"}[$__rate_interval])) by (le, type))", "hide": false, "interval": "", "legendFormat": "{{ type }} -- 99", @@ -914,7 +914,7 @@ "targets": [ { "exemplar": true, - "expr": "sum(rate(client_java_grpc_single_requests_latency_sum{instance=~\"$instance\", type!=\"pdpb.PD/GetRegion\", type!=\"pdpb.PD/GetStore\"}[$__rate_interval])) by (type) / sum(rate(client_java_grpc_single_requests_latency_count{instance=~\"$instance\", type!=\"pdpb.PD/GetRegion\", type!=\"pdpb.PD/GetStore\"}[$__rate_interval])) by (type)", + "expr": "sum(rate(client_java_grpc_single_requests_latency_sum{instance=~\"$instance\", cluster=~\"$cluster\", type!=\"pdpb.PD/GetRegion\", type!=\"pdpb.PD/GetStore\"}[$__rate_interval])) by (type) / sum(rate(client_java_grpc_single_requests_latency_count{instance=~\"$instance\", cluster=~\"$cluster\", type!=\"pdpb.PD/GetRegion\", type!=\"pdpb.PD/GetStore\"}[$__rate_interval])) by (type)", "interval": "", "legendFormat": "{{ type }}", "queryType": "randomWalk", @@ -1023,7 +1023,7 @@ "targets": [ { "exemplar": true, - "expr": "sum(rate(client_java_seek_leader_store_duration_sum{instance=~\"$instance\"}[$__rate_interval])) by (le) / sum(rate(client_java_seek_leader_store_duration_count{instance=~\"$instance\"}[$__rate_interval])) by (le)", + "expr": "sum(rate(client_java_seek_leader_store_duration_sum{instance=~\"$instance\", cluster=~\"$cluster\"}[$__rate_interval])) by (le) / sum(rate(client_java_seek_leader_store_duration_count{instance=~\"$instance\", cluster=~\"$cluster\"}[$__rate_interval])) by (le)", "interval": "", "legendFormat": "seek-leader-store-avg", "queryType": "randomWalk", @@ -1031,7 +1031,7 @@ }, { "exemplar": true, - "expr": "histogram_quantile(0.99,sum(rate(client_java_seek_leader_store_duration_bucket{instance=~\"$instance\"}[$__rate_interval])) by (le, type))", + "expr": "histogram_quantile(0.99,sum(rate(client_java_seek_leader_store_duration_bucket{instance=~\"$instance\", cluster=~\"$cluster\"}[$__rate_interval])) by (le, type))", "hide": false, "interval": "", "legendFormat": "seek-leader-store-99", @@ -1140,7 +1140,7 @@ "targets": [ { "exemplar": true, - "expr": 
"sum(rate(client_java_seek_proxy_store_duration_sum{instance=~\"$instance\"}[$__rate_interval])) by (le) / sum(rate(client_java_seek_proxy_store_duration_count{instance=~\"$instance\"}[$__rate_interval])) by (le)", + "expr": "sum(rate(client_java_seek_proxy_store_duration_sum{instance=~\"$instance\", cluster=~\"$cluster\"}[$__rate_interval])) by (le) / sum(rate(client_java_seek_proxy_store_duration_count{instance=~\"$instance\", cluster=~\"$cluster\"}[$__rate_interval])) by (le)", "interval": "", "legendFormat": "seek-proxy-store-avg", "queryType": "randomWalk", @@ -1148,7 +1148,7 @@ }, { "exemplar": true, - "expr": "histogram_quantile(0.99,sum(rate(client_java_seek_proxy_store_duration_bucket{instance=~\"$instance\"}[$__rate_interval])) by (le, type))", + "expr": "histogram_quantile(0.99,sum(rate(client_java_seek_proxy_store_duration_bucket{instance=~\"$instance\", cluster=~\"$cluster\"}[$__rate_interval])) by (le, type))", "hide": false, "interval": "", "legendFormat": "seek-proxy-store-99", @@ -1259,7 +1259,7 @@ "targets": [ { "exemplar": true, - "expr": "sum(rate(client_java_backoff_duration_sum{instance=~\"$instance\", type!=\"BoPDRPC\"}[$__rate_interval])) by (le, type)", + "expr": "sum(rate(client_java_backoff_duration_sum{instance=~\"$instance\", cluster=~\"$cluster\", type!=\"BoPDRPC\"}[$__rate_interval])) by (le, type)", "hide": false, "interval": "", "legendFormat": "{{type}}-total", @@ -1358,7 +1358,7 @@ "targets": [ { "exemplar": true, - "expr": "sum(rate(client_java_backoff_duration_count{instance=~\"$instance\"}[$__rate_interval])) by (le, type)", + "expr": "sum(rate(client_java_backoff_duration_count{instance=~\"$instance\", cluster=~\"$cluster\"}[$__rate_interval])) by (le, type)", "interval": "", "legendFormat": "{{type}}-count", "queryType": "randomWalk", @@ -1459,7 +1459,7 @@ "targets": [ { "exemplar": true, - "expr": "sum(rate(client_java_backoff_duration_sum{instance=~\"$instance\", type!=\"BoPDRPC\"}[$__rate_interval])) by (le, type) / sum(rate(client_java_backoff_duration_count{instance=~\"$instance\", type!=\"BoPDRPC\"}[$__rate_interval])) by (le, type)", + "expr": "sum(rate(client_java_backoff_duration_sum{instance=~\"$instance\", cluster=~\"$cluster\", type!=\"BoPDRPC\"}[$__rate_interval])) by (le, type) / sum(rate(client_java_backoff_duration_count{instance=~\"$instance\", cluster=~\"$cluster\", type!=\"BoPDRPC\"}[$__rate_interval])) by (le, type)", "interval": "", "legendFormat": "{{type}}-avg", "queryType": "randomWalk", @@ -1573,7 +1573,7 @@ "targets": [ { "exemplar": true, - "expr": "sum(rate(client_java_get_region_by_requests_latency_sum{instance=~\"$instance\"}[$__rate_interval])) / sum(rate(client_java_get_region_by_requests_latency_count{instance=~\"$instance\"}[$__rate_interval]))", + "expr": "sum(rate(client_java_get_region_by_requests_latency_sum{instance=~\"$instance\", cluster=~\"$cluster\"}[$__rate_interval])) / sum(rate(client_java_get_region_by_requests_latency_count{instance=~\"$instance\", cluster=~\"$cluster\"}[$__rate_interval]))", "interval": "", "legendFormat": "avg", "queryType": "randomWalk", @@ -1581,7 +1581,7 @@ }, { "exemplar": true, - "expr": "histogram_quantile(0.99, sum(rate(client_java_get_region_by_requests_latency_bucket{instance=~\"$instance\"}[$__rate_interval])) by (le))", + "expr": "histogram_quantile(0.99, sum(rate(client_java_get_region_by_requests_latency_bucket{instance=~\"$instance\", cluster=~\"$cluster\"}[$__rate_interval])) by (le))", "hide": false, "interval": "", "legendFormat": "99th", @@ -1680,7 +1680,7 @@ 
"targets": [ { "exemplar": true, - "expr": "1 - sum(rate(client_java_grpc_single_requests_latency_count{instance=~\"$instance\", type=\"pdpb.PD/GetRegion\"}[$__rate_interval])) / sum(rate(client_java_get_region_by_requests_latency_count{instance=~\"$instance\"}[$__rate_interval]))", + "expr": "1 - sum(rate(client_java_grpc_single_requests_latency_count{instance=~\"$instance\", cluster=~\"$cluster\", type=\"pdpb.PD/GetRegion\"}[$__rate_interval])) / sum(rate(client_java_get_region_by_requests_latency_count{instance=~\"$instance\", cluster=~\"$cluster\"}[$__rate_interval]))", "interval": "", "legendFormat": "hit ratio", "queryType": "randomWalk", @@ -1788,7 +1788,7 @@ "targets": [ { "exemplar": true, - "expr": "histogram_quantile(0.99,sum(rate(client_java_grpc_single_requests_latency_bucket{instance=~\"$instance\", type=\"pdpb.PD/GetStore\"}[$__rate_interval])) by (le, type))", + "expr": "histogram_quantile(0.99,sum(rate(client_java_grpc_single_requests_latency_bucket{instance=~\"$instance\", cluster=~\"$cluster\", type=\"pdpb.PD/GetStore\"}[$__rate_interval])) by (le, type))", "interval": "", "legendFormat": "{{ type }}-99th", "queryType": "randomWalk", @@ -1796,7 +1796,7 @@ }, { "exemplar": true, - "expr": "histogram_quantile(0.99,sum(rate(client_java_grpc_single_requests_latency_bucket{instance=~\"$instance\", type=\"pdpb.PD/GetRegion\"}[$__rate_interval])) by (le, type))", + "expr": "histogram_quantile(0.99,sum(rate(client_java_grpc_single_requests_latency_bucket{instance=~\"$instance\", cluster=~\"$cluster\", type=\"pdpb.PD/GetRegion\"}[$__rate_interval])) by (le, type))", "hide": false, "interval": "", "legendFormat": "{{ type }}-99th", @@ -1804,7 +1804,7 @@ }, { "exemplar": true, - "expr": "histogram_quantile(0.99,sum(rate(client_java_grpc_single_requests_latency_bucket{instance=~\"$instance\", type=\"pdpb.PD/GetMembers\"}[$__rate_interval])) by (le, type))", + "expr": "histogram_quantile(0.99,sum(rate(client_java_grpc_single_requests_latency_bucket{instance=~\"$instance\", cluster=~\"$cluster\", type=\"pdpb.PD/GetMembers\"}[$__rate_interval])) by (le, type))", "hide": false, "interval": "", "legendFormat": "{{ type }}-99th", @@ -1904,7 +1904,7 @@ "targets": [ { "exemplar": true, - "expr": "sum(rate(client_java_grpc_single_requests_latency_count{instance=~\"$instance\", type=\"pdpb.PD/GetRegion\"}[$__rate_interval])) by (type)", + "expr": "sum(rate(client_java_grpc_single_requests_latency_count{instance=~\"$instance\", cluster=~\"$cluster\", type=\"pdpb.PD/GetRegion\"}[$__rate_interval])) by (type)", "hide": false, "interval": "", "legendFormat": "{{type}}", @@ -1912,7 +1912,7 @@ }, { "exemplar": true, - "expr": "sum(rate(client_java_grpc_single_requests_latency_count{instance=~\"$instance\", type=\"pdpb.PD/GetStore\"}[$__rate_interval])) by (type)", + "expr": "sum(rate(client_java_grpc_single_requests_latency_count{instance=~\"$instance\", cluster=~\"$cluster\", type=\"pdpb.PD/GetStore\"}[$__rate_interval])) by (type)", "hide": false, "interval": "", "legendFormat": "{{type}}", @@ -1920,7 +1920,7 @@ }, { "exemplar": true, - "expr": "sum(rate(client_java_grpc_single_requests_latency_count{instance=~\"$instance\", type=\"pdpb.PD/GetMembers\"}[$__rate_interval])) by (type)", + "expr": "sum(rate(client_java_grpc_single_requests_latency_count{instance=~\"$instance\", cluster=~\"$cluster\", type=\"pdpb.PD/GetMembers\"}[$__rate_interval])) by (type)", "hide": false, "interval": "", "legendFormat": "{{type}}", @@ -2021,7 +2021,7 @@ "targets": [ { "exemplar": true, - "expr": 
"sum(rate(client_java_backoff_duration_sum{instance=~\"$instance\", type=\"BoPDRPC\"}[$__rate_interval])) by (le, type) / sum(rate(client_java_backoff_duration_count{instance=~\"$instance\", type=\"BoPDRPC\"}[$__rate_interval])) by (le, type)", + "expr": "sum(rate(client_java_backoff_duration_sum{instance=~\"$instance\", cluster=~\"$cluster\", type=\"BoPDRPC\"}[$__rate_interval])) by (le, type) / sum(rate(client_java_backoff_duration_count{instance=~\"$instance\", cluster=~\"$cluster\", type=\"BoPDRPC\"}[$__rate_interval])) by (le, type)", "interval": "", "legendFormat": "{{type}}-avg", "queryType": "randomWalk", @@ -2029,7 +2029,7 @@ }, { "exemplar": true, - "expr": "sum(rate(client_java_backoff_duration_sum{instance=~\"$instance\", type=\"BoPDRPC\"}[$__rate_interval])) by (le, type)", + "expr": "sum(rate(client_java_backoff_duration_sum{instance=~\"$instance\", cluster=~\"$cluster\", type=\"BoPDRPC\"}[$__rate_interval])) by (le, type)", "hide": false, "interval": "", "legendFormat": "{{type}}-sum", @@ -2128,7 +2128,7 @@ "targets": [ { "exemplar": true, - "expr": "sum(rate(client_java_backoff_duration_count{instance=~\"$instance\", type=\"BoPDRPC\"}[$__rate_interval])) by (le, type)", + "expr": "sum(rate(client_java_backoff_duration_count{instance=~\"$instance\", cluster=~\"$cluster\", type=\"BoPDRPC\"}[$__rate_interval])) by (le, type)", "interval": "", "legendFormat": "{{type}}", "queryType": "randomWalk", @@ -2350,7 +2350,7 @@ "targets": [ { "exemplar": true, - "expr": "sum(rate(client_java_smart_raw_requests_latency_count{instance=~\"$instance\"}[$__rate_interval])) by (type)", + "expr": "sum(rate(client_java_smart_raw_requests_latency_count{instance=~\"$instance\", cluster=~\"$cluster\"}[$__rate_interval])) by (type)", "format": "time_series", "interval": "", "intervalFactor": 1, @@ -2451,7 +2451,7 @@ "targets": [ { "exemplar": true, - "expr": "sum(rate(client_java_smart_raw_requests_failure_total{instance=~\"$instance\"}[$__rate_interval])) by (type)", + "expr": "sum(rate(client_java_smart_raw_requests_failure_total{instance=~\"$instance\", cluster=~\"$cluster\"}[$__rate_interval])) by (type)", "format": "time_series", "interval": "", "intervalFactor": 1, @@ -2592,7 +2592,7 @@ "targets": [ { "exemplar": true, - "expr": "histogram_quantile(0.99, sum(rate(client_java_smart_raw_requests_latency_bucket{instance=~\"$instance\"}[$__rate_interval])) by (le, type))", + "expr": "histogram_quantile(0.99, sum(rate(client_java_smart_raw_requests_latency_bucket{instance=~\"$instance\", cluster=~\"$cluster\"}[$__rate_interval])) by (le, type))", "format": "time_series", "interval": "", "intervalFactor": 1, @@ -2742,7 +2742,7 @@ "targets": [ { "exemplar": true, - "expr": "sum(rate(client_java_smart_raw_requests_latency_sum{instance=~\"$instance\"}[$__rate_interval])) by (type) / sum(rate(client_java_smart_raw_requests_latency_count{instance=~\"$instance\"}[$__rate_interval])) by (type)", + "expr": "sum(rate(client_java_smart_raw_requests_latency_sum{instance=~\"$instance\", cluster=~\"$cluster\"}[$__rate_interval])) by (type) / sum(rate(client_java_smart_raw_requests_latency_count{instance=~\"$instance\", cluster=~\"$cluster\"}[$__rate_interval])) by (type)", "format": "time_series", "hide": false, "interval": "", @@ -2853,7 +2853,7 @@ "targets": [ { "exemplar": true, - "expr": "sum(rate(client_java_smart_raw_circuit_breaker_opened_total{instance=~\"$instance\"}[$__rate_interval])) by (type)", + "expr": "sum(rate(client_java_smart_raw_circuit_breaker_opened_total{instance=~\"$instance\", 
cluster=~\"$cluster\"}[$__rate_interval])) by (type)", "format": "time_series", "interval": "", "intervalFactor": 1, @@ -2954,7 +2954,7 @@ "targets": [ { "exemplar": true, - "expr": "sum(rate(client_java_circuit_breaker_attempt_counter_total{instance=~\"$instance\"}[$__rate_interval])) by (type)", + "expr": "sum(rate(client_java_circuit_breaker_attempt_counter_total{instance=~\"$instance\", cluster=~\"$cluster\"}[$__rate_interval])) by (type)", "format": "time_series", "interval": "", "intervalFactor": 1, @@ -4900,6 +4900,103 @@ "align": false, "alignLevel": null } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_TEST-CLUSTER}", + "fieldConfig": { + "defaults": {}, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 33 + }, + "hiddenSeries": false, + "id": 105, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "7.5.7", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "exemplar": true, + "expr": "histogram_quantile(1, sum(rate(grpc_threadless_executor_latency_bucket{instance=~\"$instance\"}[$__rate_interval])) by (le, phase))", + "interval": "", + "legendFormat": "{{ phase }} max", + "queryType": "randomWalk", + "refId": "A" + }, + { + "exemplar": true, + "expr": "histogram_quantile(0.99, sum(rate(grpc_threadless_executor_latency_bucket{instance=~\"$instance\"}[$__rate_interval])) by (le, phase))", + "hide": false, + "interval": "", + "legendFormat": "{{ phase }} p99", + "refId": "B" + } + ], + "thresholds": [], + "timeRegions": [], + "title": "ThreadlessExecutor Lock", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] } ], "title": "gRPC internal", @@ -5054,6 +5151,37 @@ "tagsQuery": "", "type": "query", "useTags": false + }, + { + "allValue": ".*", + "current": { + "selected": true, + "text": "All", + "value": "$__all" + }, + "datasource": "${DS_TEST-CLUSTER}", + "definition": "label_values(client_java_raw_requests_latency_count, cluster)", + "description": null, + "error": null, + "hide": 0, + "includeAll": true, + "label": "cluster", + "multi": false, + "name": "cluster", + "options": [], + "query": { + "query": "label_values(client_java_raw_requests_latency_count, cluster)", + "refId": "StandardVariableQuery" + }, + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "sort": 0, + "tagValuesQuery": "", + "tags": [], + "tagsQuery": "", + "type": "query", + "useTags": false } ] }, diff --git a/src/main/java/org/tikv/common/KVClient.java b/src/main/java/org/tikv/common/KVClient.java index 0b5f838d119..0a92bca34b3 100644 --- a/src/main/java/org/tikv/common/KVClient.java +++ b/src/main/java/org/tikv/common/KVClient.java @@ -65,7 +65,9 @@ public void close() {} * @return a ByteString value if key exists, ByteString.EMPTY if key does not exist */ 
public ByteString get(ByteString key, long version) throws GrpcException { - BackOffer backOffer = ConcreteBackOffer.newGetBackOff(); + BackOffer backOffer = + ConcreteBackOffer.newGetBackOff( + clientBuilder.getRegionManager().getPDClient().getClusterId()); while (true) { RegionStoreClient client = clientBuilder.build(key); try { diff --git a/src/main/java/org/tikv/common/PDClient.java b/src/main/java/org/tikv/common/PDClient.java index d442ec66a07..a6091fd85a3 100644 --- a/src/main/java/org/tikv/common/PDClient.java +++ b/src/main/java/org/tikv/common/PDClient.java @@ -113,6 +113,7 @@ public class PDClient extends AbstractGRPCClient HistogramUtils.buildDuration() .name("client_java_pd_get_region_by_requests_latency") .help("pd getRegionByKey request latency.") + .labelNames("cluster") .register(); private PDClient(TiConfiguration conf, ChannelFactory channelFactory) { @@ -203,7 +204,7 @@ private GetOperatorResponse getOperator(long regionId) { () -> GetOperatorRequest.newBuilder().setHeader(header).setRegionId(regionId).build(); // get operator no need to handle error and no need back offer. return callWithRetry( - ConcreteBackOffer.newCustomBackOff(0), + ConcreteBackOffer.newCustomBackOff(0, getClusterId()), PDGrpc.getGetOperatorMethod(), request, new NoopHandler<>()); @@ -231,7 +232,8 @@ private boolean isScatterRegionFinish(GetOperatorResponse resp) { @Override public Pair getRegionByKey(BackOffer backOffer, ByteString key) { - Histogram.Timer requestTimer = PD_GET_REGION_BY_KEY_REQUEST_LATENCY.startTimer(); + Histogram.Timer requestTimer = + PD_GET_REGION_BY_KEY_REQUEST_LATENCY.labels(getClusterId().toString()).startTimer(); try { if (conf.getKvMode() == KVMode.TXN) { CodecDataOutput cdo = new CodecDataOutput(); @@ -745,7 +747,8 @@ private Metapb.Region decodeRegion(Metapb.Region region) { return builder.build(); } - public long getClusterId() { + @Override + public Long getClusterId() { return header.getClusterId(); } diff --git a/src/main/java/org/tikv/common/ReadOnlyPDClient.java b/src/main/java/org/tikv/common/ReadOnlyPDClient.java index 802d258bc8f..843b87758d4 100644 --- a/src/main/java/org/tikv/common/ReadOnlyPDClient.java +++ b/src/main/java/org/tikv/common/ReadOnlyPDClient.java @@ -65,4 +65,6 @@ List scanRegions( List getAllStores(BackOffer backOffer); TiConfiguration.ReplicaRead getReplicaRead(); + + Long getClusterId(); } diff --git a/src/main/java/org/tikv/common/Snapshot.java b/src/main/java/org/tikv/common/Snapshot.java index 820c40e71b9..99468035e33 100644 --- a/src/main/java/org/tikv/common/Snapshot.java +++ b/src/main/java/org/tikv/common/Snapshot.java @@ -78,7 +78,9 @@ public List batchGet(int backOffer, List kvPairList = client.batchGet( - ConcreteBackOffer.newCustomBackOff(backOffer), list, timestamp.getVersion()); + ConcreteBackOffer.newCustomBackOff(backOffer, session.getPDClient().getClusterId()), + list, + timestamp.getVersion()); return kvPairList .stream() .map( diff --git a/src/main/java/org/tikv/common/StoreVersion.java b/src/main/java/org/tikv/common/StoreVersion.java index 03e613b553f..0310165e3aa 100644 --- a/src/main/java/org/tikv/common/StoreVersion.java +++ b/src/main/java/org/tikv/common/StoreVersion.java @@ -60,7 +60,8 @@ public static int compareTo(String v0, String v1) { public static boolean minTiKVVersion(String version, PDClient pdClient) { StoreVersion storeVersion = new StoreVersion(version); - BackOffer bo = ConcreteBackOffer.newCustomBackOff(BackOffer.PD_INFO_BACKOFF); + BackOffer bo = + 
ConcreteBackOffer.newCustomBackOff(BackOffer.PD_INFO_BACKOFF, pdClient.getClusterId()); List storeList = pdClient .getAllStores(bo) diff --git a/src/main/java/org/tikv/common/TiSession.java b/src/main/java/org/tikv/common/TiSession.java index 42eb278ee98..19ae704806d 100644 --- a/src/main/java/org/tikv/common/TiSession.java +++ b/src/main/java/org/tikv/common/TiSession.java @@ -91,13 +91,13 @@ public TiSession(TiConfiguration conf) { if (conf.isWarmUpEnable() && conf.isRawKVMode()) { warmUp(); } - this.circuitBreaker = new CircuitBreakerImpl(conf); + this.circuitBreaker = new CircuitBreakerImpl(conf, getPDClient().getClusterId()); logger.info("TiSession initialized in " + conf.getKvMode() + " mode"); } private synchronized void warmUp() { long warmUpStartTime = System.nanoTime(); - BackOffer backOffer = ConcreteBackOffer.newRawKVBackOff(); + BackOffer backOffer = ConcreteBackOffer.newRawKVBackOff(getPDClient().getClusterId()); try { // let JVM ClassLoader load gRPC error related classes @@ -128,7 +128,9 @@ private synchronized void warmUp() { } for (Pdpb.Region region : regions) { regionManager.insertRegionToCache( - regionManager.createRegion(region.getRegion(), ConcreteBackOffer.newGetBackOff())); + regionManager.createRegion( + region.getRegion(), + ConcreteBackOffer.newGetBackOff(getPDClient().getClusterId()))); } startKey = regions.get(regions.size() - 1).getRegion().getEndKey(); } while (!startKey.isEmpty()); @@ -226,7 +228,8 @@ public TiConfiguration getConf() { public TiTimestamp getTimestamp() { checkIsClosed(); - return getPDClient().getTimestamp(ConcreteBackOffer.newTsoBackOff()); + return getPDClient() + .getTimestamp(ConcreteBackOffer.newTsoBackOff(getPDClient().getClusterId())); } public Snapshot createSnapshot() { @@ -459,7 +462,7 @@ public void splitRegionAndScatter( .stream() .map(k -> Key.toRawKey(k).next().toByteString()) .collect(Collectors.toList()), - ConcreteBackOffer.newCustomBackOff(splitRegionBackoffMS)); + ConcreteBackOffer.newCustomBackOff(splitRegionBackoffMS, getPDClient().getClusterId())); // scatter region for (Metapb.Region newRegion : newRegions) { @@ -482,7 +485,9 @@ public void splitRegionAndScatter( return; } getPDClient() - .waitScatterRegionFinish(newRegion, ConcreteBackOffer.newCustomBackOff((int) remainMS)); + .waitScatterRegionFinish( + newRegion, + ConcreteBackOffer.newCustomBackOff((int) remainMS, getPDClient().getClusterId())); } } else { logger.info("skip to wait scatter region finish"); diff --git a/src/main/java/org/tikv/common/operation/iterator/ConcreteScanIterator.java b/src/main/java/org/tikv/common/operation/iterator/ConcreteScanIterator.java index 1f5ac6fcf36..4303c36e933 100644 --- a/src/main/java/org/tikv/common/operation/iterator/ConcreteScanIterator.java +++ b/src/main/java/org/tikv/common/operation/iterator/ConcreteScanIterator.java @@ -74,7 +74,9 @@ TiRegion loadCurrentRegionToCache() throws GrpcException { try (RegionStoreClient client = builder.build(startKey)) { client.setTimeout(conf.getScanTimeout()); region = client.getRegion(); - BackOffer backOffer = ConcreteBackOffer.newScannerNextMaxBackOff(); + BackOffer backOffer = + ConcreteBackOffer.newScannerNextMaxBackOff( + builder.getRegionManager().getPDClient().getClusterId()); currentCache = client.scan(backOffer, startKey, version); return region; } @@ -86,7 +88,8 @@ private ByteString resolveCurrentLock(Kvrpcpb.KvPair current) { builder.getRegionManager().getRegionStorePairByKey(current.getKey()); TiRegion region = pair.first; TiStore store = pair.second; - BackOffer 
backOffer = ConcreteBackOffer.newGetBackOff(); + BackOffer backOffer = + ConcreteBackOffer.newGetBackOff(builder.getRegionManager().getPDClient().getClusterId()); try (RegionStoreClient client = builder.build(region, store)) { return client.get(backOffer, current.getKey(), version); } catch (Exception e) { diff --git a/src/main/java/org/tikv/common/policy/RetryPolicy.java b/src/main/java/org/tikv/common/policy/RetryPolicy.java index 6baffa47494..07638bf8098 100644 --- a/src/main/java/org/tikv/common/policy/RetryPolicy.java +++ b/src/main/java/org/tikv/common/policy/RetryPolicy.java @@ -33,19 +33,19 @@ public abstract class RetryPolicy { HistogramUtils.buildDuration() .name("client_java_grpc_single_requests_latency") .help("grpc request latency.") - .labelNames("type") + .labelNames("type", "cluster") .register(); public static final Histogram CALL_WITH_RETRY_DURATION = HistogramUtils.buildDuration() .name("client_java_call_with_retry_duration") .help("callWithRetry duration.") - .labelNames("type") + .labelNames("type", "cluster") .register(); public static final Counter GRPC_REQUEST_RETRY_NUM = Counter.build() .name("client_java_grpc_requests_retry_num") .help("grpc request retry num.") - .labelNames("type") + .labelNames("type", "cluster") .register(); // handles PD and TiKV's error. @@ -70,7 +70,8 @@ private void rethrowNotRecoverableException(Exception e) { } public RespT callWithRetry(Callable proc, String methodName, BackOffer backOffer) { - Histogram.Timer callWithRetryTimer = CALL_WITH_RETRY_DURATION.labels(methodName).startTimer(); + String[] labels = new String[] {methodName, backOffer.getClusterId().toString()}; + Histogram.Timer callWithRetryTimer = CALL_WITH_RETRY_DURATION.labels(labels).startTimer(); SlowLogSpan callWithRetrySlowLogSpan = backOffer.getSlowLog().start("callWithRetry " + methodName); try { @@ -78,8 +79,7 @@ public RespT callWithRetry(Callable proc, String methodName, BackOffer ba RespT result = null; try { // add single request duration histogram - Histogram.Timer requestTimer = - GRPC_SINGLE_REQUEST_LATENCY.labels(methodName).startTimer(); + Histogram.Timer requestTimer = GRPC_SINGLE_REQUEST_LATENCY.labels(labels).startTimer(); SlowLogSpan slowLogSpan = backOffer.getSlowLog().start("gRPC " + methodName); try { result = proc.call(); @@ -93,7 +93,7 @@ public RespT callWithRetry(Callable proc, String methodName, BackOffer ba backOffer.checkTimeout(); boolean retry = handler.handleRequestError(backOffer, e); if (retry) { - GRPC_REQUEST_RETRY_NUM.labels(methodName).inc(); + GRPC_REQUEST_RETRY_NUM.labels(labels).inc(); continue; } else { return result; @@ -104,7 +104,7 @@ public RespT callWithRetry(Callable proc, String methodName, BackOffer ba if (handler != null) { boolean retry = handler.handleResponseError(backOffer, result); if (retry) { - GRPC_REQUEST_RETRY_NUM.labels(methodName).inc(); + GRPC_REQUEST_RETRY_NUM.labels(labels).inc(); continue; } } diff --git a/src/main/java/org/tikv/common/region/AbstractRegionStoreClient.java b/src/main/java/org/tikv/common/region/AbstractRegionStoreClient.java index ce13be471ad..7f240a8f1a3 100644 --- a/src/main/java/org/tikv/common/region/AbstractRegionStoreClient.java +++ b/src/main/java/org/tikv/common/region/AbstractRegionStoreClient.java @@ -52,12 +52,14 @@ public abstract class AbstractRegionStoreClient HistogramUtils.buildDuration() .name("client_java_seek_leader_store_duration") .help("seek leader store duration.") + .labelNames("cluster") .register(); public static final Histogram SEEK_PROXY_STORE_DURATION = 
HistogramUtils.buildDuration() .name("client_java_seek_proxy_store_duration") .help("seek proxy store duration.") + .labelNames("cluster") .register(); protected final RegionManager regionManager; @@ -181,7 +183,10 @@ private void updateClientStub() { } private Boolean seekLeaderStore(BackOffer backOffer) { - Histogram.Timer switchLeaderDurationTimer = SEEK_LEADER_STORE_DURATION.startTimer(); + Histogram.Timer switchLeaderDurationTimer = + SEEK_LEADER_STORE_DURATION + .labels(regionManager.getPDClient().getClusterId().toString()) + .startTimer(); SlowLogSpan slowLogSpan = backOffer.getSlowLog().start("seekLeaderStore"); try { List peers = region.getFollowerList(); @@ -230,7 +235,10 @@ private Boolean seekLeaderStore(BackOffer backOffer) { private boolean seekProxyStore(BackOffer backOffer) { SlowLogSpan slowLogSpan = backOffer.getSlowLog().start("seekProxyStore"); - Histogram.Timer grpcForwardDurationTimer = SEEK_PROXY_STORE_DURATION.startTimer(); + Histogram.Timer grpcForwardDurationTimer = + SEEK_PROXY_STORE_DURATION + .labels(regionManager.getPDClient().getClusterId().toString()) + .startTimer(); try { logger.info(String.format("try grpc forward: region[%d]", region.getId())); // when current leader cannot be reached diff --git a/src/main/java/org/tikv/common/region/RegionManager.java b/src/main/java/org/tikv/common/region/RegionManager.java index c73725e79d6..eaf99dba325 100644 --- a/src/main/java/org/tikv/common/region/RegionManager.java +++ b/src/main/java/org/tikv/common/region/RegionManager.java @@ -52,11 +52,13 @@ public class RegionManager { HistogramUtils.buildDuration() .name("client_java_get_region_by_requests_latency") .help("getRegionByKey request latency.") + .labelNames("cluster") .register(); public static final Histogram SCAN_REGIONS_REQUEST_LATENCY = HistogramUtils.buildDuration() .name("client_java_scan_regions_request_latency") .help("scanRegions request latency.") + .labelNames("cluster") .register(); // TODO: the region cache logic need rewrite. 
@@ -101,7 +103,8 @@ public ReadOnlyPDClient getPDClient() { public List scanRegions( BackOffer backOffer, ByteString startKey, ByteString endKey, int limit) { - Histogram.Timer requestTimer = SCAN_REGIONS_REQUEST_LATENCY.startTimer(); + Histogram.Timer requestTimer = + SCAN_REGIONS_REQUEST_LATENCY.labels(getPDClient().getClusterId().toString()).startTimer(); SlowLogSpan slowLogSpan = backOffer.getSlowLog().start("scanRegions"); try { return pdClient.scanRegions(backOffer, startKey, endKey, limit); @@ -118,7 +121,10 @@ public TiRegion getRegionByKey(ByteString key) { } public TiRegion getRegionByKey(ByteString key, BackOffer backOffer) { - Histogram.Timer requestTimer = GET_REGION_BY_KEY_REQUEST_LATENCY.startTimer(); + Histogram.Timer requestTimer = + GET_REGION_BY_KEY_REQUEST_LATENCY + .labels(getPDClient().getClusterId().toString()) + .startTimer(); SlowLogSpan slowLogSpan = backOffer.getSlowLog().start("getRegionByKey"); TiRegion region = cache.getRegionByKey(key, backOffer); try { @@ -311,6 +317,7 @@ public void insertRegionToCache(TiRegion region) { } private BackOffer defaultBackOff() { - return ConcreteBackOffer.newCustomBackOff(conf.getRawKVDefaultBackoffInMS()); + return ConcreteBackOffer.newCustomBackOff( + conf.getRawKVDefaultBackoffInMS(), pdClient.getClusterId()); } } diff --git a/src/main/java/org/tikv/common/region/RegionStoreClient.java b/src/main/java/org/tikv/common/region/RegionStoreClient.java index 056f67885d1..36529b7de77 100644 --- a/src/main/java/org/tikv/common/region/RegionStoreClient.java +++ b/src/main/java/org/tikv/common/region/RegionStoreClient.java @@ -76,7 +76,7 @@ public class RegionStoreClient extends AbstractRegionStoreClient { HistogramUtils.buildDuration() .name("client_java_grpc_raw_requests_latency") .help("grpc raw request latency.") - .labelNames("type") + .labelNames("type", "cluster") .register(); private synchronized Boolean getIsV4() { @@ -141,6 +141,10 @@ private RegionStoreClient( this.pdClient = pdClient; } + private String[] withClusterId(String label) { + return new String[] {label, pdClient.getClusterId().toString()}; + } + public synchronized boolean addResolvedLocks(Long version, Set locks) { Set oldList = resolvedLocks.get(version); if (oldList != null) { @@ -733,7 +737,7 @@ public Iterator coprocessStreaming( StreamingResponse responseIterator = this.callServerStreamingWithRetry( - ConcreteBackOffer.newCopNextMaxBackOff(), + ConcreteBackOffer.newCopNextMaxBackOff(pdClient.getClusterId()), TikvGrpc.getCoprocessorStreamMethod(), reqToSend, handler); @@ -768,7 +772,10 @@ public List splitRegion(Iterable splitKeys) { SplitRegionResponse resp = callWithRetry( - ConcreteBackOffer.newGetBackOff(), TikvGrpc.getSplitRegionMethod(), request, handler); + ConcreteBackOffer.newGetBackOff(pdClient.getClusterId()), + TikvGrpc.getSplitRegionMethod(), + request, + handler); if (resp == null) { this.regionManager.onRequestFail(region); @@ -789,7 +796,7 @@ public List splitRegion(Iterable splitKeys) { public ByteString rawGet(BackOffer backOffer, ByteString key) { Histogram.Timer requestTimer = - GRPC_RAW_REQUEST_LATENCY.labels("client_grpc_raw_get").startTimer(); + GRPC_RAW_REQUEST_LATENCY.labels(withClusterId("client_grpc_raw_get")).startTimer(); try { Supplier factory = () -> RawGetRequest.newBuilder().setContext(makeContext(storeType)).setKey(key).build(); @@ -820,7 +827,7 @@ private ByteString rawGetHelper(RawGetResponse resp) { public Long rawGetKeyTTL(BackOffer backOffer, ByteString key) { Histogram.Timer requestTimer = - 
GRPC_RAW_REQUEST_LATENCY.labels("client_grpc_raw_get_key_ttl").startTimer(); + GRPC_RAW_REQUEST_LATENCY.labels(withClusterId("client_grpc_raw_get_key_ttl")).startTimer(); try { Supplier factory = () -> @@ -859,7 +866,7 @@ private Long rawGetKeyTTLHelper(RawGetKeyTTLResponse resp) { public void rawDelete(BackOffer backOffer, ByteString key, boolean atomic) { Histogram.Timer requestTimer = - GRPC_RAW_REQUEST_LATENCY.labels("client_grpc_raw_delete").startTimer(); + GRPC_RAW_REQUEST_LATENCY.labels(withClusterId("client_grpc_raw_delete")).startTimer(); try { Supplier factory = () -> @@ -897,7 +904,7 @@ private void rawDeleteHelper(RawDeleteResponse resp, TiRegion region) { public void rawPut( BackOffer backOffer, ByteString key, ByteString value, long ttl, boolean atomic) { Histogram.Timer requestTimer = - GRPC_RAW_REQUEST_LATENCY.labels("client_grpc_raw_put").startTimer(); + GRPC_RAW_REQUEST_LATENCY.labels(withClusterId("client_grpc_raw_put")).startTimer(); try { Supplier factory = () -> @@ -936,7 +943,9 @@ private void rawPutHelper(RawPutResponse resp) { public ByteString rawPutIfAbsent( BackOffer backOffer, ByteString key, ByteString value, long ttl) { Histogram.Timer requestTimer = - GRPC_RAW_REQUEST_LATENCY.labels("client_grpc_raw_put_if_absent").startTimer(); + GRPC_RAW_REQUEST_LATENCY + .labels(withClusterId("client_grpc_raw_put_if_absent")) + .startTimer(); try { Supplier factory = () -> @@ -979,7 +988,7 @@ private ByteString rawPutIfAbsentHelper(RawCASResponse resp) { public List rawBatchGet(BackOffer backoffer, List keys) { Histogram.Timer requestTimer = - GRPC_RAW_REQUEST_LATENCY.labels("client_grpc_raw_batch_get").startTimer(); + GRPC_RAW_REQUEST_LATENCY.labels(withClusterId("client_grpc_raw_batch_get")).startTimer(); try { if (keys.isEmpty()) { return new ArrayList<>(); @@ -1014,7 +1023,7 @@ private List handleRawBatchGet(RawBatchGetResponse resp) { public void rawBatchPut(BackOffer backOffer, List kvPairs, long ttl, boolean atomic) { Histogram.Timer requestTimer = - GRPC_RAW_REQUEST_LATENCY.labels("client_grpc_raw_batch_put").startTimer(); + GRPC_RAW_REQUEST_LATENCY.labels(withClusterId("client_grpc_raw_batch_put")).startTimer(); try { if (kvPairs.isEmpty()) { return; @@ -1066,7 +1075,7 @@ private void handleRawBatchPut(RawBatchPutResponse resp) { public void rawBatchDelete(BackOffer backoffer, List keys, boolean atomic) { Histogram.Timer requestTimer = - GRPC_RAW_REQUEST_LATENCY.labels("client_grpc_raw_batch_delete").startTimer(); + GRPC_RAW_REQUEST_LATENCY.labels(withClusterId("client_grpc_raw_batch_delete")).startTimer(); try { if (keys.isEmpty()) { return; @@ -1114,7 +1123,7 @@ private void handleRawBatchDelete(RawBatchDeleteResponse resp) { */ public List rawScan(BackOffer backOffer, ByteString key, int limit, boolean keyOnly) { Histogram.Timer requestTimer = - GRPC_RAW_REQUEST_LATENCY.labels("client_grpc_raw_scan").startTimer(); + GRPC_RAW_REQUEST_LATENCY.labels(withClusterId("client_grpc_raw_scan")).startTimer(); try { Supplier factory = () -> @@ -1160,7 +1169,7 @@ private List rawScanHelper(RawScanResponse resp) { */ public void rawDeleteRange(BackOffer backOffer, ByteString startKey, ByteString endKey) { Histogram.Timer requestTimer = - GRPC_RAW_REQUEST_LATENCY.labels("client_grpc_raw_delete_range").startTimer(); + GRPC_RAW_REQUEST_LATENCY.labels(withClusterId("client_grpc_raw_delete_range")).startTimer(); try { Supplier factory = () -> @@ -1316,7 +1325,8 @@ public RegionManager getRegionManager() { } private BackOffer defaultBackOff() { - return 
ConcreteBackOffer.newCustomBackOff(conf.getRawKVDefaultBackoffInMS()); + return ConcreteBackOffer.newCustomBackOff( + conf.getRawKVDefaultBackoffInMS(), pdClient.getClusterId()); } } } diff --git a/src/main/java/org/tikv/common/region/StoreHealthyChecker.java b/src/main/java/org/tikv/common/region/StoreHealthyChecker.java index c3530171ab9..56c9e6f42b4 100644 --- a/src/main/java/org/tikv/common/region/StoreHealthyChecker.java +++ b/src/main/java/org/tikv/common/region/StoreHealthyChecker.java @@ -80,7 +80,9 @@ private boolean checkStoreHealth(TiStore store) { private boolean checkStoreTombstone(TiStore store) { try { - Metapb.Store newStore = pdClient.getStore(ConcreteBackOffer.newRawKVBackOff(), store.getId()); + Metapb.Store newStore = + pdClient.getStore( + ConcreteBackOffer.newRawKVBackOff(pdClient.getClusterId()), store.getId()); if (newStore != null && newStore.getState() == Metapb.StoreState.Tombstone) { return true; } diff --git a/src/main/java/org/tikv/common/util/BackOffer.java b/src/main/java/org/tikv/common/util/BackOffer.java index 59814f84d8c..acc6cde212e 100644 --- a/src/main/java/org/tikv/common/util/BackOffer.java +++ b/src/main/java/org/tikv/common/util/BackOffer.java @@ -66,4 +66,6 @@ enum BackOffStrategy { } SlowLog getSlowLog(); + + Long getClusterId(); } diff --git a/src/main/java/org/tikv/common/util/ConcreteBackOffer.java b/src/main/java/org/tikv/common/util/ConcreteBackOffer.java index 9668f78d7cf..e06a3feece8 100644 --- a/src/main/java/org/tikv/common/util/ConcreteBackOffer.java +++ b/src/main/java/org/tikv/common/util/ConcreteBackOffer.java @@ -39,6 +39,7 @@ public class ConcreteBackOffer implements BackOffer { private static final Logger logger = LoggerFactory.getLogger(ConcreteBackOffer.class); private final int maxSleep; + private final Long clusterId; @VisibleForTesting public final Map backOffFunctionMap; @@ -52,14 +53,15 @@ public class ConcreteBackOffer implements BackOffer { HistogramUtils.buildDuration() .name("client_java_backoff_duration") .help("backoff duration.") - .labelNames("type") + .labelNames("type", "cluster") .register(); - private ConcreteBackOffer(int maxSleep, long deadline, SlowLog slowLog) { + private ConcreteBackOffer(int maxSleep, long deadline, SlowLog slowLog, long clusterId) { Preconditions.checkArgument( maxSleep == 0 || deadline == 0, "Max sleep time should be 0 or Deadline should be 0."); Preconditions.checkArgument(maxSleep >= 0, "Max sleep time cannot be less than 0."); Preconditions.checkArgument(deadline >= 0, "Deadline cannot be less than 0."); + this.clusterId = clusterId; this.maxSleep = maxSleep; this.errors = Collections.synchronizedList(new ArrayList<>()); this.backOffFunctionMap = new ConcurrentHashMap<>(); @@ -68,6 +70,7 @@ private ConcreteBackOffer(int maxSleep, long deadline, SlowLog slowLog) { } private ConcreteBackOffer(ConcreteBackOffer source) { + this.clusterId = source.clusterId; this.maxSleep = source.maxSleep; this.totalSleep = source.totalSleep; this.errors = source.errors; @@ -76,37 +79,54 @@ private ConcreteBackOffer(ConcreteBackOffer source) { this.slowLog = source.slowLog; } - public static ConcreteBackOffer newDeadlineBackOff(int timeoutInMs, SlowLog slowLog) { + public static ConcreteBackOffer newDeadlineBackOff( + int timeoutInMs, SlowLog slowLog, long clusterId) { long deadline = System.currentTimeMillis() + timeoutInMs; - return new ConcreteBackOffer(0, deadline, slowLog); + return new ConcreteBackOffer(0, deadline, slowLog, clusterId); + } + + public static ConcreteBackOffer 
newDeadlineBackOff(int timeoutInMs, SlowLog slowLog) { + return newDeadlineBackOff(timeoutInMs, slowLog, 0); + } + + public static ConcreteBackOffer newCustomBackOff(int maxSleep, long clusterId) { + return new ConcreteBackOffer(maxSleep, 0, SlowLogEmptyImpl.INSTANCE, clusterId); } public static ConcreteBackOffer newCustomBackOff(int maxSleep) { - return new ConcreteBackOffer(maxSleep, 0, SlowLogEmptyImpl.INSTANCE); + return newCustomBackOff(maxSleep, 0); } - public static ConcreteBackOffer newScannerNextMaxBackOff() { - return new ConcreteBackOffer(SCANNER_NEXT_MAX_BACKOFF, 0, SlowLogEmptyImpl.INSTANCE); + public static ConcreteBackOffer newScannerNextMaxBackOff(long clusterId) { + return new ConcreteBackOffer(SCANNER_NEXT_MAX_BACKOFF, 0, SlowLogEmptyImpl.INSTANCE, clusterId); } public static ConcreteBackOffer newBatchGetMaxBackOff() { - return new ConcreteBackOffer(BATCH_GET_MAX_BACKOFF, 0, SlowLogEmptyImpl.INSTANCE); + return new ConcreteBackOffer(BATCH_GET_MAX_BACKOFF, 0, SlowLogEmptyImpl.INSTANCE, 0); } public static ConcreteBackOffer newCopNextMaxBackOff() { - return new ConcreteBackOffer(COP_NEXT_MAX_BACKOFF, 0, SlowLogEmptyImpl.INSTANCE); + return newCopNextMaxBackOff(0); } - public static ConcreteBackOffer newGetBackOff() { - return new ConcreteBackOffer(GET_MAX_BACKOFF, 0, SlowLogEmptyImpl.INSTANCE); + public static ConcreteBackOffer newCopNextMaxBackOff(long clusterId) { + return new ConcreteBackOffer(COP_NEXT_MAX_BACKOFF, 0, SlowLogEmptyImpl.INSTANCE, clusterId); + } + + public static ConcreteBackOffer newGetBackOff(long clusterId) { + return new ConcreteBackOffer(GET_MAX_BACKOFF, 0, SlowLogEmptyImpl.INSTANCE, clusterId); + } + + public static ConcreteBackOffer newRawKVBackOff(long clusterId) { + return new ConcreteBackOffer(RAWKV_MAX_BACKOFF, 0, SlowLogEmptyImpl.INSTANCE, clusterId); } public static ConcreteBackOffer newRawKVBackOff() { - return new ConcreteBackOffer(RAWKV_MAX_BACKOFF, 0, SlowLogEmptyImpl.INSTANCE); + return newRawKVBackOff(0); } - public static ConcreteBackOffer newTsoBackOff() { - return new ConcreteBackOffer(TSO_MAX_BACKOFF, 0, SlowLogEmptyImpl.INSTANCE); + public static ConcreteBackOffer newTsoBackOff(long clusterId) { + return new ConcreteBackOffer(TSO_MAX_BACKOFF, 0, SlowLogEmptyImpl.INSTANCE, clusterId); } public static ConcreteBackOffer create(BackOffer source) { @@ -173,7 +193,8 @@ public boolean canRetryAfterSleep(BackOffFunction.BackOffFuncType funcType) { } public boolean canRetryAfterSleep(BackOffFunction.BackOffFuncType funcType, long maxSleepMs) { - Histogram.Timer backOffTimer = BACKOFF_DURATION.labels(funcType.name()).startTimer(); + Histogram.Timer backOffTimer = + BACKOFF_DURATION.labels(funcType.name(), clusterId.toString()).startTimer(); SlowLogSpan slowLogSpan = getSlowLog().start("backoff " + funcType.name()); BackOffFunction backOffFunction = backOffFunctionMap.computeIfAbsent(funcType, this::createBackOffFunc); @@ -238,4 +259,9 @@ private void logThrowError(Exception err) { public SlowLog getSlowLog() { return slowLog; } + + @Override + public Long getClusterId() { + return clusterId; + } } diff --git a/src/main/java/org/tikv/raw/RawKVClient.java b/src/main/java/org/tikv/raw/RawKVClient.java index 69dad0aff0b..e8823777222 100644 --- a/src/main/java/org/tikv/raw/RawKVClient.java +++ b/src/main/java/org/tikv/raw/RawKVClient.java @@ -66,6 +66,7 @@ public class RawKVClient implements RawKVClientBase { private final Long clusterId; private final List pdAddresses; + private final TiSession session; private final RegionStoreClientBuilder 
clientBuilder; private final TiConfiguration conf; private final ExecutorService batchGetThreadPool; @@ -79,21 +80,21 @@ public class RawKVClient implements RawKVClientBase { HistogramUtils.buildDuration() .name("client_java_raw_requests_latency") .help("client raw request latency.") - .labelNames("type") + .labelNames("type", "cluster") .register(); public static final Counter RAW_REQUEST_SUCCESS = Counter.build() .name("client_java_raw_requests_success") .help("client raw request success.") - .labelNames("type") + .labelNames("type", "cluster") .register(); public static final Counter RAW_REQUEST_FAILURE = Counter.build() .name("client_java_raw_requests_failure") .help("client raw request failure.") - .labelNames("type") + .labelNames("type", "cluster") .register(); private static final TiKVException ERR_MAX_SCAN_LIMIT_EXCEEDED = @@ -104,6 +105,7 @@ public RawKVClient(TiSession session, RegionStoreClientBuilder clientBuilder) { Objects.requireNonNull(clientBuilder, "clientBuilder is null"); this.conf = session.getConf(); this.clientBuilder = clientBuilder; + this.session = session; this.batchGetThreadPool = session.getThreadPoolForBatchGet(); this.batchPutThreadPool = session.getThreadPoolForBatchPut(); this.batchDeleteThreadPool = session.getThreadPoolForBatchDelete(); @@ -113,6 +115,10 @@ public RawKVClient(TiSession session, RegionStoreClientBuilder clientBuilder) { this.pdAddresses = session.getPDClient().getPdAddrs(); } + private String[] withClusterId(String label) { + return new String[] {label, clusterId.toString()}; + } + @Override public void close() {} @@ -132,8 +138,8 @@ public void putAtomic(ByteString key, ByteString value, long ttl) { } private void put(ByteString key, ByteString value, long ttl, boolean atomic) { - String label = "client_raw_put"; - Histogram.Timer requestTimer = RAW_REQUEST_LATENCY.labels(label).startTimer(); + String[] labels = withClusterId("client_raw_put"); + Histogram.Timer requestTimer = RAW_REQUEST_LATENCY.labels(labels).startTimer(); SlowLog slowLog = new SlowLogImpl( conf.getRawKVWriteSlowLogInMS(), @@ -146,13 +152,13 @@ private void put(ByteString key, ByteString value, long ttl, boolean atomic) { } }); ConcreteBackOffer backOffer = - ConcreteBackOffer.newDeadlineBackOff(conf.getRawKVWriteTimeoutInMS(), slowLog); + ConcreteBackOffer.newDeadlineBackOff(conf.getRawKVWriteTimeoutInMS(), slowLog, clusterId); try { while (true) { try (RegionStoreClient client = clientBuilder.build(key, backOffer)) { slowLog.addProperty("region", client.getRegion().toString()); client.rawPut(backOffer, key, value, ttl, atomic); - RAW_REQUEST_SUCCESS.labels(label).inc(); + RAW_REQUEST_SUCCESS.labels(labels).inc(); return; } catch (final TiKVException e) { backOffer.doBackOff(BackOffFunction.BackOffFuncType.BoRegionMiss, e); @@ -160,7 +166,7 @@ private void put(ByteString key, ByteString value, long ttl, boolean atomic) { } } } catch (Exception e) { - RAW_REQUEST_FAILURE.labels(label).inc(); + RAW_REQUEST_FAILURE.labels(labels).inc(); slowLog.setError(e); throw e; } finally { @@ -176,8 +182,8 @@ public ByteString putIfAbsent(ByteString key, ByteString value) { @Override public ByteString putIfAbsent(ByteString key, ByteString value, long ttl) { - String label = "client_raw_put_if_absent"; - Histogram.Timer requestTimer = RAW_REQUEST_LATENCY.labels(label).startTimer(); + String[] labels = withClusterId("client_raw_put_if_absent"); + Histogram.Timer requestTimer = RAW_REQUEST_LATENCY.labels(labels).startTimer(); SlowLog slowLog = new SlowLogImpl( 
conf.getRawKVWriteSlowLogInMS(), @@ -190,13 +196,13 @@ public ByteString putIfAbsent(ByteString key, ByteString value, long ttl) { } }); ConcreteBackOffer backOffer = - ConcreteBackOffer.newDeadlineBackOff(conf.getRawKVWriteTimeoutInMS(), slowLog); + ConcreteBackOffer.newDeadlineBackOff(conf.getRawKVWriteTimeoutInMS(), slowLog, clusterId); try { while (true) { try (RegionStoreClient client = clientBuilder.build(key, backOffer)) { slowLog.addProperty("region", client.getRegion().toString()); ByteString result = client.rawPutIfAbsent(backOffer, key, value, ttl); - RAW_REQUEST_SUCCESS.labels(label).inc(); + RAW_REQUEST_SUCCESS.labels(labels).inc(); return result; } catch (final TiKVException e) { backOffer.doBackOff(BackOffFunction.BackOffFuncType.BoRegionMiss, e); @@ -204,7 +210,7 @@ public ByteString putIfAbsent(ByteString key, ByteString value, long ttl) { } } } catch (Exception e) { - RAW_REQUEST_FAILURE.labels(label).inc(); + RAW_REQUEST_FAILURE.labels(labels).inc(); slowLog.setError(e); throw e; } finally { @@ -234,8 +240,8 @@ public void batchPutAtomic(Map kvPairs, long ttl) { } private void batchPut(Map kvPairs, long ttl, boolean atomic) { - String label = "client_raw_batch_put"; - Histogram.Timer requestTimer = RAW_REQUEST_LATENCY.labels(label).startTimer(); + String[] labels = withClusterId("client_raw_batch_put"); + Histogram.Timer requestTimer = RAW_REQUEST_LATENCY.labels(labels).startTimer(); SlowLog slowLog = new SlowLogImpl( conf.getRawKVBatchWriteSlowLogInMS(), @@ -248,13 +254,14 @@ private void batchPut(Map kvPairs, long ttl, boolean ato } }); ConcreteBackOffer backOffer = - ConcreteBackOffer.newDeadlineBackOff(conf.getRawKVBatchWriteTimeoutInMS(), slowLog); + ConcreteBackOffer.newDeadlineBackOff( + conf.getRawKVBatchWriteTimeoutInMS(), slowLog, clusterId); try { long deadline = System.currentTimeMillis() + conf.getRawKVBatchWriteTimeoutInMS(); doSendBatchPut(backOffer, kvPairs, ttl, atomic, deadline); - RAW_REQUEST_SUCCESS.labels(label).inc(); + RAW_REQUEST_SUCCESS.labels(labels).inc(); } catch (Exception e) { - RAW_REQUEST_FAILURE.labels(label).inc(); + RAW_REQUEST_FAILURE.labels(labels).inc(); slowLog.setError(e); throw e; } finally { @@ -265,12 +272,12 @@ private void batchPut(Map kvPairs, long ttl, boolean ato @Override public ByteString get(ByteString key) { - String label = "client_raw_get"; - Histogram.Timer requestTimer = RAW_REQUEST_LATENCY.labels(label).startTimer(); + String[] labels = withClusterId("client_raw_get"); + Histogram.Timer requestTimer = RAW_REQUEST_LATENCY.labels(labels).startTimer(); SlowLog slowLog = new SlowLogImpl( conf.getRawKVReadSlowLogInMS(), - new HashMap(2) { + new HashMap() { { put("cluster_id", clusterId); put("pd_addresses", pdAddresses); @@ -280,13 +287,13 @@ public ByteString get(ByteString key) { }); ConcreteBackOffer backOffer = - ConcreteBackOffer.newDeadlineBackOff(conf.getRawKVReadTimeoutInMS(), slowLog); + ConcreteBackOffer.newDeadlineBackOff(conf.getRawKVReadTimeoutInMS(), slowLog, clusterId); try { while (true) { try (RegionStoreClient client = clientBuilder.build(key, backOffer)) { slowLog.addProperty("region", client.getRegion().toString()); ByteString result = client.rawGet(backOffer, key); - RAW_REQUEST_SUCCESS.labels(label).inc(); + RAW_REQUEST_SUCCESS.labels(labels).inc(); return result; } catch (final TiKVException e) { backOffer.doBackOff(BackOffFunction.BackOffFuncType.BoRegionMiss, e); @@ -294,7 +301,7 @@ public ByteString get(ByteString key) { } } } catch (Exception e) { - RAW_REQUEST_FAILURE.labels(label).inc(); + 
RAW_REQUEST_FAILURE.labels(labels).inc(); slowLog.setError(e); throw e; } finally { @@ -305,12 +312,12 @@ public ByteString get(ByteString key) { @Override public List batchGet(List keys) { - String label = "client_raw_batch_get"; - Histogram.Timer requestTimer = RAW_REQUEST_LATENCY.labels(label).startTimer(); + String[] labels = withClusterId("client_raw_batch_get"); + Histogram.Timer requestTimer = RAW_REQUEST_LATENCY.labels(labels).startTimer(); SlowLog slowLog = new SlowLogImpl( conf.getRawKVBatchReadSlowLogInMS(), - new HashMap(2) { + new HashMap() { { put("cluster_id", clusterId); put("pd_addresses", pdAddresses); @@ -319,14 +326,15 @@ public List batchGet(List keys) { } }); ConcreteBackOffer backOffer = - ConcreteBackOffer.newDeadlineBackOff(conf.getRawKVBatchReadTimeoutInMS(), slowLog); + ConcreteBackOffer.newDeadlineBackOff( + conf.getRawKVBatchReadTimeoutInMS(), slowLog, clusterId); try { long deadline = System.currentTimeMillis() + conf.getRawKVBatchReadTimeoutInMS(); List result = doSendBatchGet(backOffer, keys, deadline); - RAW_REQUEST_SUCCESS.labels(label).inc(); + RAW_REQUEST_SUCCESS.labels(labels).inc(); return result; } catch (Exception e) { - RAW_REQUEST_FAILURE.labels(label).inc(); + RAW_REQUEST_FAILURE.labels(labels).inc(); slowLog.setError(e); throw e; } finally { @@ -346,8 +354,8 @@ public void batchDeleteAtomic(List keys) { } private void batchDelete(List keys, boolean atomic) { - String label = "client_raw_batch_delete"; - Histogram.Timer requestTimer = RAW_REQUEST_LATENCY.labels(label).startTimer(); + String[] labels = withClusterId("client_raw_batch_delete"); + Histogram.Timer requestTimer = RAW_REQUEST_LATENCY.labels(labels).startTimer(); SlowLog slowLog = new SlowLogImpl( conf.getRawKVBatchWriteSlowLogInMS(), @@ -360,14 +368,15 @@ private void batchDelete(List keys, boolean atomic) { } }); ConcreteBackOffer backOffer = - ConcreteBackOffer.newDeadlineBackOff(conf.getRawKVBatchWriteTimeoutInMS(), slowLog); + ConcreteBackOffer.newDeadlineBackOff( + conf.getRawKVBatchWriteTimeoutInMS(), slowLog, clusterId); try { long deadline = System.currentTimeMillis() + conf.getRawKVBatchWriteTimeoutInMS(); doSendBatchDelete(backOffer, keys, atomic, deadline); - RAW_REQUEST_SUCCESS.labels(label).inc(); + RAW_REQUEST_SUCCESS.labels(labels).inc(); return; } catch (Exception e) { - RAW_REQUEST_FAILURE.labels(label).inc(); + RAW_REQUEST_FAILURE.labels(labels).inc(); slowLog.setError(e); throw e; } finally { @@ -378,8 +387,8 @@ private void batchDelete(List keys, boolean atomic) { @Override public Long getKeyTTL(ByteString key) { - String label = "client_raw_get_key_ttl"; - Histogram.Timer requestTimer = RAW_REQUEST_LATENCY.labels(label).startTimer(); + String[] labels = withClusterId("client_raw_get_key_ttl"); + Histogram.Timer requestTimer = RAW_REQUEST_LATENCY.labels(labels).startTimer(); SlowLog slowLog = new SlowLogImpl( conf.getRawKVReadSlowLogInMS(), @@ -392,13 +401,13 @@ public Long getKeyTTL(ByteString key) { } }); ConcreteBackOffer backOffer = - ConcreteBackOffer.newDeadlineBackOff(conf.getRawKVReadTimeoutInMS(), slowLog); + ConcreteBackOffer.newDeadlineBackOff(conf.getRawKVReadTimeoutInMS(), slowLog, clusterId); try { while (true) { try (RegionStoreClient client = clientBuilder.build(key, backOffer)) { slowLog.addProperty("region", client.getRegion().toString()); Long result = client.rawGetKeyTTL(backOffer, key); - RAW_REQUEST_SUCCESS.labels(label).inc(); + RAW_REQUEST_SUCCESS.labels(labels).inc(); return result; } catch (final TiKVException e) { 
           backOffer.doBackOff(BackOffFunction.BackOffFuncType.BoRegionMiss, e);
@@ -406,7 +415,7 @@ public Long getKeyTTL(ByteString key) {
         }
       }
     } catch (Exception e) {
-      RAW_REQUEST_FAILURE.labels(label).inc();
+      RAW_REQUEST_FAILURE.labels(labels).inc();
       slowLog.setError(e);
       throw e;
     } finally {
@@ -417,8 +426,8 @@ public List> batchScan(List ranges) {
-    String label = "client_raw_batch_scan";
-    Histogram.Timer requestTimer = RAW_REQUEST_LATENCY.labels(label).startTimer();
+    String[] labels = withClusterId("client_raw_batch_scan");
+    Histogram.Timer requestTimer = RAW_REQUEST_LATENCY.labels(labels).startTimer();
     long deadline = System.currentTimeMillis() + conf.getRawKVScanTimeoutInMS();
     List<Future<Pair<ByteString, List<KvPair>>>> futureList = new ArrayList<>();
     try {
@@ -453,10 +462,10 @@ public List> batchScan(List ranges) {
           throw new TiKVException("Execution exception met.", e);
         }
       }
-      RAW_REQUEST_SUCCESS.labels(label).inc();
+      RAW_REQUEST_SUCCESS.labels(labels).inc();
       return scanResults;
     } catch (Exception e) {
-      RAW_REQUEST_FAILURE.labels(label).inc();
+      RAW_REQUEST_FAILURE.labels(labels).inc();
       for (Future<Pair<ByteString, List<KvPair>>> future : futureList) {
         future.cancel(true);
       }
@@ -473,8 +482,8 @@ public List scan(ByteString startKey, ByteString endKey, int limit) {

   @Override
   public List<KvPair> scan(ByteString startKey, ByteString endKey, int limit, boolean keyOnly) {
-    String label = "client_raw_scan";
-    Histogram.Timer requestTimer = RAW_REQUEST_LATENCY.labels(label).startTimer();
+    String[] labels = withClusterId("client_raw_scan");
+    Histogram.Timer requestTimer = RAW_REQUEST_LATENCY.labels(labels).startTimer();
     SlowLog slowLog =
         new SlowLogImpl(
             conf.getRawKVScanSlowLogInMS(),
@@ -490,16 +499,16 @@ public List scan(ByteString startKey, ByteString endKey, int limit, bool
               }
             });
     ConcreteBackOffer backOffer =
-        ConcreteBackOffer.newDeadlineBackOff(conf.getRawKVScanTimeoutInMS(), slowLog);
+        ConcreteBackOffer.newDeadlineBackOff(conf.getRawKVScanTimeoutInMS(), slowLog, clusterId);
     try {
       Iterator<KvPair> iterator =
           rawScanIterator(conf, clientBuilder, startKey, endKey, limit, keyOnly, backOffer);
       List<KvPair> result = new ArrayList<>();
       iterator.forEachRemaining(result::add);
-      RAW_REQUEST_SUCCESS.labels(label).inc();
+      RAW_REQUEST_SUCCESS.labels(labels).inc();
       return result;
     } catch (Exception e) {
-      RAW_REQUEST_FAILURE.labels(label).inc();
+      RAW_REQUEST_FAILURE.labels(labels).inc();
       slowLog.setError(e);
       throw e;
     } finally {
@@ -525,8 +534,8 @@ public List scan(ByteString startKey, ByteString endKey) {

   @Override
   public List<KvPair> scan(ByteString startKey, ByteString endKey, boolean keyOnly) {
-    String label = "client_raw_scan_without_limit";
-    Histogram.Timer requestTimer = RAW_REQUEST_LATENCY.labels(label).startTimer();
+    String[] labels = withClusterId("client_raw_scan_without_limit");
+    Histogram.Timer requestTimer = RAW_REQUEST_LATENCY.labels(labels).startTimer();
     SlowLog slowLog =
         new SlowLogImpl(
             conf.getRawKVScanSlowLogInMS(),
@@ -541,7 +550,7 @@ public List scan(ByteString startKey, ByteString endKey, boolean keyOnly
               }
             });
     ConcreteBackOffer backOffer =
-        ConcreteBackOffer.newDeadlineBackOff(conf.getRawKVScanTimeoutInMS(), slowLog);
+        ConcreteBackOffer.newDeadlineBackOff(conf.getRawKVScanTimeoutInMS(), slowLog, clusterId);
     try {
       ByteString newStartKey = startKey;
       List<KvPair> result = new ArrayList<>();
@@ -561,10 +570,10 @@ public List scan(ByteString startKey, ByteString endKey, boolean keyOnly
         iterator.forEachRemaining(result::add);
         newStartKey = Key.toRawKey(result.get(result.size() - 1).getKey()).next().toByteString();
       }
-      RAW_REQUEST_SUCCESS.labels(label).inc();
+      RAW_REQUEST_SUCCESS.labels(labels).inc();
       return result;
     } catch (Exception e) {
-      RAW_REQUEST_FAILURE.labels(label).inc();
+      RAW_REQUEST_FAILURE.labels(labels).inc();
       slowLog.setError(e);
       throw e;
     } finally {
@@ -607,8 +616,8 @@ public void deleteAtomic(ByteString key) {
   }

   private void delete(ByteString key, boolean atomic) {
-    String label = "client_raw_delete";
-    Histogram.Timer requestTimer = RAW_REQUEST_LATENCY.labels(label).startTimer();
+    String[] labels = withClusterId("client_raw_delete");
+    Histogram.Timer requestTimer = RAW_REQUEST_LATENCY.labels(labels).startTimer();
     SlowLog slowLog =
         new SlowLogImpl(
             conf.getRawKVWriteSlowLogInMS(),
@@ -622,13 +631,13 @@ private void delete(ByteString key, boolean atomic) {
               }
             });
     ConcreteBackOffer backOffer =
-        ConcreteBackOffer.newDeadlineBackOff(conf.getRawKVWriteTimeoutInMS(), slowLog);
+        ConcreteBackOffer.newDeadlineBackOff(conf.getRawKVWriteTimeoutInMS(), slowLog, clusterId);
     try {
       while (true) {
         try (RegionStoreClient client = clientBuilder.build(key, backOffer)) {
           slowLog.addProperty("region", client.getRegion().toString());
           client.rawDelete(backOffer, key, atomic);
-          RAW_REQUEST_SUCCESS.labels(label).inc();
+          RAW_REQUEST_SUCCESS.labels(labels).inc();
           return;
         } catch (final TiKVException e) {
           backOffer.doBackOff(BackOffFunction.BackOffFuncType.BoRegionMiss, e);
@@ -636,7 +645,7 @@ private void delete(ByteString key, boolean atomic) {
         }
       }
     } catch (Exception e) {
-      RAW_REQUEST_FAILURE.labels(label).inc();
+      RAW_REQUEST_FAILURE.labels(labels).inc();
       slowLog.setError(e);
       throw e;
     } finally {
@@ -647,17 +656,17 @@ private void delete(ByteString key, boolean atomic) {

   @Override
   public synchronized void deleteRange(ByteString startKey, ByteString endKey) {
-    String label = "client_raw_delete_range";
-    Histogram.Timer requestTimer = RAW_REQUEST_LATENCY.labels(label).startTimer();
+    String[] labels = withClusterId("client_raw_delete_range");
+    Histogram.Timer requestTimer = RAW_REQUEST_LATENCY.labels(labels).startTimer();
     ConcreteBackOffer backOffer =
         ConcreteBackOffer.newDeadlineBackOff(
-            conf.getRawKVCleanTimeoutInMS(), SlowLogEmptyImpl.INSTANCE);
+            conf.getRawKVCleanTimeoutInMS(), SlowLogEmptyImpl.INSTANCE, clusterId);
     try {
       long deadline = System.currentTimeMillis() + conf.getRawKVCleanTimeoutInMS();
       doSendDeleteRange(backOffer, startKey, endKey, deadline);
-      RAW_REQUEST_SUCCESS.labels(label).inc();
+      RAW_REQUEST_SUCCESS.labels(labels).inc();
     } catch (Exception e) {
-      RAW_REQUEST_FAILURE.labels(label).inc();
+      RAW_REQUEST_FAILURE.labels(labels).inc();
       throw e;
     } finally {
       requestTimer.observeDuration();
@@ -670,6 +679,11 @@ public synchronized void deletePrefix(ByteString key) {
     deleteRange(key, endKey);
   }

+  @Override
+  public TiSession getSession() {
+    return session;
+  }
+
   private void doSendBatchPut(
       BackOffer backOffer,
       Map<ByteString, ByteString> kvPairs,
diff --git a/src/main/java/org/tikv/raw/RawKVClientBase.java b/src/main/java/org/tikv/raw/RawKVClientBase.java
index 96b3d98e84d..e4f44aa1fe2 100644
--- a/src/main/java/org/tikv/raw/RawKVClientBase.java
+++ b/src/main/java/org/tikv/raw/RawKVClientBase.java
@@ -18,6 +18,7 @@
 import com.google.protobuf.ByteString;
 import java.util.List;
 import java.util.Map;
+import org.tikv.common.TiSession;
 import org.tikv.common.util.ScanOption;
 import org.tikv.kvproto.Kvrpcpb;
@@ -254,4 +255,6 @@ public interface RawKVClientBase extends AutoCloseable {
    * @param key prefix of keys to be deleted
    */
   void deletePrefix(ByteString key);
+
+  TiSession getSession();
 }
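The RawKVClient changes above all funnel through a withClusterId(...) helper whose definition is added elsewhere in the patch and is not part of this excerpt. The following is only a sketch of the overall labelling pattern, not code from the patch: the helper shown here is hypothetical, the metrics are built with the plain io.prometheus.client builders rather than the project's HistogramUtils.buildDuration(), and the metric names are reused purely for illustration.

import io.prometheus.client.Counter;
import io.prometheus.client.Histogram;

// Sketch only: mirrors the {type, cluster} label pattern applied throughout RawKVClient.
public class ClusterLabelSketch {
  static final Histogram RAW_REQUEST_LATENCY =
      Histogram.build()
          .name("client_java_raw_requests_latency")
          .help("client raw request latency.")
          .labelNames("type", "cluster")
          .register();

  static final Counter RAW_REQUEST_SUCCESS =
      Counter.build()
          .name("client_java_raw_requests_success")
          .help("client raw request success.")
          .labelNames("type", "cluster")
          .register();

  private final long clusterId;

  ClusterLabelSketch(long clusterId) {
    this.clusterId = clusterId;
  }

  // Hypothetical stand-in for the withClusterId(...) helper referenced by the patch:
  // pairs the request type with the cluster id, in the same order as labelNames().
  String[] withClusterId(String type) {
    return new String[] {type, String.valueOf(clusterId)};
  }

  void timedRequest(Runnable request) {
    String[] labels = withClusterId("client_raw_get");
    Histogram.Timer timer = RAW_REQUEST_LATENCY.labels(labels).startTimer();
    try {
      request.run();
      RAW_REQUEST_SUCCESS.labels(labels).inc();
    } finally {
      timer.observeDuration();
    }
  }
}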
diff --git a/src/main/java/org/tikv/raw/SmartRawKVClient.java b/src/main/java/org/tikv/raw/SmartRawKVClient.java
index 1c5f886bb51..f31bad831fb 100644
--- a/src/main/java/org/tikv/raw/SmartRawKVClient.java
+++ b/src/main/java/org/tikv/raw/SmartRawKVClient.java
@@ -22,6 +22,7 @@
 import java.util.Map;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
+import org.tikv.common.TiSession;
 import org.tikv.common.exception.CircuitBreakerOpenException;
 import org.tikv.common.util.HistogramUtils;
 import org.tikv.common.util.ScanOption;
@@ -35,28 +36,28 @@ public class SmartRawKVClient implements RawKVClientBase {
       HistogramUtils.buildDuration()
          .name("client_java_smart_raw_requests_latency")
          .help("client smart raw request latency.")
-          .labelNames("type")
+          .labelNames("type", "cluster")
          .register();

  private static final Counter REQUEST_SUCCESS =
      Counter.build()
          .name("client_java_smart_raw_requests_success")
          .help("client smart raw request success.")
-          .labelNames("type")
+          .labelNames("type", "cluster")
          .register();

  private static final Counter REQUEST_FAILURE =
      Counter.build()
          .name("client_java_smart_raw_requests_failure")
          .help("client smart raw request failure.")
-          .labelNames("type")
+          .labelNames("type", "cluster")
          .register();

  private static final Counter CIRCUIT_BREAKER_OPENED =
      Counter.build()
          .name("client_java_smart_raw_circuit_breaker_opened")
          .help("client smart raw circuit breaker opened.")
-          .labelNames("type")
+          .labelNames("type", "cluster")
          .register();

  private final RawKVClientBase client;
@@ -208,14 +209,21 @@ public void deletePrefix(ByteString key) {
     callWithCircuitBreaker("deletePrefix", () -> client.deletePrefix(key));
   }

+  @Override
+  public TiSession getSession() {
+    return client.getSession();
+  }
+
   <T> T callWithCircuitBreaker(String funcName, Function1<T> func) {
-    Histogram.Timer requestTimer = REQUEST_LATENCY.labels(funcName).startTimer();
+    String[] labels = new String[] {funcName, getSession().getPDClient().getClusterId().toString()};
+
+    Histogram.Timer requestTimer = REQUEST_LATENCY.labels(labels).startTimer();
     try {
       T result = callWithCircuitBreaker0(funcName, func);
-      REQUEST_SUCCESS.labels(funcName).inc();
+      REQUEST_SUCCESS.labels(labels).inc();
       return result;
     } catch (Exception e) {
-      REQUEST_FAILURE.labels(funcName).inc();
+      REQUEST_FAILURE.labels(labels).inc();
       throw e;
     } finally {
       requestTimer.observeDuration();
@@ -248,7 +256,7 @@ private T callWithCircuitBreaker0(String funcName, Function1 func) {
       }
     } else {
       logger.debug("Circuit Breaker Opened");
-      CIRCUIT_BREAKER_OPENED.labels(funcName).inc();
+      CIRCUIT_BREAKER_OPENED
+          .labels(funcName, getSession().getPDClient().getClusterId().toString())
+          .inc();
       throw new CircuitBreakerOpenException();
     }
   }
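The reason every labels(label) call site has to change in lock-step with labelNames("type", "cluster") is that the Prometheus Java client checks the number of label values when a child is looked up. A minimal, standalone illustration follows; the metric name and label values are made up for the example:

import io.prometheus.client.Counter;

public class LabelArityDemo {
  public static void main(String[] args) {
    Counter requests =
        Counter.build()
            .name("demo_requests")
            .help("demo counter with a per-cluster label.")
            .labelNames("type", "cluster")
            .register();

    // Two values for two label names: accepted.
    requests.labels("get", "12345").inc();

    // requests.labels("get").inc();
    // would throw IllegalArgumentException at runtime, because the label count
    // no longer matches labelNames() -- which is why this patch rewrites every
    // single-value labels(...) call alongside the labelNames(...) change.
    System.out.println(requests.labels("get", "12345").get());
  }
}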
diff --git a/src/main/java/org/tikv/service/failsafe/CircuitBreakerImpl.java b/src/main/java/org/tikv/service/failsafe/CircuitBreakerImpl.java
index 7f2231bd7ab..10eb7983112 100644
--- a/src/main/java/org/tikv/service/failsafe/CircuitBreakerImpl.java
+++ b/src/main/java/org/tikv/service/failsafe/CircuitBreakerImpl.java
@@ -30,9 +30,10 @@ public class CircuitBreakerImpl implements CircuitBreaker {
       Counter.build()
          .name("client_java_circuit_breaker_attempt_counter")
          .help("client circuit breaker attempt counter.")
-          .labelNames("type")
+          .labelNames("type", "cluster")
          .register();

+  private final Long clusterId;
   private final boolean enable;
   private final int windowInSeconds;
   private final int errorThresholdPercentage;
@@ -47,14 +48,15 @@ public class CircuitBreakerImpl implements CircuitBreaker {
   private final CircuitBreakerMetrics metrics;

-  public CircuitBreakerImpl(TiConfiguration conf) {
+  public CircuitBreakerImpl(TiConfiguration conf, long clusterId) {
     this(
         conf.isCircuitBreakEnable(),
         conf.getCircuitBreakAvailabilityWindowInSeconds(),
         conf.getCircuitBreakAvailabilityErrorThresholdPercentage(),
         conf.getCircuitBreakAvailabilityRequestVolumnThreshold(),
         conf.getCircuitBreakSleepWindowInSeconds(),
-        conf.getCircuitBreakAttemptRequestCount());
+        conf.getCircuitBreakAttemptRequestCount(),
+        clusterId);
   }

   public CircuitBreakerImpl(
@@ -63,8 +65,10 @@ public CircuitBreakerImpl(
       int errorThresholdPercentage,
       int requestVolumeThreshold,
       int sleepWindowInSeconds,
-      int attemptRequestCount) {
+      int attemptRequestCount,
+      long clusterId) {
     this.enable = enable;
+    this.clusterId = clusterId;
     this.windowInSeconds = windowInSeconds;
     this.errorThresholdPercentage = errorThresholdPercentage;
     this.requestVolumeThreshold = requestVolumeThreshold;
@@ -123,7 +127,7 @@ Status getStatus() {

   @Override
   public void recordAttemptSuccess() {
-    CIRCUIT_BREAKER_ATTEMPT_COUNTER.labels("success").inc();
+    CIRCUIT_BREAKER_ATTEMPT_COUNTER.labels("success", clusterId.toString()).inc();
     if (attemptSuccessCount.incrementAndGet() >= this.attemptRequestCount) {
       halfOpen2Close();
     }
@@ -131,7 +135,7 @@ public void recordAttemptSuccess() {

   @Override
   public void recordAttemptFailure() {
-    CIRCUIT_BREAKER_ATTEMPT_COUNTER.labels("failure").inc();
+    CIRCUIT_BREAKER_ATTEMPT_COUNTER.labels("failure", clusterId.toString()).inc();
     halfOpen2Open();
   }
diff --git a/src/main/java/org/tikv/txn/KVClient.java b/src/main/java/org/tikv/txn/KVClient.java
index 28b29f4ee17..91ca961215c 100644
--- a/src/main/java/org/tikv/txn/KVClient.java
+++ b/src/main/java/org/tikv/txn/KVClient.java
@@ -71,7 +71,9 @@ public void close() {
    * @return a ByteString value if key exists, ByteString.EMPTY if key does not exist
    */
   public ByteString get(ByteString key, long version) throws GrpcException {
-    BackOffer backOffer = ConcreteBackOffer.newGetBackOff();
+    BackOffer backOffer =
+        ConcreteBackOffer.newGetBackOff(
+            clientBuilder.getRegionManager().getPDClient().getClusterId());
     while (true) {
       RegionStoreClient client = clientBuilder.build(key);
       try {
diff --git a/src/main/java/org/tikv/txn/TTLManager.java b/src/main/java/org/tikv/txn/TTLManager.java
index 82f64fafe5f..9cc46210f05 100644
--- a/src/main/java/org/tikv/txn/TTLManager.java
+++ b/src/main/java/org/tikv/txn/TTLManager.java
@@ -90,7 +90,9 @@ public void keepAlive() {
   }

   private void doKeepAlive() {
-    BackOffer bo = ConcreteBackOffer.newCustomBackOff(MANAGED_LOCK_TTL);
+    BackOffer bo =
+        ConcreteBackOffer.newCustomBackOff(
+            MANAGED_LOCK_TTL, regionManager.getPDClient().getClusterId());
     long uptime = kvClient.getTimestamp().getPhysical() - TiTimestamp.extractPhysical(startTS);
     long ttl = uptime + MANAGED_LOCK_TTL;
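The BackOffer-related changes in KVClient, TTLManager, TwoPhaseCommitter (next file) and TxnKVClient all follow one pattern: the caller resolves the cluster id from whichever PD client it already holds and threads it into the ConcreteBackOffer factory. A condensed sketch of that pattern, using the factory signatures as they appear in this patch (the package paths are assumed to match the repository layout):

import org.tikv.common.TiConfiguration;
import org.tikv.common.log.SlowLog;
import org.tikv.common.util.BackOffer;
import org.tikv.common.util.ConcreteBackOffer;

// Condensed sketch of the call-site pattern; in the patch the cluster id comes from
// a PD client the caller already holds, e.g. regionManager.getPDClient().getClusterId().
final class ClusterAwareBackOffs {
  static BackOffer forTso(long clusterId) {
    return ConcreteBackOffer.newTsoBackOff(clusterId);
  }

  static BackOffer forKeepAlive(int ttlMs, long clusterId) {
    return ConcreteBackOffer.newCustomBackOff(ttlMs, clusterId);
  }

  static BackOffer forRawWrite(TiConfiguration conf, SlowLog slowLog, long clusterId) {
    return ConcreteBackOffer.newDeadlineBackOff(
        conf.getRawKVWriteTimeoutInMS(), slowLog, clusterId);
  }
}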
diff --git a/src/main/java/org/tikv/txn/TwoPhaseCommitter.java b/src/main/java/org/tikv/txn/TwoPhaseCommitter.java
index 4c7fe404284..993cf8575d5 100644
--- a/src/main/java/org/tikv/txn/TwoPhaseCommitter.java
+++ b/src/main/java/org/tikv/txn/TwoPhaseCommitter.java
@@ -281,7 +281,9 @@ private void doPrewriteSecondaryKeys(
         // consume one task if reaches task limit
         completionService.take().get();
       }
-      BackOffer backOffer = ConcreteBackOffer.newCustomBackOff(maxBackOfferMS);
+      BackOffer backOffer =
+          ConcreteBackOffer.newCustomBackOff(
+              maxBackOfferMS, regionManager.getPDClient().getClusterId());
       completionService.submit(
           () -> {
             doPrewriteSecondaryKeysInBatchesWithRetry(
@@ -539,7 +541,9 @@ private void doCommitSecondaryKeys(
         // consume one task if reaches task limit
         completionService.take().get();
       }
-      BackOffer backOffer = ConcreteBackOffer.newCustomBackOff(commitBackOfferMS);
+      BackOffer backOffer =
+          ConcreteBackOffer.newCustomBackOff(
+              commitBackOfferMS, regionManager.getPDClient().getClusterId());
       completionService.submit(
           () -> {
             doCommitSecondaryKeysWithRetry(backOffer, keyBytes, curSize, commitTs);
diff --git a/src/main/java/org/tikv/txn/TxnKVClient.java b/src/main/java/org/tikv/txn/TxnKVClient.java
index e16876f2373..6e0cd41529a 100644
--- a/src/main/java/org/tikv/txn/TxnKVClient.java
+++ b/src/main/java/org/tikv/txn/TxnKVClient.java
@@ -68,7 +68,7 @@ public RegionManager getRegionManager() {
   }

   public TiTimestamp getTimestamp() {
-    BackOffer bo = ConcreteBackOffer.newTsoBackOff();
+    BackOffer bo = ConcreteBackOffer.newTsoBackOff(pdClient.getClusterId());
     TiTimestamp timestamp = new TiTimestamp(0, 0);
     try {
       while (true) {
diff --git a/src/test/java/org/tikv/raw/MetricsTest.java b/src/test/java/org/tikv/raw/MetricsTest.java
index 2b0c755df5c..6d8ea55b1a2 100644
--- a/src/test/java/org/tikv/raw/MetricsTest.java
+++ b/src/test/java/org/tikv/raw/MetricsTest.java
@@ -33,6 +33,7 @@ public void oneTiSession() throws Exception {
     client.put(ByteString.copyFromUtf8("k"), ByteString.copyFromUtf8("v"));
     ByteString result = client.get(ByteString.copyFromUtf8("k"));
     assertEquals(result.toStringUtf8(), "v");
+    Thread.sleep(1000);
     client.close();
     session.close();
   }
diff --git a/src/test/java/org/tikv/service/failsafe/CircuitBreakerTest.java b/src/test/java/org/tikv/service/failsafe/CircuitBreakerTest.java
index 766d5bff709..f6719213ed8 100644
--- a/src/test/java/org/tikv/service/failsafe/CircuitBreakerTest.java
+++ b/src/test/java/org/tikv/service/failsafe/CircuitBreakerTest.java
@@ -25,7 +25,8 @@ public void testCircuitBreaker() throws InterruptedException {
             errorThresholdPercentage,
             requestVolumeThreshold,
             sleepWindowInSeconds,
-            attemptRequestCount);
+            attemptRequestCount,
+            0);
     CircuitBreakerMetrics metrics = circuitBreaker.getMetrics();

     // initial state: CLOSE
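The test changes only thread a literal cluster id of 0 through the new CircuitBreakerImpl constructor and give the metrics a moment before the session is closed; neither test asserts on the new label itself. If such an assertion were wanted, one hypothetical way to phrase it against the default registry is sketched below; whether the counter sample carries a _total suffix depends on the simpleclient version in use:

import io.prometheus.client.CollectorRegistry;
import org.junit.Assert;
import org.junit.Test;

public class ClusterLabelAssertionSketch {
  @Test
  public void attemptCounterCarriesClusterLabel() {
    // Hypothetical sketch: after driving a CircuitBreakerImpl built with cluster id 0,
    // the attempt counter should be visible under the full {type, cluster} label set.
    Double success =
        CollectorRegistry.defaultRegistry.getSampleValue(
            "client_java_circuit_breaker_attempt_counter_total",
            new String[] {"type", "cluster"},
            new String[] {"success", "0"});
    Assert.assertNotNull(success);
  }
}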