diff --git a/config/README.md b/config/README.md index cf2243a6..45abf23f 100644 --- a/config/README.md +++ b/config/README.md @@ -7,6 +7,7 @@ - ``: string value consisting of IP, IP mask or named group, for example `"127.0.0.1"` or `"127.0.0.1/24"`. - ``: string value consisting of host name, for example `"example.com"` - ``: string value matching the regular expression `/^\d+(\.\d+)?[KMGTP]?B?$/i`, for example `"100MB"` + - ``: a map consisting of string keys and string values for optional custom labels in metrics ### Global configuration consist of: ```yml @@ -237,6 +238,9 @@ allowed_networks: , ... | optional # Prometheus metric namespace namespace: | optional + +# Constant labels that are added to every exported Prometheus metric +constant_labels: | optional ``` ### diff --git a/config/config.go b/config/config.go index 00aeb153..3691c792 100644 --- a/config/config.go +++ b/config/config.go @@ -492,6 +492,9 @@ type Metrics struct { // Prometheus metric namespace Namespace string `yaml:"namespace,omitempty"` + // Constant labels which will be added to each sent prometheus metric + ConstantLabels map[string]string `yaml:"constant_labels,omitempty"` + // Catches all undefined fields and must be empty after parsing. XXX map[string]interface{} `yaml:",inline"` } diff --git a/docs/src/content/docs/cn/index.md b/docs/src/content/docs/cn/index.md index 58d82a00..525d7ca6 100644 --- a/docs/src/content/docs/cn/index.md +++ b/docs/src/content/docs/cn/index.md @@ -487,6 +487,8 @@ server: # By default access to `/metrics` is unrestricted. metrics: allowed_networks: ["office"] + # You can specify constant labels which will be added to each sent prometheus metric + constant_labels: {} # Configs for input users. 
users: diff --git a/docs/src/content/docs/configuration/default.md b/docs/src/content/docs/configuration/default.md index 63ce9351..b554ba73 100644 --- a/docs/src/content/docs/configuration/default.md +++ b/docs/src/content/docs/configuration/default.md @@ -177,6 +177,8 @@ server: metrics: allowed_networks: ["office"] namespace: "" + # You can specify constant labels which will be added to each sent prometheus metric + constant_labels: {} # Proxy settings enable parsing proxy headers in cases where # CHProxy is run behind another proxy. diff --git a/internal/topology/metrics.go b/internal/topology/metrics.go index 21000965..475ed712 100644 --- a/internal/topology/metrics.go +++ b/internal/topology/metrics.go @@ -13,19 +13,23 @@ var ( func initMetrics(cfg *config.Config) { namespace := cfg.Server.Metrics.Namespace + constLabels := cfg.Server.Metrics.ConstantLabels + HostHealth = prometheus.NewGaugeVec( prometheus.GaugeOpts{ - Namespace: namespace, - Name: "host_health", - Help: "Health state of hosts by clusters", + Namespace: namespace, + Name: "host_health", + Help: "Health state of hosts by clusters", + ConstLabels: constLabels, }, []string{"cluster", "replica", "cluster_node"}, ) HostPenalties = prometheus.NewCounterVec( prometheus.CounterOpts{ - Namespace: namespace, - Name: "host_penalties_total", - Help: "Total number of given penalties by host", + Namespace: namespace, + Name: "host_penalties_total", + Help: "Total number of given penalties by host", + ConstLabels: constLabels, }, []string{"cluster", "replica", "cluster_node"}, ) diff --git a/main_test.go b/main_test.go index c907ec14..fb0b414d 100644 --- a/main_test.go +++ b/main_test.go @@ -640,7 +640,16 @@ func TestServe(t *testing.T) { "testdata/http.metrics.namespace.yml", func(t *testing.T) { resp := httpGet(t, "http://127.0.0.1:9090/metrics", http.StatusOK) - assert.GreaterOrEqual(t, len(getStringFromResponse(t, resp.Body)), 10000) + metricsString := getStringFromResponse(t, resp.Body) + 
assert.GreaterOrEqual(t, len(metricsString), 10000) + assert.GreaterOrEqual( + t, + strings.Count( + metricsString, + `constant_label1="value1",constant_label2="value2"`, + ), + 10, + ) resp.Body.Close() }, startHTTP, @@ -973,6 +982,11 @@ func TestServe(t *testing.T) { } cfg := &config.Config{} + cfg.Server.Metrics.ConstantLabels = map[string]string{ + "constant_label1": "value1", + "constant_label2": "value2", + } + registerMetrics(cfg) for _, tc := range testCases { t.Run(tc.name, func(t *testing.T) { diff --git a/metrics.go b/metrics.go index 4c344d9e..0d1eabec 100644 --- a/metrics.go +++ b/metrics.go @@ -41,238 +41,270 @@ var ( func initMetrics(cfg *config.Config) { namespace := cfg.Server.Metrics.Namespace + constLabels := cfg.Server.Metrics.ConstantLabels + statusCodes = prometheus.NewCounterVec( prometheus.CounterOpts{ - Namespace: namespace, - Name: "status_codes_total", - Help: "Distribution by status codes", + Namespace: namespace, + Name: "status_codes_total", + Help: "Distribution by status codes", + ConstLabels: constLabels, }, []string{"user", "cluster", "cluster_user", "replica", "cluster_node", "code"}, ) statusCodesClickhouse = prometheus.NewCounterVec( prometheus.CounterOpts{ - Namespace: namespace, - Name: "status_codes_proxy_total", - Help: "Distribution by status codes", + Namespace: namespace, + Name: "status_codes_proxy_total", + Help: "Distribution by status codes", + ConstLabels: constLabels, }, []string{"cluster", "replica", "cluster_node", "code"}, ) requestSum = prometheus.NewCounterVec( prometheus.CounterOpts{ - Namespace: namespace, - Name: "request_sum_total", - Help: "Total number of sent requests", + Namespace: namespace, + Name: "request_sum_total", + Help: "Total number of sent requests", + ConstLabels: constLabels, }, []string{"user", "cluster", "cluster_user", "replica", "cluster_node"}, ) requestSuccess = prometheus.NewCounterVec( prometheus.CounterOpts{ - Namespace: namespace, - Name: "request_success_total", - Help: "Total 
number of sent success requests", + Namespace: namespace, + Name: "request_success_total", + Help: "Total number of sent success requests", + ConstLabels: constLabels, }, []string{"user", "cluster", "cluster_user", "replica", "cluster_node"}, ) limitExcess = prometheus.NewCounterVec( prometheus.CounterOpts{ - Namespace: namespace, - Name: "concurrent_limit_excess_total", - Help: "Total number of max_concurrent_queries excess", + Namespace: namespace, + Name: "concurrent_limit_excess_total", + Help: "Total number of max_concurrent_queries excess", + ConstLabels: constLabels, }, []string{"user", "cluster", "cluster_user", "replica", "cluster_node"}, ) concurrentQueries = prometheus.NewGaugeVec( prometheus.GaugeOpts{ - Namespace: namespace, - Name: "concurrent_queries", - Help: "The number of concurrent queries at current time", + Namespace: namespace, + Name: "concurrent_queries", + Help: "The number of concurrent queries at current time", + ConstLabels: constLabels, }, []string{"user", "cluster", "cluster_user", "replica", "cluster_node"}, ) requestQueueSize = prometheus.NewGaugeVec( prometheus.GaugeOpts{ - Namespace: namespace, - Name: "request_queue_size", - Help: "Request queue sizes at the current time", + Namespace: namespace, + Name: "request_queue_size", + Help: "Request queue sizes at the current time", + ConstLabels: constLabels, }, []string{"user", "cluster", "cluster_user"}, ) userQueueOverflow = prometheus.NewCounterVec( prometheus.CounterOpts{ - Namespace: namespace, - Name: "user_queue_overflow_total", - Help: "The number of overflows for per-user request queues", + Namespace: namespace, + Name: "user_queue_overflow_total", + Help: "The number of overflows for per-user request queues", + ConstLabels: constLabels, }, []string{"user", "cluster", "cluster_user"}, ) clusterUserQueueOverflow = prometheus.NewCounterVec( prometheus.CounterOpts{ - Namespace: namespace, - Name: "cluster_user_queue_overflow_total", - Help: "The number of overflows for 
per-cluster_user request queues", + Namespace: namespace, + Name: "cluster_user_queue_overflow_total", + Help: "The number of overflows for per-cluster_user request queues", + ConstLabels: constLabels, }, []string{"user", "cluster", "cluster_user"}, ) requestBodyBytes = prometheus.NewCounterVec( prometheus.CounterOpts{ - Namespace: namespace, - Name: "request_body_bytes_total", - Help: "The amount of bytes read from request bodies", + Namespace: namespace, + Name: "request_body_bytes_total", + Help: "The amount of bytes read from request bodies", + ConstLabels: constLabels, }, []string{"user", "cluster", "cluster_user", "replica", "cluster_node"}, ) responseBodyBytes = prometheus.NewCounterVec( prometheus.CounterOpts{ - Namespace: namespace, - Name: "response_body_bytes_total", - Help: "The amount of bytes written to response bodies", + Namespace: namespace, + Name: "response_body_bytes_total", + Help: "The amount of bytes written to response bodies", + ConstLabels: constLabels, }, []string{"user", "cluster", "cluster_user", "replica", "cluster_node"}, ) cacheFailedInsert = prometheus.NewCounterVec( prometheus.CounterOpts{ - Namespace: namespace, - Name: "cache_insertion_failures_total", - Help: "The number of insertion in the cache that didn't work out", + Namespace: namespace, + Name: "cache_insertion_failures_total", + Help: "The number of insertion in the cache that didn't work out", + ConstLabels: constLabels, }, []string{"cache", "user", "cluster", "cluster_user"}, ) cacheCorruptedFetch = prometheus.NewCounterVec( prometheus.CounterOpts{ - Namespace: namespace, - Name: "cache_get_corrutpion_total", - Help: "The number of time a data fetching from redis was corrupted", + Namespace: namespace, + Name: "cache_get_corrutpion_total", + Help: "The number of time a data fetching from redis was corrupted", + ConstLabels: constLabels, }, []string{"cache", "user", "cluster", "cluster_user"}, ) cacheHit = prometheus.NewCounterVec( prometheus.CounterOpts{ - Namespace: 
namespace, - Name: "cache_hits_total", - Help: "The amount of cache hits", + Namespace: namespace, + Name: "cache_hits_total", + Help: "The amount of cache hits", + ConstLabels: constLabels, }, []string{"cache", "user", "cluster", "cluster_user"}, ) cacheMiss = prometheus.NewCounterVec( prometheus.CounterOpts{ - Namespace: namespace, - Name: "cache_miss_total", - Help: "The amount of cache misses", + Namespace: namespace, + Name: "cache_miss_total", + Help: "The amount of cache misses", + ConstLabels: constLabels, }, []string{"cache", "user", "cluster", "cluster_user"}, ) cacheSize = prometheus.NewGaugeVec( prometheus.GaugeOpts{ - Namespace: namespace, - Name: "cache_size", - Help: "Cache size at the current time", + Namespace: namespace, + Name: "cache_size", + Help: "Cache size at the current time", + ConstLabels: constLabels, }, []string{"cache"}, ) cacheItems = prometheus.NewGaugeVec( prometheus.GaugeOpts{ - Namespace: namespace, - Name: "cache_items", - Help: "Cache items at the current time", + Namespace: namespace, + Name: "cache_items", + Help: "Cache items at the current time", + ConstLabels: constLabels, }, []string{"cache"}, ) cacheSkipped = prometheus.NewCounterVec( prometheus.CounterOpts{ - Namespace: namespace, - Name: "cache_payloadsize_too_big_total", - Help: "The amount of too big payloads to be cached", + Namespace: namespace, + Name: "cache_payloadsize_too_big_total", + Help: "The amount of too big payloads to be cached", + ConstLabels: constLabels, }, []string{"cache", "user", "cluster", "cluster_user"}, ) requestDuration = prometheus.NewSummaryVec( prometheus.SummaryOpts{ - Namespace: namespace, - Name: "request_duration_seconds", - Help: "Request duration. Includes possible wait time in the queue", - Objectives: map[float64]float64{0.5: 1e-1, 0.9: 1e-2, 0.99: 1e-3, 0.999: 1e-4, 1: 1e-5}, + Namespace: namespace, + Name: "request_duration_seconds", + Help: "Request duration. 
Includes possible wait time in the queue", + ConstLabels: constLabels, + Objectives: map[float64]float64{0.5: 1e-1, 0.9: 1e-2, 0.99: 1e-3, 0.999: 1e-4, 1: 1e-5}, }, []string{"user", "cluster", "cluster_user", "replica", "cluster_node"}, ) proxiedResponseDuration = prometheus.NewSummaryVec( prometheus.SummaryOpts{ - Namespace: namespace, - Name: "proxied_response_duration_seconds", - Help: "Response duration proxied from clickhouse", - Objectives: map[float64]float64{0.5: 1e-1, 0.9: 1e-2, 0.99: 1e-3, 0.999: 1e-4, 1: 1e-5}, + Namespace: namespace, + Name: "proxied_response_duration_seconds", + Help: "Response duration proxied from clickhouse", + ConstLabels: constLabels, + Objectives: map[float64]float64{0.5: 1e-1, 0.9: 1e-2, 0.99: 1e-3, 0.999: 1e-4, 1: 1e-5}, }, []string{"user", "cluster", "cluster_user", "replica", "cluster_node"}, ) cachedResponseDuration = prometheus.NewSummaryVec( prometheus.SummaryOpts{ - Namespace: namespace, - Name: "cached_response_duration_seconds", - Help: "Response duration served from the cache", - Objectives: map[float64]float64{0.5: 1e-1, 0.9: 1e-2, 0.99: 1e-3, 0.999: 1e-4, 1: 1e-5}, + Namespace: namespace, + Name: "cached_response_duration_seconds", + Help: "Response duration served from the cache", + ConstLabels: constLabels, + Objectives: map[float64]float64{0.5: 1e-1, 0.9: 1e-2, 0.99: 1e-3, 0.999: 1e-4, 1: 1e-5}, }, []string{"cache", "user", "cluster", "cluster_user"}, ) canceledRequest = prometheus.NewCounterVec( prometheus.CounterOpts{ - Namespace: namespace, - Name: "canceled_request_total", - Help: "The number of requests canceled by remote client", + Namespace: namespace, + Name: "canceled_request_total", + Help: "The number of requests canceled by remote client", + ConstLabels: constLabels, }, []string{"user", "cluster", "cluster_user", "replica", "cluster_node"}, ) cacheHitFromConcurrentQueries = prometheus.NewCounterVec( prometheus.CounterOpts{ - Namespace: namespace, - Name: "cache_hit_concurrent_query_total", - Help: "The 
amount of cache hits after having awaited concurrently executed queries", + Namespace: namespace, + Name: "cache_hit_concurrent_query_total", + Help: "The amount of cache hits after having awaited concurrently executed queries", + ConstLabels: constLabels, }, []string{"cache", "user", "cluster", "cluster_user"}, ) cacheMissFromConcurrentQueries = prometheus.NewCounterVec( prometheus.CounterOpts{ - Namespace: namespace, - Name: "cache_miss_concurrent_query_total", - Help: "The amount of cache misses, even if previously reported as queries available in the cache, after having awaited concurrently executed queries", + Namespace: namespace, + Name: "cache_miss_concurrent_query_total", + Help: "The amount of cache misses, even if previously reported as queries available in the cache, after having awaited concurrently executed queries", + ConstLabels: constLabels, }, []string{"cache", "user", "cluster", "cluster_user"}, ) killedRequests = prometheus.NewCounterVec( prometheus.CounterOpts{ - Namespace: namespace, - Name: "killed_request_total", - Help: "The number of requests killed by proxy", + Namespace: namespace, + Name: "killed_request_total", + Help: "The number of requests killed by proxy", + ConstLabels: constLabels, }, []string{"user", "cluster", "cluster_user", "replica", "cluster_node"}, ) timeoutRequest = prometheus.NewCounterVec( prometheus.CounterOpts{ - Namespace: namespace, - Name: "timeout_request_total", - Help: "The number of timed out requests", + Namespace: namespace, + Name: "timeout_request_total", + Help: "The number of timed out requests", + ConstLabels: constLabels, }, []string{"user", "cluster", "cluster_user", "replica", "cluster_node"}, ) configSuccess = prometheus.NewGauge(prometheus.GaugeOpts{ - Namespace: namespace, - Name: "config_last_reload_successful", - Help: "Whether the last configuration reload attempt was successful.", + Namespace: namespace, + Name: "config_last_reload_successful", + Help: "Whether the last configuration reload 
attempt was successful.", + ConstLabels: constLabels, }) configSuccessTime = prometheus.NewGauge(prometheus.GaugeOpts{ - Namespace: namespace, - Name: "config_last_reload_success_timestamp_seconds", - Help: "Timestamp of the last successful configuration reload.", + Namespace: namespace, + Name: "config_last_reload_success_timestamp_seconds", + Help: "Timestamp of the last successful configuration reload.", + ConstLabels: constLabels, }) badRequest = prometheus.NewCounter(prometheus.CounterOpts{ - Namespace: namespace, - Name: "bad_requests_total", - Help: "Total number of unsupported requests", + Namespace: namespace, + Name: "bad_requests_total", + Help: "Total number of unsupported requests", + ConstLabels: constLabels, }) retryRequest = prometheus.NewCounterVec( prometheus.CounterOpts{ - Namespace: namespace, - Name: "retry_request_total", - Help: "The number of retry requests", + Namespace: namespace, + Name: "retry_request_total", + Help: "The number of retry requests", + ConstLabels: constLabels, }, []string{"user", "cluster", "cluster_user", "replica", "cluster_node"}, )