From 2da9a30703a42b18af497f1dbfda2b5906e00ab9 Mon Sep 17 00:00:00 2001 From: Clayton Coleman Date: Wed, 5 Jun 2019 15:51:06 -0400 Subject: [PATCH] metrics: Report reason as a label on the conditions metric Conditions are read by telemetry, and exposing the reason as a label incentivizes teams to set good reasons on their operators. This should not increase cardinality significantly, and it provides better insight. --- docs/dev/metrics.md | 8 ++++---- pkg/cvo/metrics.go | 6 +++--- pkg/cvo/metrics_test.go | 6 +++--- 3 files changed, 10 insertions(+), 10 deletions(-) diff --git a/docs/dev/metrics.md b/docs/dev/metrics.md index da98cf70f6..1c80ee254f 100644 --- a/docs/dev/metrics.md +++ b/docs/dev/metrics.md @@ -31,10 +31,10 @@ Metrics about cluster operators: ``` # HELP cluster_operator_conditions Report the conditions for active cluster operators. 0 is False and 1 is True. # TYPE cluster_operator_conditions gauge -cluster_operator_conditions{condition="Available",name="version",namespace="openshift-cluster-version"} 1 -cluster_operator_conditions{condition="Degraded",name="version",namespace="openshift-cluster-version"} 0 -cluster_operator_conditions{condition="Progressing",name="version",namespace="openshift-cluster-version"} 0 -cluster_operator_conditions{condition="RetrievedUpdates",name="version",namespace="openshift-cluster-version"} 0 +cluster_operator_conditions{condition="Available",name="version",namespace="openshift-cluster-version",reason="Happy"} 1 +cluster_operator_conditions{condition="Degraded",name="version",namespace="openshift-cluster-version",reason=""} 0 +cluster_operator_conditions{condition="Progressing",name="version",namespace="openshift-cluster-version",reason=""} 0 +cluster_operator_conditions{condition="RetrievedUpdates",name="version",namespace="openshift-cluster-version",reason=""} 0 # HELP cluster_operator_up Reports key highlights of the active cluster operators. 
# TYPE cluster_operator_up gauge cluster_operator_up{name="version",namespace="openshift-cluster-version",version="4.0.1"} 1 diff --git a/pkg/cvo/metrics.go b/pkg/cvo/metrics.go index 5476890d40..e988bd1239 100644 --- a/pkg/cvo/metrics.go +++ b/pkg/cvo/metrics.go @@ -70,7 +70,7 @@ started. clusterOperatorConditions: prometheus.NewGaugeVec(prometheus.GaugeOpts{ Name: "cluster_operator_conditions", Help: "Report the conditions for active cluster operators. 0 is False and 1 is True.", - }, []string{"name", "condition"}), + }, []string{"name", "condition", "reason"}), clusterOperatorConditionTransitions: prometheus.NewGaugeVec(prometheus.GaugeOpts{ Name: "cluster_operator_condition_transitions", Help: "Reports the number of times that a condition on a cluster operator changes status", @@ -122,7 +122,7 @@ func (m *operatorMetrics) Describe(ch chan<- *prometheus.Desc) { ch <- m.version.WithLabelValues("", "", "").Desc() ch <- m.availableUpdates.WithLabelValues("", "").Desc() ch <- m.clusterOperatorUp.WithLabelValues("", "").Desc() - ch <- m.clusterOperatorConditions.WithLabelValues("", "").Desc() + ch <- m.clusterOperatorConditions.WithLabelValues("", "", "").Desc() ch <- m.clusterOperatorConditionTransitions.WithLabelValues("", "").Desc() } @@ -227,7 +227,7 @@ func (m *operatorMetrics) Collect(ch chan<- prometheus.Metric) { if condition.Status == configv1.ConditionUnknown { continue } - g := m.clusterOperatorConditions.WithLabelValues(op.Name, string(condition.Type)) + g := m.clusterOperatorConditions.WithLabelValues(op.Name, string(condition.Type), string(condition.Reason)) if condition.Status == configv1.ConditionTrue { g.Set(1) } else { diff --git a/pkg/cvo/metrics_test.go b/pkg/cvo/metrics_test.go index 78ecc38c1b..743deb4de1 100644 --- a/pkg/cvo/metrics_test.go +++ b/pkg/cvo/metrics_test.go @@ -161,7 +161,7 @@ func Test_operatorMetrics_Collect(t *testing.T) { Status: configv1.ClusterOperatorStatus{ Conditions: []configv1.ClusterOperatorStatusCondition{ {Type: 
configv1.OperatorAvailable, Status: configv1.ConditionTrue}, - {Type: configv1.ClusterStatusConditionType("Custom"), Status: configv1.ConditionFalse}, + {Type: configv1.ClusterStatusConditionType("Custom"), Status: configv1.ConditionFalse, Reason: "CustomReason"}, {Type: configv1.ClusterStatusConditionType("Unknown"), Status: configv1.ConditionUnknown}, }, }, @@ -175,8 +175,8 @@ func Test_operatorMetrics_Collect(t *testing.T) { } expectMetric(t, metrics[0], 0, map[string]string{"type": "current", "version": "", "image": ""}) expectMetric(t, metrics[1], 1, map[string]string{"name": "test", "version": ""}) - expectMetric(t, metrics[2], 1, map[string]string{"name": "test", "condition": "Available"}) - expectMetric(t, metrics[3], 0, map[string]string{"name": "test", "condition": "Custom"}) + expectMetric(t, metrics[2], 1, map[string]string{"name": "test", "condition": "Available", "reason": ""}) + expectMetric(t, metrics[3], 0, map[string]string{"name": "test", "condition": "Custom", "reason": "CustomReason"}) }, }, {