From 9e5a3e932f2b426671ca77f344236209b5ec7032 Mon Sep 17 00:00:00 2001 From: Joel Speed Date: Fri, 3 Jul 2020 15:05:05 +0100 Subject: [PATCH] Add HistogramVector to track transition into different Machine phases --- pkg/controller/machine/controller.go | 11 +++++++++++ pkg/metrics/metrics.go | 13 +++++++++++++ 2 files changed, 24 insertions(+) diff --git a/pkg/controller/machine/controller.go b/pkg/controller/machine/controller.go index 22b4768685..84a1933090 100644 --- a/pkg/controller/machine/controller.go +++ b/pkg/controller/machine/controller.go @@ -24,6 +24,7 @@ import ( configv1 "github.com/openshift/api/config/v1" machinev1 "github.com/openshift/machine-api-operator/pkg/apis/machine/v1beta1" + "github.com/openshift/machine-api-operator/pkg/metrics" "github.com/openshift/machine-api-operator/pkg/util" corev1 "k8s.io/api/core/v1" apierrors "k8s.io/apimachinery/pkg/api/errors" @@ -430,6 +431,7 @@ func isInvalidMachineConfigurationError(err error) bool { func (r *ReconcileMachine) setPhase(machine *machinev1.Machine, phase string, errorMessage string) error { if stringPointerDeref(machine.Status.Phase) != phase { klog.V(3).Infof("%v: going into phase %q", machine.GetName(), phase) + // A call to Patch will mutate our local copy of the machine to match what is stored in the API. // Before we make any changes to the status subresource on our local copy, we need to patch the object first, // otherwise our local changes to the status subresource will be lost. 
@@ -454,6 +456,15 @@ func (r *ReconcileMachine) setPhase(machine *machinev1.Machine, phase string, er klog.Errorf("Failed to update machine status %q: %v", machine.GetName(), err) return err } + + // Update the metric after everything else has succeeded to prevent duplicate + // entries when there are failures + if phase != phaseDeleting { + // Apart from deleting, update the transition metric + // Deleting would always end up in the infinite bucket + timeElapsed := time.Now().Sub(machine.GetCreationTimestamp().Time).Seconds() + metrics.MachinePhaseTransitionSeconds.With(map[string]string{"phase": phase}).Observe(timeElapsed) + } } return nil } diff --git a/pkg/metrics/metrics.go b/pkg/metrics/metrics.go index ce70b98665..b77e1f4ca6 100644 --- a/pkg/metrics/metrics.go +++ b/pkg/metrics/metrics.go @@ -63,8 +63,21 @@ var ( ) ) +// Metrics for use in the Machine controller +var ( + // MachinePhaseTransitionSeconds is a metric to capture the time between a Machine being created and entering a particular phase + MachinePhaseTransitionSeconds = prometheus.NewHistogramVec( + prometheus.HistogramOpts{ + Name: "mapi_machine_phase_transition_seconds", + Help: "Number of seconds between Machine creation and Machine transition to a phase.", + Buckets: []float64{5, 10, 20, 30, 60, 90, 120, 180, 240, 300, 360, 480, 600}, + }, []string{"phase"}, + ) +) + func init() { prometheus.MustRegister(MachineCollectorUp) + metrics.Registry.MustRegister(MachinePhaseTransitionSeconds) metrics.Registry.MustRegister( failedInstanceCreateCount, failedInstanceUpdateCount,