operator-framework · everettraven · Sep 8, 2023 · Sep 8, 2023 · Sep 8, 2023 · Sep 8, 2023
@@ -33,6 +33,7 @@ import (
 	ctrl "sigs.k8s.io/controller-runtime"
 	"sigs.k8s.io/controller-runtime/pkg/healthz"
 	"sigs.k8s.io/controller-runtime/pkg/log/zap"
+	"sigs.k8s.io/controller-runtime/pkg/metrics"
 
 	"github.com/spf13/pflag"
 
@@ -41,6 +42,7 @@ import (
 	"github.com/operator-framework/catalogd/internal/version"
 	corecontrollers "github.com/operator-framework/catalogd/pkg/controllers/core"
 	"github.com/operator-framework/catalogd/pkg/features"
+	catalogdmetrics "github.com/operator-framework/catalogd/pkg/metrics"
 	"github.com/operator-framework/catalogd/pkg/profile"
 	"github.com/operator-framework/catalogd/pkg/storage"
 
@@ -124,24 +126,32 @@ func main() {
 		os.Exit(1)
 	}
 
-	if err := os.MkdirAll(storageDir, 0700); err != nil {
-		setupLog.Error(err, "unable to create storage directory for catalogs")
-	}
-	localStorage := storage.LocalDir{RootDir: storageDir}
-	shutdownTimeout := 30 * time.Second
-	catalogServer := server.Server{
-		Kind: "catalogs",
-		Server: &http.Server{
-			Addr:         catalogServerAddr,
-			Handler:      localStorage.StorageServerHandler(),
-			ReadTimeout:  5 * time.Second,
-			WriteTimeout: 10 * time.Second,
-		},
-		ShutdownTimeout: &shutdownTimeout,
-	}
-	if err := mgr.Add(&catalogServer); err != nil {
-		setupLog.Error(err, "unable to start catalog server")
-		os.Exit(1)
+	var localStorage storage.Instance
+	if features.CatalogdFeatureGate.Enabled(features.HTTPServer) {
+		metrics.Registry.MustRegister(catalogdmetrics.RequestDurationMetric)
+
+		if err := os.MkdirAll(storageDir, 0700); err != nil {
+			setupLog.Error(err, "unable to create storage directory for catalogs")
+			os.Exit(1)
+		}
+
+		localStorage = storage.LocalDir{RootDir: storageDir}
+		shutdownTimeout := 30 * time.Second
+		catalogServer := server.Server{
+			Kind: "catalogs",
+			Server: &http.Server{
+				Addr:         catalogServerAddr,
+				Handler:      catalogdmetrics.AddMetricsToHandler(localStorage.StorageServerHandler()),
+				ReadTimeout:  5 * time.Second,
+				WriteTimeout: 10 * time.Second,
+			},
+			ShutdownTimeout: &shutdownTimeout,
+		}
+
+		if err := mgr.Add(&catalogServer); err != nil {
+			setupLog.Error(err, "unable to start catalog server")
+			os.Exit(1)
+		}
 	}
 
 	if err = (&corecontrollers.CatalogReconciler{

@@ -10,6 +10,7 @@ require (
 	github.com/onsi/ginkgo/v2 v2.9.7
 	github.com/onsi/gomega v1.27.7
 	github.com/operator-framework/operator-registry v1.27.1
+	github.com/prometheus/client_golang v1.14.0
 	github.com/spf13/pflag v1.0.5
 	github.com/stretchr/testify v1.8.1
 	k8s.io/api v0.26.1
@@ -59,7 +60,6 @@ require (
 	github.com/operator-framework/api v0.17.4-0.20230223191600-0131a6301e42 // indirect
 	github.com/pkg/errors v0.9.1 // indirect
 	github.com/pmezard/go-difflib v1.0.0 // indirect
-	github.com/prometheus/client_golang v1.14.0 // indirect
 	github.com/prometheus/client_model v0.3.0 // indirect
 	github.com/prometheus/common v0.37.0 // indirect
 	github.com/prometheus/procfs v0.8.0 // indirect

@@ -0,0 +1,40 @@
+package metrics
+
+import (
+	"net/http"
+
+	"github.com/prometheus/client_golang/prometheus"
+	"github.com/prometheus/client_golang/prometheus/promhttp"
+)
+
+const (
+	// RequestDurationMetricName is the name given to the request duration
+	// histogram, RequestDurationMetric.
+	RequestDurationMetricName = "catalogd_http_request_duration_seconds"
+)
+
+// RequestDurationMetric is a histogram of request durations in seconds,
+// partitioned by the "code" label. It provides the data needed to calculate
+// an Apdex Score. If using Grafana for visualization connected to a Prometheus
+// data source that is scraping these metrics, you can create a panel that uses
+// the following queries and expression to calculate the Apdex Score where T = 0.5:
+// Query A: sum(catalogd_http_request_duration_seconds_bucket{code!~"5..",le="0.5"})
+// Query B: sum(catalogd_http_request_duration_seconds_bucket{code!~"5..",le="2"})
+// Query C: sum(catalogd_http_request_duration_seconds_count)
+// Expression for Apdex Score: ($A + (($B - $A) / 2)) / $C
+var (
+	RequestDurationMetric = prometheus.NewHistogramVec(
+		prometheus.HistogramOpts{
+			Name: RequestDurationMetricName,
+			Help: "Histogram of request duration in seconds",
+			// Create a bucket for every 100ms up to 1s and make sure each of those values
+			// multiplied by 4 also exists as a bucket (the queries above need both T and 4T).
+			// Include a 10s bucket to capture very long running requests. This allows us to
+			// easily calculate Apdex Scores up to a T of 1 second, but using various
+			// mathematical formulas we should be able to estimate Apdex Scores up to a T of
+			// 2.5. Having a larger range of buckets will allow us to more easily calculate
+			// health indicators other than the Apdex Score.
+			Buckets: []float64{0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1, 1.2, 1.6, 2, 2.4, 2.8, 3.2, 3.6, 4, 10},
+		},
+		[]string{"code"},
+	)
+)
+
+// AddMetricsToHandler returns handler wrapped with
+// promhttp.InstrumentHandlerDuration, using RequestDurationMetric as the
+// observer for request durations. It does not register RequestDurationMetric;
+// callers must register it with their metrics registry themselves.
+func AddMetricsToHandler(handler http.Handler) http.Handler {
+	return promhttp.InstrumentHandlerDuration(RequestDurationMetric, handler)
+}