From 169c415345c97c76160f1fe7e3f6402591fd66f8 Mon Sep 17 00:00:00 2001 From: Antonio Nesic Date: Wed, 25 Feb 2026 15:50:57 +0100 Subject: [PATCH 1/2] fix: start health server before manager init to prevent 503 on upgrades Move HealthManager creation and health server startup to before ctrl.NewManager() so K8s probes are answered immediately, preventing connection refused and 503 errors during slow initialization. --- cmd/main.go | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) diff --git a/cmd/main.go b/cmd/main.go index 7128e310..edd44366 100644 --- a/cmd/main.go +++ b/cmd/main.go @@ -101,6 +101,31 @@ func main() { tlsOpts = append(tlsOpts, disableHTTP2) } + // Create HealthManager and start health server early so probes are + // answered immediately, before the (potentially slow) manager and + // controller initialisation. + healthManager := health.NewHealthManager() + healthManager.Register(health.ComponentCollectorManager) + healthManager.Register(health.ComponentBufferQueue) + healthManager.Register(health.ComponentDakrTransport) + healthManager.Register(health.ComponentMpaServer) + healthManager.Register(health.ComponentPrometheus) + + healthServer := health.NewHealthServer(healthManager, probeAddr) + if err := healthServer.Start(); err != nil { + setupLog.Error(err, "unable to start health server") + os.Exit(1) + } + defer func() { + ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second) + defer cancel() + if err := healthServer.Stop(ctx); err != nil { + setupLog.Error(err, "error stopping health server") + } + }() + + setupLog.Info("health server started, initializing manager") + webhookServer := webhook.NewServer(webhook.Options{ TLSOpts: tlsOpts, }) @@ -190,6 +215,9 @@ func main() { os.Exit(1) } + // No need to add the standard controller with kubebuilder:scaffold:builder + // The env-based controller doesn't rely on CRDs + setupLog.Info("starting manager") if err := mgr.Start(ctrl.SetupSignalHandler()); err != nil { 
setupLog.Error(err, "problem running manager") From 5e3a8c09ad81c77356af2e5d489ac8651655159c Mon Sep 17 00:00:00 2001 From: Antonio Nesic Date: Thu, 26 Feb 2026 11:12:23 +0100 Subject: [PATCH 2/2] Remove the double initialization --- cmd/main.go | 33 ++++----------------------------- 1 file changed, 4 insertions(+), 29 deletions(-) diff --git a/cmd/main.go b/cmd/main.go index edd44366..369d2dbc 100644 --- a/cmd/main.go +++ b/cmd/main.go @@ -111,6 +111,10 @@ func main() { healthManager.Register(health.ComponentMpaServer) healthManager.Register(health.ComponentPrometheus) + // Allow 2 minutes for the controller to win leader election and start + // reconciling before enforcing readiness checks. + healthManager.SuppressReadiness(2 * time.Minute) + healthServer := health.NewHealthServer(healthManager, probeAddr) if err := healthServer.Start(); err != nil { setupLog.Error(err, "unable to start health server") @@ -148,35 +152,6 @@ func main() { metricsServerOptions.FilterProvider = filters.WithAuthenticationAndAuthorization } - // Initialize HealthManager and register components - healthManager := health.NewHealthManager() - healthManager.Register(health.ComponentCollectorManager) - healthManager.Register(health.ComponentBufferQueue) - healthManager.Register(health.ComponentDakrTransport) - healthManager.Register(health.ComponentMpaServer) - healthManager.Register(health.ComponentPrometheus) - - // Allow 2 minutes for the controller to win leader election and start - // reconciling before enforcing readiness checks.
- healthManager.SuppressReadiness(2 * time.Minute) - - // No need to add the standard controller with kubebuilder:scaffold:builder - // The env-based controller doesn't rely on CRDs - - // New health server from health package - healthServer := health.NewHealthServer(healthManager, probeAddr) - if err := healthServer.Start(); err != nil { - setupLog.Error(err, "unable to start health server") - os.Exit(1) - } - defer func() { - ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second) - defer cancel() - if err := healthServer.Stop(ctx); err != nil { - setupLog.Error(err, "error stopping health server") - } - }() - mgr, err := ctrl.NewManager(ctrl.GetConfigOrDie(), ctrl.Options{ Scheme: scheme, Metrics: metricsServerOptions,