From 57d3eb5956188a191f9bb8da1752d3d47c6e673d Mon Sep 17 00:00:00 2001 From: Danil-Grigorev Date: Fri, 3 Jul 2020 13:29:39 +0200 Subject: [PATCH 1/2] Add Leader election flags for machine controller --- cmd/manager/main.go | 32 ++++++++++++++++++++++++++++++-- 1 file changed, 30 insertions(+), 2 deletions(-) diff --git a/cmd/manager/main.go b/cmd/manager/main.go index 56f2414a1c..ce3e152489 100644 --- a/cmd/manager/main.go +++ b/cmd/manager/main.go @@ -19,6 +19,7 @@ package main import ( "flag" "log" + "time" machinev1 "github.com/openshift/machine-api-operator/pkg/apis/machine/v1beta1" "k8s.io/klog" @@ -32,7 +33,29 @@ import ( func main() { flag.Set("logtostderr", "true") - watchNamespace := flag.String("namespace", "", "Namespace that the controller watches to reconcile machine-api objects. If unspecified, the controller watches for machine-api objects across all namespaces.") + watchNamespace := flag.String( + "namespace", + "", + "Namespace that the controller watches to reconcile machine-api objects. If unspecified, the controller watches for machine-api objects across all namespaces.", + ) + + leaderElectResourceNamespace := flag.String( + "leader-elect-resource-namespace", + "", + "The namespace of resource object that is used for locking during leader election. If unspecified and running in cluster, defaults to the service account namespace for the controller. Required for leader-election outside of a cluster.", + ) + + leaderElect := flag.Bool( + "leader-elect", + false, + "Start a leader election client and gain leadership before executing the main loop. Enable this when running replicated components for high availability.", + ) + + leaderElectLeaseDuration := flag.Duration( + "leader-elect-lease-duration", + 15*time.Second, + "The duration that non-leader candidates will wait after observing a leadership renewal until attempting to acquire leadership of a led but unrenewed leader slot. This is effectively the maximum duration that a leader can be stopped before it is replaced by another candidate. This is only applicable if leader election is enabled.", + ) klog.InitFlags(nil) flag.Parse() @@ -43,7 +66,12 @@ func main() { } // Setup a Manager - opts := manager.Options{} + opts := manager.Options{ + LeaderElection: *leaderElect, + LeaderElectionNamespace: *leaderElectResourceNamespace, + LeaderElectionID: "cluster-api-provider-openstack-leader", + LeaseDuration: leaderElectLeaseDuration, + } if *watchNamespace != "" { opts.Namespace = *watchNamespace klog.Infof("Watching machine-api objects only in namespace %q for reconciliation.", opts.Namespace) From d234337c3aacfceabcfe255caee3dcefc98def51 Mon Sep 17 00:00:00 2001 From: Michael McCune Date: Tue, 25 Aug 2020 10:42:10 -0400 Subject: [PATCH 2/2] Slow the default lease retry and renew rate for machine controller Prevent machine controllers from writing in etcd at idle too often by setting 120s lease, 20s retry and 110s deadline on all renewals. Higher values cause tests to flake. --- cmd/manager/main.go | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/cmd/manager/main.go b/cmd/manager/main.go index ce3e152489..7b93882657 100644 --- a/cmd/manager/main.go +++ b/cmd/manager/main.go @@ -30,6 +30,13 @@ import ( "sigs.k8s.io/controller-runtime/pkg/runtime/signals" ) +// The default durations for the leader election operations. +var ( + leaseDuration = 120 * time.Second + renewDeadline = 110 * time.Second + retryPeriod = 20 * time.Second +) + func main() { flag.Set("logtostderr", "true") @@ -53,7 +60,7 @@ func main() { leaderElectLeaseDuration := flag.Duration( "leader-elect-lease-duration", - 15*time.Second, + leaseDuration, "The duration that non-leader candidates will wait after observing a leadership renewal until attempting to acquire leadership of a led but unrenewed leader slot. This is effectively the maximum duration that a leader can be stopped before it is replaced by another candidate. This is only applicable if leader election is enabled.", ) klog.InitFlags(nil) @@ -71,6 +78,9 @@ func main() { LeaderElectionNamespace: *leaderElectResourceNamespace, LeaderElectionID: "cluster-api-provider-openstack-leader", LeaseDuration: leaderElectLeaseDuration, + // Slow the default retry and renew election rate to reduce etcd writes at idle: BZ 1858400 + RetryPeriod: &retryPeriod, + RenewDeadline: &renewDeadline, } if *watchNamespace != "" { opts.Namespace = *watchNamespace