From f5359b5a96efbf282f9bc91277f5290312b6b8e6 Mon Sep 17 00:00:00 2001 From: Danil-Grigorev Date: Mon, 27 Apr 2020 14:45:36 +0200 Subject: [PATCH 1/5] Add Leader election flags for nodelink --- cmd/nodelink-controller/main.go | 32 ++++++++++++++++++++++++++++++-- 1 file changed, 30 insertions(+), 2 deletions(-) diff --git a/cmd/nodelink-controller/main.go b/cmd/nodelink-controller/main.go index ca0d0242f7..73bae14dac 100644 --- a/cmd/nodelink-controller/main.go +++ b/cmd/nodelink-controller/main.go @@ -3,6 +3,7 @@ package main import ( "flag" "runtime" + "time" mapiv1 "github.com/openshift/machine-api-operator/pkg/apis/machine/v1beta1" "github.com/openshift/machine-api-operator/pkg/controller" @@ -23,7 +24,30 @@ func printVersion() { func main() { printVersion() - watchNamespace := flag.String("namespace", "", "Namespace that the controller watches to reconcile machine-api objects. If unspecified, the controller watches for machine-api objects across all namespaces.") + watchNamespace := flag.String( + "namespace", + "", + "Namespace that the controller watches to reconcile machine-api objects. If unspecified, the controller watches for machine-api objects across all namespaces.", + ) + + leaderElectResourceNamespace := flag.String( + "leader-elect-resource-namespace", + "", + "The namespace of resource object that is used for locking during leader election. If unspecified and running in cluster, defaults to the service account namespace for the controller. Required for leader-election outside of a cluster.", + ) + + leaderElect := flag.Bool( + "leader-elect", + false, + "Start a leader election client and gain leadership before executing the main loop. 
Enable this when running replicated components for high availability.", + ) + + leaderElectLeaseDuration := flag.Duration( + "leader-elect-lease-duration", + 15*time.Second, + "The duration that non-leader candidates will wait after observing a leadership renewal until attempting to acquire leadership of a led but unrenewed leader slot. This is effectively the maximum duration that a leader can be stopped before it is replaced by another candidate. This is only applicable if leader election is enabled.", + ) + klog.InitFlags(nil) flag.Set("logtostderr", "true") flag.Parse() @@ -36,7 +60,11 @@ func main() { opts := manager.Options{ // Disable metrics serving - MetricsBindAddress: "0", + MetricsBindAddress: "0", + LeaderElection: *leaderElect, + LeaderElectionNamespace: *leaderElectResourceNamespace, + LeaderElectionID: "cluster-api-provider-nodelink-leader", + LeaseDuration: leaderElectLeaseDuration, } if *watchNamespace != "" { opts.Namespace = *watchNamespace From 4a5da4de3f09a680de0f4c2b248d47bda7e3bb42 Mon Sep 17 00:00:00 2001 From: Danil-Grigorev Date: Thu, 2 Jul 2020 13:24:35 +0200 Subject: [PATCH 2/5] Add Leader election flags for vsphere --- cmd/vsphere/main.go | 43 ++++++++++++++++++++++++++++++++++++++----- 1 file changed, 38 insertions(+), 5 deletions(-) diff --git a/cmd/vsphere/main.go b/cmd/vsphere/main.go index 1b44c4a6e3..5a9758739b 100644 --- a/cmd/vsphere/main.go +++ b/cmd/vsphere/main.go @@ -25,8 +25,36 @@ func main() { flag.BoolVar(&printVersion, "version", false, "print version and exit") klog.InitFlags(nil) - watchNamespace := flag.String("namespace", "", "Namespace that the controller watches to reconcile machine-api objects. 
If unspecified, the controller watches for machine-api objects across all namespaces.") - metricsAddress := flag.String("metrics-bind-address", metrics.DefaultMachineMetricsAddress, "Address for hosting metrics") + watchNamespace := flag.String( + "namespace", + "", + "Namespace that the controller watches to reconcile machine-api objects. If unspecified, the controller watches for machine-api objects across all namespaces.", + ) + + leaderElectResourceNamespace := flag.String( + "leader-elect-resource-namespace", + "", + "The namespace of resource object that is used for locking during leader election. If unspecified and running in cluster, defaults to the service account namespace for the controller. Required for leader-election outside of a cluster.", + ) + + leaderElect := flag.Bool( + "leader-elect", + false, + "Start a leader election client and gain leadership before executing the main loop. Enable this when running replicated components for high availability.", + ) + + leaderElectLeaseDuration := flag.Duration( + "leader-elect-lease-duration", + 15*time.Second, + "The duration that non-leader candidates will wait after observing a leadership renewal until attempting to acquire leadership of a led but unrenewed leader slot. This is effectively the maximum duration that a leader can be stopped before it is replaced by another candidate. 
This is only applicable if leader election is enabled.", + ) + + metricsAddress := flag.String( + "metrics-bind-address", + metrics.DefaultMachineMetricsAddress, + "Address for hosting metrics", + ) + flag.Set("logtostderr", "true") healthAddr := flag.String( "health-addr", @@ -44,10 +72,15 @@ func main() { syncPeriod := 10 * time.Minute opts := manager.Options{ - MetricsBindAddress: *metricsAddress, - HealthProbeBindAddress: *healthAddr, - SyncPeriod: &syncPeriod, + MetricsBindAddress: *metricsAddress, + HealthProbeBindAddress: *healthAddr, + SyncPeriod: &syncPeriod, + LeaderElection: *leaderElect, + LeaderElectionNamespace: *leaderElectResourceNamespace, + LeaderElectionID: "cluster-api-provider-vsphere-leader", + LeaseDuration: leaderElectLeaseDuration, } + if *watchNamespace != "" { opts.Namespace = *watchNamespace klog.Infof("Watching machine-api objects only in namespace %q for reconciliation.", opts.Namespace) From 5fa69d5ca176b3911a07a40fd4db453cb32525f8 Mon Sep 17 00:00:00 2001 From: Danil-Grigorev Date: Thu, 2 Jul 2020 13:29:19 +0200 Subject: [PATCH 3/5] Add Leader election flags for machine health checks --- cmd/machine-healthcheck/main.go | 28 ++++++++++++++++++++++++++-- 1 file changed, 26 insertions(+), 2 deletions(-) diff --git a/cmd/machine-healthcheck/main.go b/cmd/machine-healthcheck/main.go index cb8ae2be96..c42467dbb7 100644 --- a/cmd/machine-healthcheck/main.go +++ b/cmd/machine-healthcheck/main.go @@ -3,6 +3,7 @@ package main import ( "flag" "runtime" + "time" "github.com/openshift/machine-api-operator/pkg/controller/machinehealthcheck" "github.com/openshift/machine-api-operator/pkg/metrics" @@ -44,6 +45,24 @@ func main() { "The address for health checking.", ) + leaderElectResourceNamespace := flag.String( + "leader-elect-resource-namespace", + "", + "The namespace of resource object that is used for locking during leader election. If unspecified and running in cluster, defaults to the service account namespace for the controller. 
Required for leader-election outside of a cluster.", + ) + + leaderElect := flag.Bool( + "leader-elect", + false, + "Start a leader election client and gain leadership before executing the main loop. Enable this when running replicated components for high availability.", + ) + + leaderElectLeaseDuration := flag.Duration( + "leader-elect-lease-duration", + 15*time.Second, + "The duration that non-leader candidates will wait after observing a leadership renewal until attempting to acquire leadership of a led but unrenewed leader slot. This is effectively the maximum duration that a leader can be stopped before it is replaced by another candidate. This is only applicable if leader election is enabled.", + ) + flag.Parse() printVersion() @@ -54,9 +73,14 @@ func main() { } opts := manager.Options{ - MetricsBindAddress: *metricsAddress, - HealthProbeBindAddress: *healthAddr, + MetricsBindAddress: *metricsAddress, + HealthProbeBindAddress: *healthAddr, + LeaderElection: *leaderElect, + LeaderElectionNamespace: *leaderElectResourceNamespace, + LeaderElectionID: "cluster-api-provider-healthcheck-leader", + LeaseDuration: leaderElectLeaseDuration, } + if *watchNamespace != "" { opts.Namespace = *watchNamespace glog.Infof("Watching machine-api objects only in namespace %q for reconciliation.", opts.Namespace) From 1517638df9bc7b0e4bbc68809be5ae2c37f8dfba Mon Sep 17 00:00:00 2001 From: Danil-Grigorev Date: Thu, 2 Jul 2020 13:32:40 +0200 Subject: [PATCH 4/5] Add Leader election flags for machineSet controller --- cmd/machineset/main.go | 30 ++++++++++++++++++++++++++---- 1 file changed, 26 insertions(+), 4 deletions(-) diff --git a/cmd/machineset/main.go b/cmd/machineset/main.go index 08d0c52911..535034c32f 100644 --- a/cmd/machineset/main.go +++ b/cmd/machineset/main.go @@ -61,6 +61,24 @@ func main() { "The address for health checking.", ) + leaderElectResourceNamespace := flag.String( + "leader-elect-resource-namespace", + "", + "The namespace of resource object that is used 
for locking during leader election. If unspecified and running in cluster, defaults to the service account namespace for the controller. Required for leader-election outside of a cluster.", + ) + + leaderElect := flag.Bool( + "leader-elect", + false, + "Start a leader election client and gain leadership before executing the main loop. Enable this when running replicated components for high availability.", + ) + + leaderElectLeaseDuration := flag.Duration( + "leader-elect-lease-duration", + 15*time.Second, + "The duration that non-leader candidates will wait after observing a leadership renewal until attempting to acquire leadership of a led but unrenewed leader slot. This is effectively the maximum duration that a leader can be stopped before it is replaced by another candidate. This is only applicable if leader election is enabled.", + ) + flag.Parse() if *watchNamespace != "" { log.Printf("Watching cluster-api objects only in namespace %q for reconciliation.", *watchNamespace) @@ -76,10 +94,14 @@ func main() { // Create a new Cmd to provide shared dependencies and start components syncPeriod := 10 * time.Minute opts := manager.Options{ - MetricsBindAddress: *metricsAddress, - SyncPeriod: &syncPeriod, - Namespace: *watchNamespace, - HealthProbeBindAddress: *healthAddr, + MetricsBindAddress: *metricsAddress, + SyncPeriod: &syncPeriod, + Namespace: *watchNamespace, + HealthProbeBindAddress: *healthAddr, + LeaderElection: *leaderElect, + LeaderElectionNamespace: *leaderElectResourceNamespace, + LeaderElectionID: "cluster-api-provider-machineset-leader", + LeaseDuration: leaderElectLeaseDuration, } mgr, err := manager.New(cfg, opts) From f0929b94323d845762ed045c0e8859948cca4fd3 Mon Sep 17 00:00:00 2001 From: Danil-Grigorev Date: Fri, 3 Jul 2020 10:54:27 +0200 Subject: [PATCH 5/5] Set leader-election for all controllers enabled Using leader election by default will add stronger guarantees than we have today that only one controller is running at a time to protect 
against edge cases where the deployment replica count could be increased or an upgrade runs with a permissive maxSurge. Relevant provider PRs: - https://github.com/openshift/cluster-api-provider-gcp/pull/85 - https://github.com/openshift/cluster-api-provider-aws/pull/315 - https://github.com/openshift/cluster-api-provider-azure/pull/122 - https://github.com/openshift/cluster-api-provider-openstack/pull/108 - https://github.com/openshift/cluster-api-provider-baremetal/pull/81 - https://github.com/openshift/cluster-api-provider-ovirt/pull/55 - https://github.com/openshift/machine-api-operator/pull/571 --- pkg/operator/sync.go | 1 + 1 file changed, 1 insertion(+) diff --git a/pkg/operator/sync.go b/pkg/operator/sync.go index 4dc6e67fd7..c58d1b6758 100644 --- a/pkg/operator/sync.go +++ b/pkg/operator/sync.go @@ -368,6 +368,7 @@ func newContainers(config *OperatorConfig, features map[string]bool) []corev1.Co args := []string{ "--logtostderr=true", "--v=3", + "--leader-elect=true", fmt.Sprintf("--namespace=%s", config.TargetNamespace), }