diff --git a/cmd/machine-healthcheck/main.go b/cmd/machine-healthcheck/main.go index 4412924ec6..b595f4c54d 100644 --- a/cmd/machine-healthcheck/main.go +++ b/cmd/machine-healthcheck/main.go @@ -3,6 +3,7 @@ package main import ( "flag" "runtime" + "time" "github.com/openshift/machine-api-operator/pkg/controller/machinehealthcheck" @@ -16,6 +17,13 @@ import ( "sigs.k8s.io/controller-runtime/pkg/runtime/signals" ) +// The default durations for the leader election operations. +var ( + leaseDuration = 120 * time.Second + renewDealine = 110 * time.Second + retryPeriod = 90 * time.Second +) + func printVersion() { glog.Infof("Go Version: %s", runtime.Version()) glog.Infof("Go OS/Arch: %s/%s", runtime.GOOS, runtime.GOARCH) @@ -23,7 +31,30 @@ func printVersion() { } func main() { - watchNamespace := flag.String("namespace", "", "Namespace that the controller watches to reconcile machine-api objects. If unspecified, the controller watches for machine-api objects across all namespaces.") + watchNamespace := flag.String( + "namespace", + "", + "Namespace that the controller watches to reconcile machine-api objects. If unspecified, the controller watches for machine-api objects across all namespaces.", + ) + + leaderElectResourceNamespace := flag.String( + "leader-elect-resource-namespace", + "", + "The namespace of resource object that is used for locking during leader election. If unspecified and running in cluster, defaults to the service account namespace for the controller. Required for leader-election outside of a cluster.", + ) + + leaderElect := flag.Bool( + "leader-elect", + false, + "Start a leader election client and gain leadership before executing the main loop. 
Enable this when running replicated components for high availability.", + ) + + leaderElectLeaseDuration := flag.Duration( + "leader-elect-lease-duration", + leaseDuration, + "The duration that non-leader candidates will wait after observing a leadership renewal until attempting to acquire leadership of a led but unrenewed leader slot. This is effectively the maximum duration that a leader can be stopped before it is replaced by another candidate. This is only applicable if leader election is enabled.", + ) + flag.Parse() printVersion() @@ -35,8 +66,16 @@ func main() { opts := manager.Options{ // Disable metrics serving - MetricsBindAddress: "0", + MetricsBindAddress: "0", + LeaderElection: *leaderElect, + LeaderElectionNamespace: *leaderElectResourceNamespace, + LeaderElectionID: "cluster-api-provider-healthcheck-leader", + LeaseDuration: leaderElectLeaseDuration, + // Slow the default retry and renew election rate to reduce etcd writes at idle: BZ 1858400 + RetryPeriod: &retryPeriod, + RenewDeadline: &renewDealine, } + if *watchNamespace != "" { opts.Namespace = *watchNamespace glog.Infof("Watching machine-api objects only in namespace %q for reconciliation.", opts.Namespace) diff --git a/cmd/machineset/main.go b/cmd/machineset/main.go index 590e4a4ac4..2448eb3a33 100644 --- a/cmd/machineset/main.go +++ b/cmd/machineset/main.go @@ -31,12 +31,37 @@ import ( "sigs.k8s.io/controller-runtime/pkg/runtime/signals" ) +// The default durations for the leader election operations. +var ( + leaseDuration = 120 * time.Second + renewDealine = 110 * time.Second + retryPeriod = 90 * time.Second +) + func main() { flag.Set("logtostderr", "true") klog.InitFlags(nil) watchNamespace := flag.String("namespace", "", "Namespace that the controller watches to reconcile cluster-api objects. 
If unspecified, the controller watches for cluster-api objects across all namespaces.") + leaderElectResourceNamespace := flag.String( + "leader-elect-resource-namespace", + "", + "The namespace of resource object that is used for locking during leader election. If unspecified and running in cluster, defaults to the service account namespace for the controller. Required for leader-election outside of a cluster.", + ) + + leaderElect := flag.Bool( + "leader-elect", + false, + "Start a leader election client and gain leadership before executing the main loop. Enable this when running replicated components for high availability.", + ) + + leaderElectLeaseDuration := flag.Duration( + "leader-elect-lease-duration", + leaseDuration, + "The duration that non-leader candidates will wait after observing a leadership renewal until attempting to acquire leadership of a led but unrenewed leader slot. This is effectively the maximum duration that a leader can be stopped before it is replaced by another candidate. 
This is only applicable if leader election is enabled.", + ) + flag.Parse() if *watchNamespace != "" { log.Printf("Watching cluster-api objects only in namespace %q for reconciliation.", *watchNamespace) @@ -52,9 +77,16 @@ func main() { syncPeriod := 10 * time.Minute opts := manager.Options{ // Disable metrics serving - MetricsBindAddress: "0", - SyncPeriod: &syncPeriod, - Namespace: *watchNamespace, + MetricsBindAddress: "0", + SyncPeriod: &syncPeriod, + Namespace: *watchNamespace, + LeaderElection: *leaderElect, + LeaderElectionNamespace: *leaderElectResourceNamespace, + LeaderElectionID: "cluster-api-provider-machineset-leader", + LeaseDuration: leaderElectLeaseDuration, + // Slow the default retry and renew election rate to reduce etcd writes at idle: BZ 1858400 + RetryPeriod: &retryPeriod, + RenewDeadline: &renewDealine, } mgr, err := manager.New(cfg, opts) if err != nil { diff --git a/cmd/nodelink-controller/main.go b/cmd/nodelink-controller/main.go index ca0d0242f7..28367dc825 100644 --- a/cmd/nodelink-controller/main.go +++ b/cmd/nodelink-controller/main.go @@ -3,6 +3,7 @@ package main import ( "flag" "runtime" + "time" mapiv1 "github.com/openshift/machine-api-operator/pkg/apis/machine/v1beta1" "github.com/openshift/machine-api-operator/pkg/controller" @@ -14,6 +15,13 @@ import ( "sigs.k8s.io/controller-runtime/pkg/runtime/signals" ) +// The default durations for the leader election operations. +var ( + leaseDuration = 120 * time.Second + renewDealine = 110 * time.Second + retryPeriod = 90 * time.Second +) + func printVersion() { klog.Infof("Go Version: %s", runtime.Version()) klog.Infof("Go OS/Arch: %s/%s", runtime.GOOS, runtime.GOARCH) @@ -23,7 +31,30 @@ func printVersion() { func main() { printVersion() - watchNamespace := flag.String("namespace", "", "Namespace that the controller watches to reconcile machine-api objects. 
If unspecified, the controller watches for machine-api objects across all namespaces.") + watchNamespace := flag.String( + "namespace", + "", + "Namespace that the controller watches to reconcile machine-api objects. If unspecified, the controller watches for machine-api objects across all namespaces.", + ) + + leaderElectResourceNamespace := flag.String( + "leader-elect-resource-namespace", + "", + "The namespace of resource object that is used for locking during leader election. If unspecified and running in cluster, defaults to the service account namespace for the controller. Required for leader-election outside of a cluster.", + ) + + leaderElect := flag.Bool( + "leader-elect", + false, + "Start a leader election client and gain leadership before executing the main loop. Enable this when running replicated components for high availability.", + ) + + leaderElectLeaseDuration := flag.Duration( + "leader-elect-lease-duration", + leaseDuration, + "The duration that non-leader candidates will wait after observing a leadership renewal until attempting to acquire leadership of a led but unrenewed leader slot. This is effectively the maximum duration that a leader can be stopped before it is replaced by another candidate. 
This is only applicable if leader election is enabled.", + ) + klog.InitFlags(nil) flag.Set("logtostderr", "true") flag.Parse() @@ -36,7 +67,14 @@ func main() { opts := manager.Options{ // Disable metrics serving - MetricsBindAddress: "0", + MetricsBindAddress: "0", + LeaderElection: *leaderElect, + LeaderElectionNamespace: *leaderElectResourceNamespace, + LeaderElectionID: "cluster-api-provider-nodelink-leader", + LeaseDuration: leaderElectLeaseDuration, + // Slow the default retry and renew election rate to reduce etcd writes at idle: BZ 1858400 + RetryPeriod: &retryPeriod, + RenewDeadline: &renewDealine, } if *watchNamespace != "" { opts.Namespace = *watchNamespace diff --git a/cmd/vsphere/main.go b/cmd/vsphere/main.go index 4e916a750f..689a9c99b4 100644 --- a/cmd/vsphere/main.go +++ b/cmd/vsphere/main.go @@ -4,6 +4,7 @@ import ( "flag" "fmt" "os" + "time" configv1 "github.com/openshift/api/config/v1" "github.com/openshift/machine-api-operator/pkg/apis/machine/v1beta1" @@ -17,12 +18,42 @@ import ( "sigs.k8s.io/controller-runtime/pkg/runtime/signals" ) +// The default durations for the leader election operations. +var ( + leaseDuration = 120 * time.Second + renewDealine = 110 * time.Second + retryPeriod = 90 * time.Second +) + func main() { var printVersion bool flag.BoolVar(&printVersion, "version", false, "print version and exit") klog.InitFlags(nil) - watchNamespace := flag.String("namespace", "", "Namespace that the controller watches to reconcile machine-api objects. If unspecified, the controller watches for machine-api objects across all namespaces.") + watchNamespace := flag.String( + "namespace", + "", + "Namespace that the controller watches to reconcile machine-api objects. If unspecified, the controller watches for machine-api objects across all namespaces.", + ) + + leaderElectResourceNamespace := flag.String( + "leader-elect-resource-namespace", + "", + "The namespace of resource object that is used for locking during leader election. 
If unspecified and running in cluster, defaults to the service account namespace for the controller. Required for leader-election outside of a cluster.", + ) + + leaderElect := flag.Bool( + "leader-elect", + false, + "Start a leader election client and gain leadership before executing the main loop. Enable this when running replicated components for high availability.", + ) + + leaderElectLeaseDuration := flag.Duration( + "leader-elect-lease-duration", + leaseDuration, + "The duration that non-leader candidates will wait after observing a leadership renewal until attempting to acquire leadership of a led but unrenewed leader slot. This is effectively the maximum duration that a leader can be stopped before it is replaced by another candidate. This is only applicable if leader election is enabled.", + ) + flag.Set("logtostderr", "true") flag.Parse() @@ -35,8 +66,16 @@ func main() { opts := manager.Options{ // Disable metrics serving - MetricsBindAddress: "0", + MetricsBindAddress: "0", + LeaderElection: *leaderElect, + LeaderElectionNamespace: *leaderElectResourceNamespace, + LeaderElectionID: "cluster-api-provider-vsphere-leader", + LeaseDuration: leaderElectLeaseDuration, + // Slow the default retry and renew election rate to reduce etcd writes at idle: BZ 1858400 + RetryPeriod: &retryPeriod, + RenewDeadline: &renewDealine, } + if *watchNamespace != "" { opts.Namespace = *watchNamespace klog.Infof("Watching machine-api objects only in namespace %q for reconciliation.", opts.Namespace) diff --git a/pkg/operator/sync.go b/pkg/operator/sync.go index 00b3058951..1355bd6bc1 100644 --- a/pkg/operator/sync.go +++ b/pkg/operator/sync.go @@ -313,6 +313,8 @@ func newContainers(config *OperatorConfig, features map[string]bool) []corev1.Co args := []string{ "--logtostderr=true", "--v=3", + "--leader-elect=true", + "--leader-elect-lease-duration=120s", fmt.Sprintf("--namespace=%s", config.TargetNamespace), }