diff --git a/cmd/swarmd/main.go b/cmd/swarmd/main.go index d5ffec0b3d..048c8d8531 100644 --- a/cmd/swarmd/main.go +++ b/cmd/swarmd/main.go @@ -290,7 +290,7 @@ func init() { mainCmd.Flags().String("generic-node-resources", "", "user defined resources (e.g. fpga=2,gpu=UUID1,gpu=UUID2,gpu=UUID3)") mainCmd.Flags().Bool("force-new-cluster", false, "Force the creation of a new cluster from data directory") mainCmd.Flags().Uint32("heartbeat-tick", 1, "Defines the heartbeat interval (in seconds) for raft member health-check") - mainCmd.Flags().Uint32("election-tick", 3, "Defines the amount of ticks (in seconds) needed without a Leader to trigger a new election") + mainCmd.Flags().Uint32("election-tick", 10, "Defines the amount of ticks (in seconds) needed without a Leader to trigger a new election") mainCmd.Flags().Var(&externalCAOpt, "external-ca", "Specifications of one or more certificate signing endpoints") mainCmd.Flags().Bool("autolock", false, "Require an unlock key in order to start a manager once it's been stopped") mainCmd.Flags().String("unlock-key", "", "Unlock this manager using this key") diff --git a/manager/state/raft/raft.go b/manager/state/raft/raft.go index a707913682..3e3410fe04 100644 --- a/manager/state/raft/raft.go +++ b/manager/state/raft/raft.go @@ -474,8 +474,11 @@ func (n *Node) joinCluster(ctx context.Context) error { // raft node that can be modified and customized func DefaultNodeConfig() *raft.Config { return &raft.Config{ - HeartbeatTick: 1, - ElectionTick: 3, + HeartbeatTick: 1, + // Recommended value in etcd/raft is 10 x (HeartbeatTick). + // Lower values were seen to have caused instability because of + // frequent leader elections when running on flaky networks. 
+ ElectionTick: 10, MaxSizePerMsg: math.MaxUint16, MaxInflightMsgs: 256, Logger: log.L, @@ -489,8 +492,11 @@ func DefaultRaftConfig() api.RaftConfig { KeepOldSnapshots: 0, SnapshotInterval: 10000, LogEntriesForSlowFollowers: 500, - ElectionTick: 3, - HeartbeatTick: 1, + // Recommended ElectionTick value in etcd/raft is 10 x (HeartbeatTick). + // Lower values were seen to have caused instability because of + // frequent leader elections when running on flaky networks. + HeartbeatTick: 1, + ElectionTick: 10, } }