From 9b67599b8da4a6777c12ece1fa36166aa98f708b Mon Sep 17 00:00:00 2001 From: Nishant Totla Date: Wed, 11 Oct 2017 14:19:14 -0700 Subject: [PATCH] Increase gRPC request timeout to 20 seconds when sending snapshots (cherry picked from commit e3e2821fe3eae707915b78215526da078d2d75a7) Signed-off-by: Nishant Totla --- manager/state/raft/raft.go | 21 ++++++++++++++++++--- 1 file changed, 18 insertions(+), 3 deletions(-) diff --git a/manager/state/raft/raft.go b/manager/state/raft/raft.go index b0b6bf2f29..afd0154167 100644 --- a/manager/state/raft/raft.go +++ b/manager/state/raft/raft.go @@ -165,8 +165,11 @@ type NodeOptions struct { ClockSource clock.Clock // SendTimeout is the timeout on the sending messages to other raft // nodes. Leave this as 0 to get the default value. - SendTimeout time.Duration - TLSCredentials credentials.TransportCredentials + SendTimeout time.Duration + // LargeSendTimeout is the timeout for sending snapshots to other raft + // nodes. Leave this as 0 to get the default value. + LargeSendTimeout time.Duration + TLSCredentials credentials.TransportCredentials KeyRotator EncryptionKeyRotator } @@ -187,6 +190,11 @@ func NewNode(opts NodeOptions) *Node { if opts.SendTimeout == 0 { opts.SendTimeout = 2 * time.Second } + if opts.LargeSendTimeout == 0 { + // a "slow" 100Mbps connection can send over 240MB of data in 20 seconds + // which is well over the gRPC message limit of 128MB allowed by SwarmKit + opts.LargeSendTimeout = 20 * time.Second + } raftStore := raft.NewMemoryStorage() @@ -1334,7 +1342,14 @@ func (n *Node) sendToMember(ctx context.Context, members map[uint64]*membership. defer close(thisSend) if lastSend != nil { - waitCtx, waitCancel := context.WithTimeout(ctx, n.opts.SendTimeout) + timeout := n.opts.SendTimeout + // if a snapshot is being sent, set timeout to LargeSendTimeout because + // sending snapshots can take more time than other messages sent between peers. 
+ // The same applies to AppendEntries as well, where messages can get large. + if m.Type == raftpb.MsgSnap || m.Type == raftpb.MsgApp { + timeout = n.opts.LargeSendTimeout + } + waitCtx, waitCancel := context.WithTimeout(ctx, timeout) defer waitCancel() select {