From db0be70b2b9f76927f5ae1ee946ac622a3f95ba2 Mon Sep 17 00:00:00 2001
From: Ying Li
Date: Mon, 25 Jun 2018 17:28:11 -0700
Subject: [PATCH] Fix racy batching on the dispatcher.

Signed-off-by: Ying Li
(cherry picked from commit 317ad86ad8fab82e8e3359d44407951c2e3cb4d2)
Signed-off-by: Sebastiaan van Stijn
---
 manager/dispatcher/dispatcher.go | 7 +++++++
 node/node.go                     | 5 +++++
 2 files changed, 12 insertions(+)

diff --git a/manager/dispatcher/dispatcher.go b/manager/dispatcher/dispatcher.go
index fe3571b010..a343a543e2 100644
--- a/manager/dispatcher/dispatcher.go
+++ b/manager/dispatcher/dispatcher.go
@@ -285,9 +285,16 @@ func (d *Dispatcher) Run(ctx context.Context) error {
 			publishManagers(ev.([]*api.Peer))
 		case <-d.processUpdatesTrigger:
 			d.processUpdates(ctx)
+			batchTimer.Stop()
+			// drain the timer, if it has already expired
+			select {
+			case <-batchTimer.C:
+			default:
+			}
 			batchTimer.Reset(maxBatchInterval)
 		case <-batchTimer.C:
 			d.processUpdates(ctx)
+			// batch timer has already expired, so no need to drain
 			batchTimer.Reset(maxBatchInterval)
 		case v := <-configWatcher:
 			cluster := v.(api.EventUpdateCluster)
diff --git a/node/node.go b/node/node.go
index 0886ea9659..9845192c47 100644
--- a/node/node.go
+++ b/node/node.go
@@ -1000,6 +1000,11 @@ func (n *Node) superviseManager(ctx context.Context, securityConfig *ca.Security
 			// re-promoted. In this case, we must assume we were
 			// re-promoted, and restart the manager.
 			log.G(ctx).Warn("failed to get worker role after manager stop, forcing certificate renewal")
+
+			// We can safely reset this timer without stopping/draining the timer
+			// first because the only way the code has reached this point is if the timer
+			// has already expired - if the role changed or the context were canceled,
+			// then we would have returned already.
 			timer.Reset(roleChangeTimeout)
 
 			renewer.Renew()