From db0be70b2b9f76927f5ae1ee946ac622a3f95ba2 Mon Sep 17 00:00:00 2001
From: Ying Li <ying.li@docker.com>
Date: Mon, 25 Jun 2018 17:28:11 -0700
Subject: [PATCH] Fix racy batching on the dispatcher.

Signed-off-by: Ying Li <ying.li@docker.com>
(cherry picked from commit 317ad86ad8fab82e8e3359d44407951c2e3cb4d2)
Signed-off-by: Sebastiaan van Stijn <github@gone.nl>
---
 manager/dispatcher/dispatcher.go | 7 +++++++
 node/node.go                     | 5 +++++
 2 files changed, 12 insertions(+)

diff --git a/manager/dispatcher/dispatcher.go b/manager/dispatcher/dispatcher.go
index fe3571b010..a343a543e2 100644
--- a/manager/dispatcher/dispatcher.go
+++ b/manager/dispatcher/dispatcher.go
@@ -285,9 +285,16 @@ func (d *Dispatcher) Run(ctx context.Context) error {
 			publishManagers(ev.([]*api.Peer))
 		case <-d.processUpdatesTrigger:
 			d.processUpdates(ctx)
+			batchTimer.Stop()
+			// drain the timer channel if it has already fired, so a stale expiry is not received right after Reset
+			select {
+			case <-batchTimer.C:
+			default:
+			}
 			batchTimer.Reset(maxBatchInterval)
 		case <-batchTimer.C:
 			d.processUpdates(ctx)
+			// batch timer has already expired, so no need to drain
 			batchTimer.Reset(maxBatchInterval)
 		case v := <-configWatcher:
 			cluster := v.(api.EventUpdateCluster)
diff --git a/node/node.go b/node/node.go
index 0886ea9659..9845192c47 100644
--- a/node/node.go
+++ b/node/node.go
@@ -1000,6 +1000,11 @@ func (n *Node) superviseManager(ctx context.Context, securityConfig *ca.Security
 			// re-promoted. In this case, we must assume we were
 			// re-promoted, and restart the manager.
 			log.G(ctx).Warn("failed to get worker role after manager stop, forcing certificate renewal")
+
+			// We can safely reset this timer without stopping/draining it first,
+			// because the only way to reach this point is for the timer to have
+			// already expired - had the role changed or the context been canceled,
+			// we would have returned already.
 			timer.Reset(roleChangeTimeout)
 
 			renewer.Renew()