From e91352325579da2af724033599bc391eb7269956 Mon Sep 17 00:00:00 2001 From: Aaron Lehmann Date: Fri, 9 Jun 2017 16:13:48 -0700 Subject: [PATCH] allocator: Retry failed allocations immediately upon a deallocation We retry failed allocations every 5 minutes. If something else gets deallocated, we should trigger the retry immediately in case the allocations were failing due to IP exhaustion, and the deallocation freed up an IP. Signed-off-by: Aaron Lehmann --- manager/allocator/network.go | 21 ++++++++++++++++++++- 1 file changed, 20 insertions(+), 1 deletion(-) diff --git a/manager/allocator/network.go b/manager/allocator/network.go index 83309c20c0..4055f3be36 100644 --- a/manager/allocator/network.go +++ b/manager/allocator/network.go @@ -59,6 +59,12 @@ type networkContext struct { // lastRetry is the last timestamp when unallocated // tasks/services/networks were retried. lastRetry time.Time + + // somethingWasDeallocated indicates that we just deallocated at + // least one service/task/network, so we should retry failed + // allocations (in case we are experiencing IP exhaustion and an IP was + // released). + somethingWasDeallocated bool } func (a *Allocator) doNetworkInit(ctx context.Context) (err error) { @@ -305,6 +311,8 @@ func (a *Allocator) doNetworkAlloc(ctx context.Context, ev events.Event) { // resources. 
if err := nc.nwkAllocator.Deallocate(n); err != nil { log.G(ctx).WithError(err).Errorf("Failed during network free for network %s", n.ID) + } else { + nc.somethingWasDeallocated = true } delete(nc.unallocatedNetworks, n.ID) @@ -371,6 +379,8 @@ func (a *Allocator) doNetworkAlloc(ctx context.Context, ev events.Event) { if err := nc.nwkAllocator.DeallocateService(s); err != nil { log.G(ctx).WithError(err).Errorf("Failed deallocation during delete of service %s", s.ID) + } else { + nc.somethingWasDeallocated = true } // Remove it from unallocatedServices just in case @@ -383,11 +393,12 @@ func (a *Allocator) doNetworkAlloc(ctx context.Context, ev events.Event) { case state.EventCommit: a.procTasksNetwork(ctx, false) - if time.Since(nc.lastRetry) > retryInterval { + if time.Since(nc.lastRetry) > retryInterval || nc.somethingWasDeallocated { a.procUnallocatedNetworks(ctx) a.procUnallocatedServices(ctx) a.procTasksNetwork(ctx, true) nc.lastRetry = time.Now() + nc.somethingWasDeallocated = false } // Any left over tasks are moved to the unallocated set @@ -432,6 +443,8 @@ func (a *Allocator) doNodeAlloc(ctx context.Context, ev events.Event) { if nc.nwkAllocator.IsNodeAllocated(node) { if err := nc.nwkAllocator.DeallocateNode(node); err != nil { log.G(ctx).WithError(err).Errorf("Failed freeing network resources for node %s", node.ID) + } else { + nc.somethingWasDeallocated = true } } return @@ -522,6 +535,8 @@ func (a *Allocator) deallocateNodes(ctx context.Context) error { if nc.nwkAllocator.IsNodeAllocated(node) { if err := nc.nwkAllocator.DeallocateNode(node); err != nil { log.G(ctx).WithError(err).Errorf("Failed freeing network resources for node %s", node.ID) + } else { + nc.somethingWasDeallocated = true } node.Attachment = nil if err := a.store.Batch(func(batch *store.Batch) error { @@ -638,12 +653,15 @@ func (a *Allocator) doTaskAlloc(ctx context.Context, ev events.Event) { if nc.nwkAllocator.IsTaskAllocated(t) { if err := nc.nwkAllocator.DeallocateTask(t); err != 
nil { log.G(ctx).WithError(err).Errorf("Failed freeing network resources for task %s", t.ID) + } else { + nc.somethingWasDeallocated = true } } // Cleanup any task references that might exist delete(nc.pendingTasks, t.ID) delete(nc.unallocatedTasks, t.ID) + return } @@ -778,6 +796,7 @@ func (a *Allocator) allocateService(ctx context.Context, s *api.Service) error { if err := nc.nwkAllocator.DeallocateService(s); err != nil { return err } + nc.somethingWasDeallocated = true } if err := nc.nwkAllocator.AllocateService(s); err != nil {