From 99a65a992799f1df218818e30d00a0049950e265 Mon Sep 17 00:00:00 2001 From: Drew Erny Date: Mon, 14 Jan 2019 12:36:48 -0600 Subject: [PATCH 1/2] Fix leaking task resources when nodes are deleted When a node is deleted, its tasks are asked to restart, which involves putting them into a desired state of Shutdown. However, the Allocator will not deallocate a task which is not in an actual state of a terminal state. Once a node is deleted, the only opportunity for its tasks to recieve updates and be moved to a terminal state is when the function moving those tasks to TaskStateOrphaned is called, 24 hours after the node enters the Down state. However, if a leadership change occurs, then that function will never be called, and the tasks will never be moved to a terminal state, leaking resources. With this change, upon node deletion, all of its tasks will be moved to TaskStateOrphaned, allowing those tasks' resources to be cleaned up. Additionally, as part of this backport, avoid using the gogo types.TimestampNow function, which does not exist in the vendored version. Signed-off-by: Drew Erny (cherry picked from commit 8467e6a43f9cee66f46efb457b2c22f46ae5da0b) Signed-off-by: Sebastiaan van Stijn --- manager/controlapi/node.go | 39 +++++++++++++++++++++------------ manager/controlapi/node_test.go | 16 +++++++------- 2 files changed, 33 insertions(+), 22 deletions(-) diff --git a/manager/controlapi/node.go b/manager/controlapi/node.go index e1fe3dec1d..47af78d9c6 100644 --- a/manager/controlapi/node.go +++ b/manager/controlapi/node.go @@ -3,6 +3,7 @@ package controlapi import ( "crypto/x509" "encoding/pem" + "time" "github.com/docker/swarmkit/api" "github.com/docker/swarmkit/manager/state/raft/membership" @@ -248,25 +249,35 @@ func (s *Server) UpdateNode(ctx context.Context, request *api.UpdateNodeRequest) }, nil } -func removeNodeAttachments(tx store.Tx, nodeID string) error { - // orphan the node's attached containers. if we don't do this, the - // network these attachments are connected to will never be removeable +func orphanNodeTasks(tx store.Tx, nodeID string) error { + // when a node is deleted, all of its tasks are irrecoverably removed. + // additionally, the Dispatcher can no longer be relied on to update the + // task status. Therefore, when the node is removed, we must additionally + // move all of its assigned tasks to the Orphaned state, so that their + // resources can be cleaned up. tasks, err := store.FindTasks(tx, store.ByNodeID(nodeID)) if err != nil { return err } for _, task := range tasks { - // if the task is an attachment, then we just delete it. the allocator - // will do the heavy lifting. basically, GetAttachment will return the - // attachment if that's the kind of runtime, or nil if it's not. - if task.Spec.GetAttachment() != nil { - // don't delete the task. instead, update it to `ORPHANED` so that - // the taskreaper will clean it up. - task.Status.State = api.TaskStateOrphaned - if err := store.UpdateTask(tx, task); err != nil { - return err - } + // this code was backported from a later version. in the later version, + // gogotypes has a function TimestampNow, which returns a + // gogotypes.Timestamp for the current time. this version of the + // swarmkit code uses an earlier version of gogotypes, which means we + // don't have access to that function. however, this code is + // esssentially equivalent. + // + // we're ignoring the error value of TimestampProto, because it's + // probably a catastrophic error if time.Now doesn't return a valid + // time, and gogotypes.TimestampNow just panics if the conversion + // fails anyway. + timestamp, _ := gogotypes.TimestampProto(time.Now()) + task.Status = api.TaskStatus{ + Timestamp: timestamp, + State: api.TaskStateOrphaned, + Message: "Task belonged to a node that has been deleted", } + store.UpdateTask(tx, task) } return nil } @@ -336,7 +347,7 @@ func (s *Server) RemoveNode(ctx context.Context, request *api.RemoveNodeRequest) return err } - if err := removeNodeAttachments(tx, request.NodeID); err != nil { + if err := orphanNodeTasks(tx, request.NodeID); err != nil { return err } diff --git a/manager/controlapi/node_test.go b/manager/controlapi/node_test.go index 603b72b894..fc0770060a 100644 --- a/manager/controlapi/node_test.go +++ b/manager/controlapi/node_test.go @@ -733,9 +733,7 @@ func TestUpdateNodeDemote(t *testing.T) { } // TestRemoveNodeAttachments tests the unexported removeNodeAttachments -// function. This avoids us having to update the TestRemoveNodes function to -// test all of this logic -func TestRemoveNodeAttachments(t *testing.T) { +func TestOrphanNodeTasks(t *testing.T) { // first, set up a store and all that ts := newTestServer(t) defer ts.Stop() @@ -879,22 +877,24 @@ func TestRemoveNodeAttachments(t *testing.T) { // Now, call the function with our nodeID. make sure it returns no error err = ts.Store.Update(func(tx store.Tx) error { - return removeNodeAttachments(tx, "id2") + return orphanNodeTasks(tx, "id2") }) require.NoError(t, err) - // Now, make sure only task1, the network-attacahed task on id2, was - // removed + // Now, make sure only tasks 1 and 3, the tasks on the node we're deleting + // removed, are removed ts.Store.View(func(tx store.ReadTx) { tasks, err := store.FindTasks(tx, store.All) require.NoError(t, err) // should only be 3 tasks left require.Len(t, tasks, 4) - // and the list should not contain task1 + // and the list should not contain task1 or task2 for _, task := range tasks { require.NotNil(t, task) - if task.ID == "task1" { + if task.ID == "task1" || task.ID == "task3" { require.Equal(t, task.Status.State, api.TaskStateOrphaned) + } else { + require.NotEqual(t, task.Status.State, api.TaskStateOrphaned) } } }) From 847a883dfd5a23bc81a7422bb73d292706660bf5 Mon Sep 17 00:00:00 2001 From: Drew Erny Date: Mon, 24 Jun 2019 15:55:30 -0500 Subject: [PATCH 2/2] Only update non-terminal tasks on node removal. When a node is removed, its tasks are set in state ORPHANED. This does not need to be done for tasks that are already in a terminal state, and if all tasks in all states are updated, the size of the transaction may grow too large to process, and node removal becomes impossible. This changes to only set non-terminal tasks to state ORPHANED, and terminal tasks are left alone. Cherry pick does not apply cleanly, but the fix is rather simple. Signed-off-by: Drew Erny (cherry picked from commit d5df26594f5b74a9de41e376206c36abffc961fe) Signed-off-by: Drew Erny --- manager/controlapi/node.go | 43 +++++++++++++++++++++------------ manager/controlapi/node_test.go | 28 ++++++++++++++++++--- 2 files changed, 51 insertions(+), 20 deletions(-) diff --git a/manager/controlapi/node.go b/manager/controlapi/node.go index 47af78d9c6..68bd69dbf3 100644 --- a/manager/controlapi/node.go +++ b/manager/controlapi/node.go @@ -260,24 +260,35 @@ func orphanNodeTasks(tx store.Tx, nodeID string) error { return err } for _, task := range tasks { - // this code was backported from a later version. in the later version, - // gogotypes has a function TimestampNow, which returns a - // gogotypes.Timestamp for the current time. this version of the - // swarmkit code uses an earlier version of gogotypes, which means we - // don't have access to that function. however, this code is - // esssentially equivalent. + // this operation must occur within the same transaction boundary. If + // we cannot accomplish this task orphaning in the same transaction, we + // could crash or die between transactions and not get a chance to do + // this. however, in cases were there is an exceptionally large number + // of tasks for a node, this may cause the transaction to exceed the + // max message size. // - // we're ignoring the error value of TimestampProto, because it's - // probably a catastrophic error if time.Now doesn't return a valid - // time, and gogotypes.TimestampNow just panics if the conversion - // fails anyway. - timestamp, _ := gogotypes.TimestampProto(time.Now()) - task.Status = api.TaskStatus{ - Timestamp: timestamp, - State: api.TaskStateOrphaned, - Message: "Task belonged to a node that has been deleted", + // therefore, we restrict updating to only tasks in a non-terminal + // state. Tasks in a terminal state do not need to be updated. + if task.Status.State < api.TaskStateCompleted { + // this code was backported from a later version. in the later version, + // gogotypes has a function TimestampNow, which returns a + // gogotypes.Timestamp for the current time. this version of the + // swarmkit code uses an earlier version of gogotypes, which means we + // don't have access to that function. however, this code is + // esssentially equivalent. + // + // we're ignoring the error value of TimestampProto, because it's + // probably a catastrophic error if time.Now doesn't return a valid + // time, and gogotypes.TimestampNow just panics if the conversion + // fails anyway. + timestamp, _ := gogotypes.TimestampProto(time.Now()) + task.Status = api.TaskStatus{ + Timestamp: timestamp, + State: api.TaskStateOrphaned, + Message: "Task belonged to a node that has been deleted", + } + store.UpdateTask(tx, task) } - store.UpdateTask(tx, task) } return nil } diff --git a/manager/controlapi/node_test.go b/manager/controlapi/node_test.go index fc0770060a..6f0a2a6498 100644 --- a/manager/controlapi/node_test.go +++ b/manager/controlapi/node_test.go @@ -732,7 +732,7 @@ func TestUpdateNodeDemote(t *testing.T) { testUpdateNodeDemote(t) } -// TestRemoveNodeAttachments tests the unexported removeNodeAttachments +// TestRemoveNodeAttachments tests the unexported orphanNodeTasks func TestOrphanNodeTasks(t *testing.T) { // first, set up a store and all that ts := newTestServer(t) @@ -871,7 +871,28 @@ func TestOrphanNodeTasks(t *testing.T) { }, }, } - return store.CreateTask(tx, task4) + if err := store.CreateTask(tx, task4); err != nil { + return err + } + + // 5.) A regular task that's already in a terminal state on the node, + // which does not need to be updated. + task5 := &api.Task{ + ID: "task5", + NodeID: "id2", + DesiredState: api.TaskStateRunning, + Status: api.TaskStatus{ + // use TaskStateCompleted, as this is the earliest terminal + // state (this ensures we don't actually use <= instead of <) + State: api.TaskStateCompleted, + }, + Spec: api.TaskSpec{ + Runtime: &api.TaskSpec_Container{ + Container: &api.ContainerSpec{}, + }, + }, + } + return store.CreateTask(tx, task5) }) require.NoError(t, err) @@ -886,8 +907,7 @@ func TestOrphanNodeTasks(t *testing.T) { ts.Store.View(func(tx store.ReadTx) { tasks, err := store.FindTasks(tx, store.All) require.NoError(t, err) - // should only be 3 tasks left - require.Len(t, tasks, 4) + require.Len(t, tasks, 5) // and the list should not contain task1 or task2 for _, task := range tasks { require.NotNil(t, task)