From 43bbe90c5ed0c8a2aa1f012d1cda2d6270afa7df Mon Sep 17 00:00:00 2001 From: Aaron Lehmann Date: Fri, 21 Jul 2017 14:47:11 -0700 Subject: [PATCH] orchestrator: Flag tasks that shouldn't be restarted Previously, restart conditions other than "OnAny" were honored on a best-effort basis. A service-level reconciliation, for example after a leader election, would see that not enough tasks were running, and start replacement tasks regardless of the restart policy. This limited the usefulness of the other restart conditions. This change adds a DontRestart flag to Task. It can be set by the restart supervisor when it shuts down a task and decides not to start a replacement task. The orchestrators look for the presence of this flag and honor it when doing service-level reconciliation. If the flag is set, the dead task is passed to the updater along with the running tasks, so the updater can start a replacement if and only if the service definition has changed relative to the dead task. The task reaper has been modified so it will never delete the last task in a slot, if that task has the DontRestart flag set. Signed-off-by: Aaron Lehmann --- api/objects.pb.go | 232 +++++++++++------- api/objects.proto | 6 + manager/orchestrator/global/global.go | 9 +- manager/orchestrator/global/global_test.go | 28 ++- .../orchestrator/replicated/restart_test.go | 203 +++++++++------ .../orchestrator/replicated/update_test.go | 4 - manager/orchestrator/restart/restart.go | 13 +- manager/orchestrator/slot.go | 2 +- .../orchestrator/taskreaper/task_reaper.go | 11 +- manager/orchestrator/update/updater.go | 73 ++++-- 10 files changed, 376 insertions(+), 205 deletions(-) diff --git a/api/objects.pb.go b/api/objects.pb.go index b6a9389a6d..ea22ce58f0 100644 --- a/api/objects.pb.go +++ b/api/objects.pb.go @@ -185,6 +185,11 @@ type Task struct { // TaskStateShutdown if the manager wants to terminate the task. This field // is only written by the manager. DesiredState TaskState `protobuf:"varint,10,opt,name=desired_state,json=desiredState,proto3,enum=docker.swarmkit.v1.TaskState" json:"desired_state,omitempty"` + // DontRestart indicates that the restart supervisor decided not to + // start a replacement task for this task. This flag records the + // decision so that orchestrators can honor it when they do + // service-level reconciliation. + DontRestart bool `protobuf:"varint,16,opt,name=dont_restart,json=dontRestart,proto3" json:"dont_restart,omitempty"` // List of network attachments by the task. Networks []*NetworkAttachment `protobuf:"bytes,11,rep,name=networks" json:"networks,omitempty"` // A copy of runtime state of service endpoint from Service @@ -1161,6 +1166,18 @@ func (m *Task) MarshalTo(dAtA []byte) (int, error) { i += n } } + if m.DontRestart { + dAtA[i] = 0x80 + i++ + dAtA[i] = 0x1 + i++ + if m.DontRestart { + dAtA[i] = 1 + } else { + dAtA[i] = 0 + } + i++ + } return i, nil } @@ -1798,6 +1815,9 @@ func (m *Task) Size() (n int) { n += 1 + l + sovObjects(uint64(l)) } } + if m.DontRestart { + n += 3 + } return n } @@ -4447,6 +4467,7 @@ func (this *Task) String() string { `LogDriver:` + strings.Replace(fmt.Sprintf("%v", this.LogDriver), "Driver", "Driver", 1) + `,`, `SpecVersion:` + strings.Replace(fmt.Sprintf("%v", this.SpecVersion), "Version", "Version", 1) + `,`, `AssignedGenericResources:` + strings.Replace(fmt.Sprintf("%v", this.AssignedGenericResources), "GenericResource", "GenericResource", 1) + `,`, + `DontRestart:` + fmt.Sprintf("%v", this.DontRestart) + `,`, `}`, }, "") return s @@ -6060,6 +6081,26 @@ func (m *Task) Unmarshal(dAtA []byte) error { return err } iNdEx = postIndex + case 16: + if wireType != 0 { + return fmt.Errorf("proto: wrong wireType = %d for field DontRestart", wireType) + } + var v int + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return ErrIntOverflowObjects + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + v |= (int(b) & 0x7F) << shift + if b < 0x80 { + break + } + } + m.DontRestart = bool(v != 0) default: iNdEx = preIndex skippy, err := skipObjects(dAtA[iNdEx:]) @@ -7689,99 +7730,100 @@ var ( func init() { proto.RegisterFile("objects.proto", fileDescriptorObjects) } var fileDescriptorObjects = []byte{ - // 1491 bytes of a gzipped FileDescriptorProto - 0x1f, 0x8b, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0xff, 0xcc, 0x58, 0xcf, 0x6f, 0x1b, 0x4f, - 0x15, 0xef, 0xda, 0x1b, 0xff, 0x78, 0x4e, 0x4c, 0x98, 0x86, 0xb0, 0x35, 0xc1, 0x0e, 0xae, 0x40, - 0x15, 0xaa, 0x9c, 0x12, 0x0a, 0x4a, 0x03, 0xa5, 0xb5, 0x93, 0xa8, 0xb5, 0x4a, 0x69, 0x34, 0x2d, - 0x2d, 0xb7, 0x65, 0xb2, 0x3b, 0x75, 0x17, 0xaf, 0x77, 0x56, 0x3b, 0x63, 0x17, 0xdf, 0x38, 0x87, - 0x3f, 0x20, 0x37, 0x0e, 0xfd, 0x17, 0xe0, 0xc2, 0x85, 0x03, 0xa7, 0x1e, 0x39, 0x21, 0x4e, 0x11, - 0xf5, 0x7f, 0x81, 0xc4, 0xe1, 0xab, 0x99, 0x9d, 0xb5, 0x37, 0xf1, 0x3a, 0x49, 0xbf, 0xaa, 0xa2, - 0xef, 0x29, 0x33, 0x3b, 0x9f, 0xcf, 0x9b, 0xf7, 0xde, 0xbc, 0x5f, 0x31, 0xac, 0xb0, 0xa3, 0x3f, - 0x50, 0x47, 0xf0, 0x56, 0x18, 0x31, 0xc1, 0x10, 0x72, 0x99, 0xd3, 0xa7, 0x51, 0x8b, 0xbf, 0x27, - 0xd1, 0xa0, 0xef, 0x89, 0xd6, 0xe8, 0x27, 0xb5, 0x8a, 0x18, 0x87, 0x54, 0x03, 0x6a, 0x15, 0x1e, - 0x52, 0x27, 0xd9, 0x34, 0x7a, 0x8c, 0xf5, 0x7c, 0xba, 0xa5, 0x76, 0x47, 0xc3, 0xb7, 0x5b, 0xc2, - 0x1b, 0x50, 0x2e, 0xc8, 0x20, 0xd4, 0x80, 0xb5, 0x1e, 0xeb, 0x31, 0xb5, 0xdc, 0x92, 0x2b, 0xfd, - 0xf5, 0xd6, 0x79, 0x1a, 0x09, 0xc6, 0xfa, 0xe8, 0x66, 0xe8, 0x0f, 0x7b, 0x5e, 0xb0, 0x15, 0xff, - 0x89, 0x3f, 0x36, 0xff, 0x6e, 0x80, 0xf9, 0x9c, 0x0a, 0x82, 0x7e, 0x01, 0xc5, 0x11, 0x8d, 0xb8, - 0xc7, 0x02, 0xcb, 0xd8, 0x34, 0xee, 0x54, 0xb6, 0xbf, 0xd7, 0x9a, 0xd7, 0xb7, 0xf5, 0x3a, 0x86, - 0x74, 0xcc, 0x8f, 0xa7, 0x8d, 0x1b, 0x38, 0x61, 0xa0, 0x07, 0x00, 0x4e, 0x44, 0x89, 0xa0, 0xae, - 0x4d, 0x84, 0x95, 0x53, 0xfc, 0x5a, 0x2b, 0x56, 0xa5, 0x95, 0xa8, 0xd2, 0x7a, 0x95, 0x58, 0x80, - 0xcb, 0x1a, 0xdd, 0x16, 0x92, 0x3a, 0x0c, 0xdd, 0x84, 0x9a, 0xbf, 0x9c, 0xaa, 0xd1, 0x6d, 0xd1, - 0xfc, 0xab, 0x09, 0xe6, 0x6f, 0x98, 0x4b, 0xd1, 0x3a, 0xe4, 0x3c, 0x57, 0xa9, 0x5d, 0xee, 0x14, - 0x26, 0xa7, 0x8d, 0x5c, 0x77, 0x1f, 0xe7, 0x3c, 0x17, 0x6d, 0x83, 0x39, 0xa0, 0x82, 0x68, 0x85, - 0xac, 0x2c, 0x83, 0xa4, 0xed, 0xda, 0x1a, 0x85, 0x45, 0x3f, 0x07, 0x53, 0x3e, 0x83, 0xd6, 0x64, - 0x23, 0x8b, 0x23, 0xef, 0x7c, 0x19, 0x52, 0x27, 0xe1, 0x49, 0x3c, 0x3a, 0x80, 0x8a, 0x4b, 0xb9, - 0x13, 0x79, 0xa1, 0x90, 0x3e, 0x34, 0x15, 0xfd, 0xf6, 0x22, 0xfa, 0xfe, 0x0c, 0x8a, 0xd3, 0x3c, - 0xf4, 0x4b, 0x28, 0x70, 0x41, 0xc4, 0x90, 0x5b, 0x4b, 0x4a, 0x42, 0x7d, 0xa1, 0x02, 0x0a, 0xa5, - 0x55, 0xd0, 0x1c, 0xf4, 0x14, 0xaa, 0x03, 0x12, 0x90, 0x1e, 0x8d, 0x6c, 0x2d, 0xa5, 0xa0, 0xa4, - 0xfc, 0x20, 0xd3, 0xf4, 0x18, 0x19, 0x0b, 0xc2, 0x2b, 0x83, 0xf4, 0x16, 0x1d, 0x00, 0x10, 0x21, - 0x88, 0xf3, 0x6e, 0x40, 0x03, 0x61, 0x15, 0x95, 0x94, 0x1f, 0x66, 0xea, 0x42, 0xc5, 0x7b, 0x16, - 0xf5, 0xdb, 0x53, 0x30, 0x4e, 0x11, 0xd1, 0x13, 0xa8, 0x38, 0x34, 0x12, 0xde, 0x5b, 0xcf, 0x21, - 0x82, 0x5a, 0x25, 0x25, 0xa7, 0x91, 0x25, 0x67, 0x6f, 0x06, 0xd3, 0x46, 0xa5, 0x99, 0xe8, 0x1e, - 0x98, 0x11, 0xf3, 0xa9, 0x55, 0xde, 0x34, 0xee, 0x54, 0x17, 0x3f, 0x0b, 0x66, 0x3e, 0xc5, 0x0a, - 0xb9, 0xbb, 0x7e, 0x7c, 0xd2, 0x44, 0xb0, 0x5a, 0x32, 0x56, 0x0d, 0x15, 0x1a, 0xc6, 0x3d, 0xe3, - 0x77, 0xc6, 0xef, 0x8d, 0xe6, 0xff, 0xf3, 0x50, 0x7c, 0x49, 0xa3, 0x91, 0xe7, 0x7c, 0xd9, 0xc0, - 0x79, 0x70, 0x26, 0x70, 0x32, 0x6d, 0xd4, 0xd7, 0xce, 0xc5, 0xce, 0x0e, 0x94, 0x68, 0xe0, 0x86, - 0xcc, 0x0b, 0x84, 0x0e, 0x9c, 0x4c, 0x03, 0x0f, 0x34, 0x06, 0x4f, 0xd1, 0xe8, 0x00, 0x56, 0xe2, - 0x7c, 0xb0, 0xcf, 0x44, 0xcd, 0x66, 0x16, 0xfd, 0xb7, 0x0a, 0xa8, 0x9f, 0x7b, 0x79, 0x98, 0xda, - 0xa1, 0x7d, 0x58, 0x09, 0x23, 0x3a, 0xf2, 0xd8, 0x90, 0xdb, 0xca, 0x88, 0xc2, 0x95, 0x8c, 0xc0, - 0xcb, 0x09, 0x4b, 0xee, 0xd0, 0xaf, 0x60, 0x59, 0x92, 0xed, 0xa4, 0x8e, 0xc0, 0xa5, 0x75, 0x04, - 0xab, 0x92, 0xa7, 0x37, 0xe8, 0x05, 0x7c, 0xe7, 0x8c, 0x16, 0x53, 0x41, 0x95, 0xcb, 0x05, 0xdd, - 0x4c, 0x6b, 0xa2, 0x3f, 0xee, 0xa2, 0xe3, 0x93, 0x66, 0x15, 0x96, 0xd3, 0x21, 0xd0, 0xfc, 0x4b, - 0x0e, 0x4a, 0x89, 0x23, 0xd1, 0x7d, 0xfd, 0x66, 0xc6, 0x62, 0xaf, 0x25, 0x58, 0x65, 0x6f, 0xfc, - 0x5c, 0xf7, 0x61, 0x29, 0x64, 0x91, 0xe0, 0x56, 0x6e, 0x33, 0xbf, 0x28, 0x45, 0x0f, 0x59, 0x24, - 0xf6, 0x58, 0xf0, 0xd6, 0xeb, 0xe1, 0x18, 0x8c, 0xde, 0x40, 0x65, 0xe4, 0x45, 0x62, 0x48, 0x7c, - 0xdb, 0x0b, 0xb9, 0x95, 0x57, 0xdc, 0x1f, 0x5d, 0x74, 0x65, 0xeb, 0x75, 0x8c, 0xef, 0x1e, 0x76, - 0xaa, 0x93, 0xd3, 0x06, 0x4c, 0xb7, 0x1c, 0x83, 0x16, 0xd5, 0x0d, 0x79, 0xed, 0x39, 0x94, 0xa7, - 0x27, 0xe8, 0x2e, 0x40, 0x10, 0x67, 0xa4, 0x3d, 0x8d, 0xec, 0x95, 0xc9, 0x69, 0xa3, 0xac, 0xf3, - 0xb4, 0xbb, 0x8f, 0xcb, 0x1a, 0xd0, 0x75, 0x11, 0x02, 0x93, 0xb8, 0x6e, 0xa4, 0xe2, 0xbc, 0x8c, - 0xd5, 0xba, 0xf9, 0xe7, 0x22, 0x98, 0xaf, 0x08, 0xef, 0x5f, 0x77, 0x55, 0x95, 0x77, 0xce, 0x65, - 0xc6, 0x5d, 0x00, 0x1e, 0xc7, 0x9b, 0x34, 0xc7, 0x9c, 0x99, 0xa3, 0xa3, 0x50, 0x9a, 0xa3, 0x01, - 0xb1, 0x39, 0xdc, 0x67, 0x42, 0x25, 0x81, 0x89, 0xd5, 0x1a, 0xdd, 0x86, 0x62, 0xc0, 0x5c, 0x45, - 0x2f, 0x28, 0x3a, 0x4c, 0x4e, 0x1b, 0x05, 0x59, 0x2b, 0xba, 0xfb, 0xb8, 0x20, 0x8f, 0xba, 0xae, - 0x2c, 0x53, 0x24, 0x08, 0x98, 0x20, 0xb2, 0x06, 0x73, 0x5d, 0xee, 0x32, 0xa3, 0xbf, 0x3d, 0x83, - 0x25, 0x65, 0x2a, 0xc5, 0x44, 0xaf, 0xe1, 0x66, 0xa2, 0x6f, 0x5a, 0x60, 0xe9, 0x73, 0x04, 0x22, - 0x2d, 0x21, 0x75, 0x92, 0x6a, 0x0b, 0xe5, 0xc5, 0x6d, 0x41, 0x79, 0x30, 0xab, 0x2d, 0x74, 0x60, - 0xc5, 0xa5, 0xdc, 0x8b, 0xa8, 0xab, 0xca, 0x04, 0x55, 0x99, 0x59, 0xdd, 0xfe, 0xfe, 0x45, 0x42, - 0x28, 0x5e, 0xd6, 0x1c, 0xb5, 0x43, 0x6d, 0x28, 0xe9, 0xb8, 0xe1, 0x56, 0x45, 0xc5, 0xee, 0x15, - 0xdb, 0xc1, 0x94, 0x76, 0xa6, 0xcc, 0x2d, 0x7f, 0x56, 0x99, 0x7b, 0x00, 0xe0, 0xb3, 0x9e, 0xed, - 0x46, 0xde, 0x88, 0x46, 0xd6, 0x8a, 0x1e, 0x12, 0x32, 0xb8, 0xfb, 0x0a, 0x81, 0xcb, 0x3e, 0xeb, - 0xc5, 0xcb, 0xb9, 0xa2, 0x54, 0xfd, 0xcc, 0xa2, 0x44, 0xa0, 0x46, 0x38, 0xf7, 0x7a, 0x01, 0x75, - 0xed, 0x1e, 0x0d, 0x68, 0xe4, 0x39, 0x76, 0x44, 0x39, 0x1b, 0x46, 0x0e, 0xe5, 0xd6, 0xb7, 0x94, - 0x27, 0x32, 0xdb, 0xfc, 0x93, 0x18, 0x8c, 0x35, 0x16, 0x5b, 0x89, 0x98, 0x73, 0x07, 0x7c, 0xb7, - 0x76, 0x7c, 0xd2, 0x5c, 0x87, 0xb5, 0x74, 0x99, 0xda, 0x31, 0x1e, 0x1b, 0x4f, 0x8d, 0x43, 0xa3, - 0xf9, 0xcf, 0x1c, 0x7c, 0x7b, 0xce, 0xa7, 0xe8, 0x67, 0x50, 0xd4, 0x5e, 0xbd, 0x68, 0x58, 0xd3, - 0x3c, 0x9c, 0x60, 0xd1, 0x06, 0x94, 0x65, 0x8a, 0x53, 0xce, 0x69, 0x5c, 0xbc, 0xca, 0x78, 0xf6, - 0x01, 0x59, 0x50, 0x24, 0xbe, 0x47, 0xe4, 0x59, 0x5e, 0x9d, 0x25, 0x5b, 0x34, 0x84, 0xf5, 0xd8, - 0xf5, 0xf6, 0xac, 0xb5, 0xdb, 0x2c, 0x14, 0xdc, 0x32, 0x95, 0xfd, 0x8f, 0xae, 0x14, 0x09, 0xfa, - 0x71, 0x66, 0x1f, 0x5e, 0x84, 0x82, 0x1f, 0x04, 0x22, 0x1a, 0xe3, 0x35, 0x37, 0xe3, 0xa8, 0xf6, - 0x04, 0x6e, 0x2d, 0xa4, 0xa0, 0x55, 0xc8, 0xf7, 0xe9, 0x38, 0x2e, 0x4f, 0x58, 0x2e, 0xd1, 0x1a, - 0x2c, 0x8d, 0x88, 0x3f, 0xa4, 0xba, 0x9a, 0xc5, 0x9b, 0xdd, 0xdc, 0x8e, 0xd1, 0xfc, 0x90, 0x83, - 0xa2, 0x56, 0xe7, 0xba, 0x5b, 0xbe, 0xbe, 0x76, 0xae, 0xb0, 0x3d, 0x84, 0x65, 0xed, 0xd2, 0x38, - 0x23, 0xcd, 0x4b, 0x63, 0xba, 0x12, 0xe3, 0xe3, 0x6c, 0x7c, 0x08, 0xa6, 0x17, 0x92, 0x81, 0x6e, - 0xf7, 0x99, 0x37, 0x77, 0x0f, 0xdb, 0xcf, 0x5f, 0x84, 0x71, 0x61, 0x29, 0x4d, 0x4e, 0x1b, 0xa6, - 0xfc, 0x80, 0x15, 0x2d, 0xb3, 0x31, 0xfe, 0x6d, 0x09, 0x8a, 0x7b, 0xfe, 0x90, 0x0b, 0x1a, 0x5d, - 0xb7, 0x93, 0xf4, 0xb5, 0x73, 0x4e, 0xda, 0x83, 0x62, 0xc4, 0x98, 0xb0, 0x1d, 0x72, 0x91, 0x7f, - 0x30, 0x63, 0x62, 0xaf, 0xdd, 0xa9, 0x4a, 0xa2, 0xac, 0xed, 0xf1, 0x1e, 0x17, 0x24, 0x75, 0x8f, - 0xa0, 0x37, 0xb0, 0x9e, 0x74, 0xc4, 0x23, 0xc6, 0x04, 0x17, 0x11, 0x09, 0xed, 0x3e, 0x1d, 0xcb, - 0x59, 0x29, 0xbf, 0x68, 0x36, 0x3e, 0x08, 0x9c, 0x68, 0xac, 0x9c, 0xf7, 0x8c, 0x8e, 0xf1, 0x9a, - 0x16, 0xd0, 0x49, 0xf8, 0xcf, 0xe8, 0x98, 0xa3, 0x47, 0xb0, 0x41, 0xa7, 0x30, 0x29, 0xd1, 0xf6, - 0xc9, 0x40, 0xf6, 0x7a, 0xdb, 0xf1, 0x99, 0xd3, 0x57, 0xed, 0xc6, 0xc4, 0xb7, 0x68, 0x5a, 0xd4, - 0xaf, 0x63, 0xc4, 0x9e, 0x04, 0x20, 0x0e, 0xd6, 0x91, 0x4f, 0x9c, 0xbe, 0xef, 0x71, 0xf9, 0xef, - 0x4f, 0x6a, 0xdc, 0x95, 0x1d, 0x43, 0xea, 0xb6, 0x73, 0x81, 0xb7, 0x5a, 0x9d, 0x19, 0x37, 0x35, - 0x3c, 0xeb, 0x8c, 0xfa, 0xee, 0x51, 0xf6, 0x29, 0xea, 0x40, 0x65, 0x18, 0xc8, 0xeb, 0x63, 0x1f, - 0x94, 0xaf, 0xea, 0x03, 0x88, 0x59, 0xd2, 0xf2, 0xda, 0x08, 0x36, 0x2e, 0xba, 0x3c, 0x23, 0x37, - 0x1f, 0xa7, 0x73, 0xb3, 0xb2, 0xfd, 0xe3, 0xac, 0xfb, 0xb2, 0x45, 0xa6, 0xf2, 0x38, 0x33, 0x6c, - 0xff, 0x61, 0x40, 0xe1, 0x25, 0x75, 0x22, 0x2a, 0xbe, 0x68, 0xd4, 0xee, 0x9c, 0x89, 0xda, 0x7a, - 0xf6, 0x20, 0x2c, 0x6f, 0x9d, 0x0b, 0xda, 0x1a, 0x94, 0xbc, 0x40, 0xd0, 0x28, 0x20, 0xbe, 0x8a, - 0xda, 0x12, 0x9e, 0xee, 0x33, 0x0d, 0xf8, 0x60, 0x40, 0x21, 0x9e, 0x14, 0xaf, 0xdb, 0x80, 0xf8, - 0xd6, 0xf3, 0x06, 0x64, 0x2a, 0xf9, 0x3f, 0x03, 0x4a, 0x49, 0xc3, 0xfa, 0xa2, 0x6a, 0x9e, 0x9b, - 0xbc, 0xf2, 0x5f, 0x7b, 0xf2, 0x42, 0x60, 0xf6, 0xbd, 0x40, 0xcf, 0x88, 0x58, 0xad, 0x51, 0x0b, - 0x8a, 0x21, 0x19, 0xfb, 0x8c, 0xb8, 0xba, 0x50, 0xae, 0xcd, 0xfd, 0xb0, 0xd0, 0x0e, 0xc6, 0x38, - 0x01, 0xed, 0xae, 0x1d, 0x9f, 0x34, 0x57, 0xa1, 0x9a, 0xb6, 0xfc, 0x9d, 0xd1, 0xfc, 0xb7, 0x01, - 0xe5, 0x83, 0x3f, 0x0a, 0x1a, 0xa8, 0x79, 0xe0, 0x1b, 0x69, 0xfc, 0xe6, 0xfc, 0x8f, 0x0f, 0xe5, - 0x33, 0xbf, 0x2b, 0x64, 0x3d, 0x6a, 0xc7, 0xfa, 0xf8, 0xa9, 0x7e, 0xe3, 0x3f, 0x9f, 0xea, 0x37, - 0xfe, 0x34, 0xa9, 0x1b, 0x1f, 0x27, 0x75, 0xe3, 0x5f, 0x93, 0xba, 0xf1, 0xdf, 0x49, 0xdd, 0x38, - 0x2a, 0x28, 0xff, 0xfc, 0xf4, 0xab, 0x00, 0x00, 0x00, 0xff, 0xff, 0xe4, 0x48, 0xcb, 0x39, 0xc2, - 0x12, 0x00, 0x00, + // 1514 bytes of a gzipped FileDescriptorProto + 0x1f, 0x8b, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0xff, 0xcc, 0x58, 0x4f, 0x6f, 0x1b, 0x4d, + 0x19, 0xef, 0xda, 0x1b, 0xff, 0x79, 0xec, 0x98, 0x30, 0x0d, 0x61, 0x6b, 0x82, 0x9d, 0xd7, 0xaf, + 0x40, 0x15, 0x7a, 0xe5, 0xbc, 0x84, 0x82, 0xd2, 0x40, 0x69, 0xed, 0x24, 0x6a, 0xad, 0x52, 0x1a, + 0x4d, 0x4b, 0xcb, 0x6d, 0x99, 0xec, 0x4e, 0xdd, 0xc5, 0xeb, 0x9d, 0xd5, 0xce, 0xd8, 0xc5, 0x37, + 0xce, 0xf9, 0x02, 0xb9, 0x71, 0xe8, 0x07, 0xe0, 0x02, 0x17, 0x2e, 0x1c, 0x38, 0xf5, 0xc8, 0x09, + 0x71, 0x8a, 0xa8, 0xbf, 0x05, 0x12, 0x07, 0x34, 0xb3, 0xb3, 0xf6, 0x26, 0x5e, 0x27, 0x29, 0xaa, + 0x22, 0x4e, 0xd9, 0x99, 0xf9, 0xfd, 0x9e, 0x7f, 0xf3, 0x3c, 0xcf, 0x3c, 0x31, 0xac, 0xb2, 0xe3, + 0xdf, 0x52, 0x47, 0xf0, 0x76, 0x18, 0x31, 0xc1, 0x10, 0x72, 0x99, 0x33, 0xa0, 0x51, 0x9b, 0xbf, + 0x23, 0xd1, 0x70, 0xe0, 0x89, 0xf6, 0xf8, 0x87, 0xf5, 0x8a, 0x98, 0x84, 0x54, 0x03, 0xea, 0x15, + 0x1e, 0x52, 0x27, 0x59, 0x34, 0xfb, 0x8c, 0xf5, 0x7d, 0xba, 0xad, 0x56, 0xc7, 0xa3, 0x37, 0xdb, + 0xc2, 0x1b, 0x52, 0x2e, 0xc8, 0x30, 0xd4, 0x80, 0xf5, 0x3e, 0xeb, 0x33, 0xf5, 0xb9, 0x2d, 0xbf, + 0xf4, 0xee, 0x9d, 0x8b, 0x34, 0x12, 0x4c, 0xf4, 0xd1, 0xed, 0xd0, 0x1f, 0xf5, 0xbd, 0x60, 0x3b, + 0xfe, 0x13, 0x6f, 0xb6, 0xfe, 0x62, 0x80, 0xf9, 0x8c, 0x0a, 0x82, 0x7e, 0x0a, 0xc5, 0x31, 0x8d, + 0xb8, 0xc7, 0x02, 0xcb, 0xd8, 0x32, 0xee, 0x56, 0x76, 0xbe, 0xd3, 0x5e, 0xb4, 0xb7, 0xfd, 0x2a, + 0x86, 0x74, 0xcd, 0x0f, 0x67, 0xcd, 0x5b, 0x38, 0x61, 0xa0, 0xfb, 0x00, 0x4e, 0x44, 0x89, 0xa0, + 0xae, 0x4d, 0x84, 0x95, 0x53, 0xfc, 0x7a, 0x3b, 0x36, 0xa5, 0x9d, 0x98, 0xd2, 0x7e, 0x99, 0x78, + 0x80, 0xcb, 0x1a, 0xdd, 0x11, 0x92, 0x3a, 0x0a, 0xdd, 0x84, 0x9a, 0xbf, 0x9a, 0xaa, 0xd1, 0x1d, + 0xd1, 0xfa, 0x93, 0x09, 0xe6, 0x2f, 0x99, 0x4b, 0xd1, 0x06, 0xe4, 0x3c, 0x57, 0x99, 0x5d, 0xee, + 0x16, 0xa6, 0x67, 0xcd, 0x5c, 0xef, 0x00, 0xe7, 0x3c, 0x17, 0xed, 0x80, 0x39, 0xa4, 0x82, 0x68, + 0x83, 0xac, 0x2c, 0x87, 0xa4, 0xef, 0xda, 0x1b, 0x85, 0x45, 0x3f, 0x01, 0x53, 0x5e, 0x83, 0xb6, + 0x64, 0x33, 0x8b, 0x23, 0x75, 0xbe, 0x08, 0xa9, 0x93, 0xf0, 0x24, 0x1e, 0x1d, 0x42, 0xc5, 0xa5, + 0xdc, 0x89, 0xbc, 0x50, 0xc8, 0x18, 0x9a, 0x8a, 0xfe, 0xe5, 0x32, 0xfa, 0xc1, 0x1c, 0x8a, 0xd3, + 0x3c, 0xf4, 0x33, 0x28, 0x70, 0x41, 0xc4, 0x88, 0x5b, 0x2b, 0x4a, 0x42, 0x63, 0xa9, 0x01, 0x0a, + 0xa5, 0x4d, 0xd0, 0x1c, 0xf4, 0x04, 0x6a, 0x43, 0x12, 0x90, 0x3e, 0x8d, 0x6c, 0x2d, 0xa5, 0xa0, + 0xa4, 0x7c, 0x91, 0xe9, 0x7a, 0x8c, 0x8c, 0x05, 0xe1, 0xd5, 0x61, 0x7a, 0x89, 0x0e, 0x01, 0x88, + 0x10, 0xc4, 0x79, 0x3b, 0xa4, 0x81, 0xb0, 0x8a, 0x4a, 0xca, 0xf7, 0x32, 0x6d, 0xa1, 0xe2, 0x1d, + 0x8b, 0x06, 0x9d, 0x19, 0x18, 0xa7, 0x88, 0xe8, 0x31, 0x54, 0x1c, 0x1a, 0x09, 0xef, 0x8d, 0xe7, + 0x10, 0x41, 0xad, 0x92, 0x92, 0xd3, 0xcc, 0x92, 0xb3, 0x3f, 0x87, 0x69, 0xa7, 0xd2, 0x4c, 0xf4, + 0x35, 0x98, 0x11, 0xf3, 0xa9, 0x55, 0xde, 0x32, 0xee, 0xd6, 0x96, 0x5f, 0x0b, 0x66, 0x3e, 0xc5, + 0x0a, 0xb9, 0xb7, 0x71, 0x72, 0xda, 0x42, 0xb0, 0x56, 0x32, 0xd6, 0x0c, 0x95, 0x1a, 0xc6, 0xd7, + 0xc6, 0xaf, 0x8d, 0xdf, 0x18, 0xad, 0xff, 0xe4, 0xa1, 0xf8, 0x82, 0x46, 0x63, 0xcf, 0xf9, 0xbc, + 0x89, 0x73, 0xff, 0x5c, 0xe2, 0x64, 0xfa, 0xa8, 0xd5, 0x2e, 0xe4, 0xce, 0x2e, 0x94, 0x68, 0xe0, + 0x86, 0xcc, 0x0b, 0x84, 0x4e, 0x9c, 0x4c, 0x07, 0x0f, 0x35, 0x06, 0xcf, 0xd0, 0xe8, 0x10, 0x56, + 0xe3, 0x7a, 0xb0, 0xcf, 0x65, 0xcd, 0x56, 0x16, 0xfd, 0x57, 0x0a, 0xa8, 0xaf, 0xbb, 0x3a, 0x4a, + 0xad, 0xd0, 0x01, 0xac, 0x86, 0x11, 0x1d, 0x7b, 0x6c, 0xc4, 0x6d, 0xe5, 0x44, 0xe1, 0x5a, 0x4e, + 0xe0, 0x6a, 0xc2, 0x92, 0x2b, 0xf4, 0x73, 0xa8, 0x4a, 0xb2, 0x9d, 0xf4, 0x11, 0xb8, 0xb2, 0x8f, + 0x60, 0xd5, 0xf2, 0xf4, 0x02, 0x3d, 0x87, 0x6f, 0x9d, 0xb3, 0x62, 0x26, 0xa8, 0x72, 0xb5, 0xa0, + 0xdb, 0x69, 0x4b, 0xf4, 0xe6, 0x1e, 0x3a, 0x39, 0x6d, 0xd5, 0xa0, 0x9a, 0x4e, 0x81, 0xd6, 0x1f, + 0x72, 0x50, 0x4a, 0x02, 0x89, 0xee, 0xe9, 0x3b, 0x33, 0x96, 0x47, 0x2d, 0xc1, 0x2a, 0x7f, 0xe3, + 0xeb, 0xba, 0x07, 0x2b, 0x21, 0x8b, 0x04, 0xb7, 0x72, 0x5b, 0xf9, 0x65, 0x25, 0x7a, 0xc4, 0x22, + 0xb1, 0xcf, 0x82, 0x37, 0x5e, 0x1f, 0xc7, 0x60, 0xf4, 0x1a, 0x2a, 0x63, 0x2f, 0x12, 0x23, 0xe2, + 0xdb, 0x5e, 0xc8, 0xad, 0xbc, 0xe2, 0x7e, 0xff, 0x32, 0x95, 0xed, 0x57, 0x31, 0xbe, 0x77, 0xd4, + 0xad, 0x4d, 0xcf, 0x9a, 0x30, 0x5b, 0x72, 0x0c, 0x5a, 0x54, 0x2f, 0xe4, 0xf5, 0x67, 0x50, 0x9e, + 0x9d, 0xa0, 0xaf, 0x00, 0x82, 0xb8, 0x22, 0xed, 0x59, 0x66, 0xaf, 0x4e, 0xcf, 0x9a, 0x65, 0x5d, + 0xa7, 0xbd, 0x03, 0x5c, 0xd6, 0x80, 0x9e, 0x8b, 0x10, 0x98, 0xc4, 0x75, 0x23, 0x95, 0xe7, 0x65, + 0xac, 0xbe, 0x5b, 0x7f, 0x2c, 0x82, 0xf9, 0x92, 0xf0, 0xc1, 0x4d, 0x77, 0x55, 0xa9, 0x73, 0xa1, + 0x32, 0xbe, 0x02, 0xe0, 0x71, 0xbe, 0x49, 0x77, 0xcc, 0xb9, 0x3b, 0x3a, 0x0b, 0xa5, 0x3b, 0x1a, + 0x10, 0xbb, 0xc3, 0x7d, 0x26, 0x54, 0x11, 0x98, 0x58, 0x7d, 0xa3, 0x2f, 0xa1, 0x18, 0x30, 0x57, + 0xd1, 0x0b, 0x8a, 0x0e, 0xd3, 0xb3, 0x66, 0x41, 0xf6, 0x8a, 0xde, 0x01, 0x2e, 0xc8, 0xa3, 0x9e, + 0x2b, 0xdb, 0x14, 0x09, 0x02, 0x26, 0x88, 0xec, 0xc1, 0x5c, 0xb7, 0xbb, 0xcc, 0xec, 0xef, 0xcc, + 0x61, 0x49, 0x9b, 0x4a, 0x31, 0xd1, 0x2b, 0xb8, 0x9d, 0xd8, 0x9b, 0x16, 0x58, 0xfa, 0x14, 0x81, + 0x48, 0x4b, 0x48, 0x9d, 0xa4, 0x9e, 0x85, 0xf2, 0xf2, 0x67, 0x41, 0x45, 0x30, 0xeb, 0x59, 0xe8, + 0xc2, 0xaa, 0x4b, 0xb9, 0x17, 0x51, 0x57, 0xb5, 0x09, 0xaa, 0x2a, 0xb3, 0xb6, 0xf3, 0xdd, 0xcb, + 0x84, 0x50, 0x5c, 0xd5, 0x1c, 0xb5, 0x42, 0x1d, 0x28, 0xe9, 0xbc, 0xe1, 0x56, 0x45, 0xe5, 0xee, + 0x35, 0x9f, 0x83, 0x19, 0xed, 0x5c, 0x9b, 0xab, 0x7e, 0x52, 0x9b, 0xbb, 0x0f, 0xe0, 0xb3, 0xbe, + 0xed, 0x46, 0xde, 0x98, 0x46, 0xd6, 0xaa, 0x1e, 0x12, 0x32, 0xb8, 0x07, 0x0a, 0x81, 0xcb, 0x3e, + 0xeb, 0xc7, 0x9f, 0x0b, 0x4d, 0xa9, 0xf6, 0x89, 0x4d, 0x89, 0x40, 0x9d, 0x70, 0xee, 0xf5, 0x03, + 0xea, 0xda, 0x7d, 0x1a, 0xd0, 0xc8, 0x73, 0xec, 0x88, 0x72, 0x36, 0x8a, 0x1c, 0xca, 0xad, 0x6f, + 0xa8, 0x48, 0x64, 0x3e, 0xf3, 0x8f, 0x63, 0x30, 0xd6, 0x58, 0x6c, 0x25, 0x62, 0x2e, 0x1c, 0x70, + 0xf4, 0x05, 0x54, 0x5d, 0x16, 0x08, 0x29, 0x56, 0x90, 0x48, 0x58, 0x6b, 0x5b, 0xc6, 0xdd, 0x12, + 0xae, 0xc8, 0x3d, 0x1c, 0x6f, 0xed, 0xd5, 0x4f, 0x4e, 0x5b, 0x1b, 0xb0, 0x9e, 0xee, 0x64, 0xbb, + 0xc6, 0x23, 0xe3, 0x89, 0x71, 0x64, 0xb4, 0xfe, 0x96, 0x83, 0x6f, 0x2e, 0x84, 0x1d, 0xfd, 0x18, + 0x8a, 0x3a, 0xf0, 0x97, 0xcd, 0x73, 0x9a, 0x87, 0x13, 0x2c, 0xda, 0x84, 0xb2, 0xec, 0x02, 0x94, + 0x73, 0x1a, 0xf7, 0xb7, 0x32, 0x9e, 0x6f, 0x20, 0x0b, 0x8a, 0xc4, 0xf7, 0x88, 0x3c, 0xcb, 0xab, + 0xb3, 0x64, 0x89, 0x46, 0xb0, 0x11, 0xdf, 0x8e, 0x3d, 0x7f, 0xfd, 0x6d, 0x16, 0x0a, 0x6e, 0x99, + 0x2a, 0x44, 0x0f, 0xaf, 0x95, 0x2c, 0xfa, 0xfe, 0xe6, 0x1b, 0xcf, 0x43, 0xc1, 0x0f, 0x03, 0x11, + 0x4d, 0xf0, 0xba, 0x9b, 0x71, 0x54, 0x7f, 0x0c, 0x77, 0x96, 0x52, 0xd0, 0x1a, 0xe4, 0x07, 0x74, + 0x12, 0x77, 0x30, 0x2c, 0x3f, 0xd1, 0x3a, 0xac, 0x8c, 0x89, 0x3f, 0xa2, 0xba, 0xe1, 0xc5, 0x8b, + 0xbd, 0xdc, 0xae, 0xd1, 0x7a, 0x9f, 0x83, 0xa2, 0x36, 0xe7, 0xa6, 0xa7, 0x02, 0xad, 0x76, 0xa1, + 0xf7, 0x3d, 0x80, 0xaa, 0x0e, 0x69, 0x5c, 0xb4, 0xe6, 0x95, 0x69, 0x5f, 0x89, 0xf1, 0x71, 0xc1, + 0x3e, 0x00, 0xd3, 0x0b, 0xc9, 0x50, 0x4f, 0x04, 0x99, 0x9a, 0x7b, 0x47, 0x9d, 0x67, 0xcf, 0xc3, + 0xb8, 0xf7, 0x94, 0xa6, 0x67, 0x4d, 0x53, 0x6e, 0x60, 0x45, 0xcb, 0x7c, 0x3b, 0xff, 0xbc, 0x02, + 0xc5, 0x7d, 0x7f, 0xc4, 0x05, 0x8d, 0x6e, 0x3a, 0x48, 0x5a, 0xed, 0x42, 0x90, 0xf6, 0xa1, 0x18, + 0x31, 0x26, 0x6c, 0x87, 0x5c, 0x16, 0x1f, 0xcc, 0x98, 0xd8, 0xef, 0x74, 0x6b, 0x92, 0x28, 0xdb, + 0x7f, 0xbc, 0xc6, 0x05, 0x49, 0xdd, 0x27, 0xe8, 0x35, 0x6c, 0x24, 0x8f, 0xe6, 0x31, 0x63, 0x82, + 0x8b, 0x88, 0x84, 0xf6, 0x80, 0x4e, 0xe4, 0x38, 0x95, 0x5f, 0x36, 0x3e, 0x1f, 0x06, 0x4e, 0x34, + 0x51, 0xc1, 0x7b, 0x4a, 0x27, 0x78, 0x5d, 0x0b, 0xe8, 0x26, 0xfc, 0xa7, 0x74, 0xc2, 0xd1, 0x43, + 0xd8, 0xa4, 0x33, 0x98, 0x94, 0x68, 0xfb, 0x64, 0x28, 0xc7, 0x01, 0xdb, 0xf1, 0x99, 0x33, 0x50, + 0x2f, 0x92, 0x89, 0xef, 0xd0, 0xb4, 0xa8, 0x5f, 0xc4, 0x88, 0x7d, 0x09, 0x40, 0x1c, 0xac, 0x63, + 0x9f, 0x38, 0x03, 0xdf, 0xe3, 0xf2, 0x3f, 0xa4, 0xd4, 0x44, 0x2c, 0x1f, 0x15, 0x69, 0xdb, 0xee, + 0x25, 0xd1, 0x6a, 0x77, 0xe7, 0xdc, 0xd4, 0x7c, 0xad, 0x2b, 0xea, 0xdb, 0xc7, 0xd9, 0xa7, 0xa8, + 0x0b, 0x95, 0x51, 0x20, 0xd5, 0xc7, 0x31, 0x28, 0x5f, 0x37, 0x06, 0x10, 0xb3, 0xa4, 0xe7, 0xf5, + 0x31, 0x6c, 0x5e, 0xa6, 0x3c, 0xa3, 0x36, 0x1f, 0xa5, 0x6b, 0xb3, 0xb2, 0xf3, 0x83, 0x2c, 0x7d, + 0xd9, 0x22, 0x53, 0x75, 0x9c, 0x99, 0xb6, 0x7f, 0x35, 0xa0, 0xf0, 0x82, 0x3a, 0x11, 0x15, 0x9f, + 0x35, 0x6b, 0x77, 0xcf, 0x65, 0x6d, 0x23, 0x7b, 0x56, 0x96, 0x5a, 0x17, 0x92, 0xb6, 0x0e, 0x25, + 0x2f, 0x10, 0x34, 0x0a, 0x88, 0xaf, 0xb2, 0xb6, 0x84, 0x67, 0xeb, 0x4c, 0x07, 0xde, 0x1b, 0x50, + 0x88, 0x87, 0xc9, 0x9b, 0x76, 0x20, 0xd6, 0x7a, 0xd1, 0x81, 0x4c, 0x23, 0xff, 0x6d, 0x40, 0x29, + 0x79, 0xd3, 0x3e, 0xab, 0x99, 0x17, 0x86, 0xb3, 0xfc, 0xff, 0x3c, 0x9c, 0x21, 0x30, 0x07, 0x5e, + 0xa0, 0xc7, 0x48, 0xac, 0xbe, 0x51, 0x1b, 0x8a, 0x21, 0x99, 0xf8, 0x8c, 0xb8, 0xba, 0x51, 0xae, + 0x2f, 0xfc, 0xf6, 0xd0, 0x09, 0x26, 0x38, 0x01, 0xed, 0xad, 0x9f, 0x9c, 0xb6, 0xd6, 0xa0, 0x96, + 0xf6, 0xfc, 0xad, 0xd1, 0xfa, 0x87, 0x01, 0xe5, 0xc3, 0xdf, 0x09, 0x1a, 0xa8, 0x91, 0xe1, 0xff, + 0xd2, 0xf9, 0xad, 0xc5, 0xdf, 0x27, 0xca, 0xe7, 0x7e, 0x7a, 0xc8, 0xba, 0xd4, 0xae, 0xf5, 0xe1, + 0x63, 0xe3, 0xd6, 0x3f, 0x3f, 0x36, 0x6e, 0xfd, 0x7e, 0xda, 0x30, 0x3e, 0x4c, 0x1b, 0xc6, 0xdf, + 0xa7, 0x0d, 0xe3, 0x5f, 0xd3, 0x86, 0x71, 0x5c, 0x50, 0xf1, 0xf9, 0xd1, 0x7f, 0x03, 0x00, 0x00, + 0xff, 0xff, 0x50, 0xfa, 0x50, 0x4f, 0xe5, 0x12, 0x00, 0x00, } diff --git a/api/objects.proto b/api/objects.proto index 311c1f2fb6..4105822058 100644 --- a/api/objects.proto +++ b/api/objects.proto @@ -223,6 +223,12 @@ message Task { // is only written by the manager. TaskState desired_state = 10; + // DontRestart indicates that the restart supervisor decided not to + // start a replacement task for this task. This flag records the + // decision so that orchestrators can honor it when they do + // service-level reconciliation. + bool dont_restart = 16; + // List of network attachments by the task. repeated NetworkAttachment networks = 11; diff --git a/manager/orchestrator/global/global.go b/manager/orchestrator/global/global.go index bced3dd189..5eb0913388 100644 --- a/manager/orchestrator/global/global.go +++ b/manager/orchestrator/global/global.go @@ -265,8 +265,11 @@ func (g *Orchestrator) reconcileServices(ctx context.Context, serviceIDs []strin nodeTasks[serviceID] = make(map[string][]*api.Task) for _, t := range tasks { - if t.DesiredState <= api.TaskStateRunning { - // Collect all running instances of this service + if t.DesiredState <= api.TaskStateRunning || t.DontRestart { + // Collect all runnable instances of this service, + // and instances that were not be restarted due + // to restart policy but may be updated if the + // service spec changed. nodeTasks[serviceID][t.NodeID] = append(nodeTasks[serviceID][t.NodeID], t) } } @@ -405,7 +408,7 @@ func (g *Orchestrator) reconcileOneNode(ctx context.Context, node *api.Node) { if t.ServiceID != serviceID { continue } - if t.DesiredState <= api.TaskStateRunning { + if t.DesiredState <= api.TaskStateRunning || t.DontRestart { tasks[serviceID] = append(tasks[serviceID], t) } } diff --git a/manager/orchestrator/global/global_test.go b/manager/orchestrator/global/global_test.go index 51f4eda376..643fbef88d 100644 --- a/manager/orchestrator/global/global_test.go +++ b/manager/orchestrator/global/global_test.go @@ -213,7 +213,7 @@ func TestNodeAvailability(t *testing.T) { testutils.Expect(t, watch, state.EventCommit{}) // updating the service shouldn't restart the task - updateService(t, store, service1) + updateService(t, store, service1, true) testutils.Expect(t, watch, api.EventUpdateService{}) testutils.Expect(t, watch, state.EventCommit{}) select { @@ -241,7 +241,7 @@ func TestNodeAvailability(t *testing.T) { testutils.Expect(t, watch, state.EventCommit{}) // updating the service shouldn't restart the task - updateService(t, store, service1) + updateService(t, store, service1, true) testutils.Expect(t, watch, api.EventUpdateService{}) testutils.Expect(t, watch, state.EventCommit{}) select { @@ -277,7 +277,7 @@ func TestNodeState(t *testing.T) { testutils.Expect(t, watch, state.EventCommit{}) // updating the service shouldn't restart the task - updateService(t, store, service1) + updateService(t, store, service1, true) testutils.Expect(t, watch, api.EventUpdateService{}) testutils.Expect(t, watch, state.EventCommit{}) select { @@ -425,8 +425,20 @@ func TestTaskFailure(t *testing.T) { case <-time.After(100 * time.Millisecond): } - // update the service. now the task should be recreated. - updateService(t, store, serviceNoRestart) + // update the service with no spec changes, to trigger a + // reconciliation. the task should still not be updated. + updateService(t, store, serviceNoRestart, false) + testutils.Expect(t, watch, api.EventUpdateService{}) + testutils.Expect(t, watch, state.EventCommit{}) + + select { + case event := <-watch: + t.Fatalf("got unexpected event %T: %+v", event, event) + case <-time.After(100 * time.Millisecond): + } + + // update the service with spec changes. now the task should be recreated. + updateService(t, store, serviceNoRestart, true) testutils.Expect(t, watch, api.EventUpdateService{}) testutils.Expect(t, watch, state.EventCommit{}) @@ -444,11 +456,13 @@ func addService(t *testing.T, s *store.MemoryStore, service *api.Service) { }) } -func updateService(t *testing.T, s *store.MemoryStore, service *api.Service) { +func updateService(t *testing.T, s *store.MemoryStore, service *api.Service, force bool) { s.Update(func(tx store.Tx) error { service := store.GetService(tx, service.ID) require.NotNil(t, service) - service.Spec.Task.ForceUpdate++ + if force { + service.Spec.Task.ForceUpdate++ + } assert.NoError(t, store.UpdateService(tx, service)) return nil }) diff --git a/manager/orchestrator/replicated/restart_test.go b/manager/orchestrator/replicated/restart_test.go index 2d621c36a8..c9240fb442 100644 --- a/manager/orchestrator/replicated/restart_test.go +++ b/manager/orchestrator/replicated/restart_test.go @@ -118,6 +118,8 @@ func TestOrchestratorRestartOnAny(t *testing.T) { } func TestOrchestratorRestartOnFailure(t *testing.T) { + t.Parallel() + ctx := context.Background() s := store.NewMemoryStore(nil) assert.NotNil(t, s) @@ -126,7 +128,7 @@ func TestOrchestratorRestartOnFailure(t *testing.T) { orchestrator := NewReplicatedOrchestrator(s) defer orchestrator.Stop() - watch, cancel := state.Watch(s.WatchQueue() /*api.EventCreateTask{}, api.EventUpdateTask{}*/) + watch, cancel := state.Watch(s.WatchQueue(), api.EventCreateTask{}, api.EventUpdateTask{}) defer cancel() // Create a service with two instances specified before the orchestrator is @@ -181,9 +183,7 @@ func TestOrchestratorRestartOnFailure(t *testing.T) { return nil }) assert.NoError(t, err) - testutils.Expect(t, watch, state.EventCommit{}) testutils.Expect(t, watch, api.EventUpdateTask{}) - testutils.Expect(t, watch, state.EventCommit{}) testutils.Expect(t, watch, api.EventUpdateTask{}) observedTask3 := testutils.WatchTaskCreate(t, watch) @@ -191,8 +191,6 @@ func TestOrchestratorRestartOnFailure(t *testing.T) { assert.Equal(t, observedTask3.DesiredState, api.TaskStateReady) assert.Equal(t, observedTask3.ServiceAnnotations.Name, "name1") - testutils.Expect(t, watch, state.EventCommit{}) - observedTask4 := testutils.WatchTaskUpdate(t, watch) assert.Equal(t, observedTask4.DesiredState, api.TaskStateRunning) assert.Equal(t, observedTask4.ServiceAnnotations.Name, "name1") @@ -205,20 +203,47 @@ func TestOrchestratorRestartOnFailure(t *testing.T) { return nil }) assert.NoError(t, err) - testutils.Expect(t, watch, state.EventCommit{}) testutils.Expect(t, watch, api.EventUpdateTask{}) - testutils.Expect(t, watch, state.EventCommit{}) testutils.Expect(t, watch, api.EventUpdateTask{}) - testutils.Expect(t, watch, state.EventCommit{}) select { case <-watch: t.Fatal("got unexpected event") case <-time.After(100 * time.Millisecond): } + + // Update the service, but don't change anything in the spec. The + // second instance instance should not be restarted. + err = s.Update(func(tx store.Tx) error { + service := store.GetService(tx, "id1") + require.NotNil(t, service) + assert.NoError(t, store.UpdateService(tx, service)) + return nil + }) + assert.NoError(t, err) + + select { + case <-watch: + t.Fatal("got unexpected event") + case <-time.After(100 * time.Millisecond): + } + + // Update the service, and change the TaskSpec. Now the second instance + // should be restarted. + err = s.Update(func(tx store.Tx) error { + service := store.GetService(tx, "id1") + require.NotNil(t, service) + service.Spec.Task.ForceUpdate++ + assert.NoError(t, store.UpdateService(tx, service)) + return nil + }) + assert.NoError(t, err) + testutils.Expect(t, watch, api.EventCreateTask{}) } func TestOrchestratorRestartOnNone(t *testing.T) { + t.Parallel() + ctx := context.Background() s := store.NewMemoryStore(nil) assert.NotNil(t, s) @@ -227,7 +252,7 @@ func TestOrchestratorRestartOnNone(t *testing.T) { orchestrator := NewReplicatedOrchestrator(s) defer orchestrator.Stop() - watch, cancel := state.Watch(s.WatchQueue() /*api.EventCreateTask{}, api.EventUpdateTask{}*/) + watch, cancel := state.Watch(s.WatchQueue(), api.EventCreateTask{}, api.EventUpdateTask{}) defer cancel() // Create a service with two instances specified before the orchestrator is @@ -281,11 +306,8 @@ func TestOrchestratorRestartOnNone(t *testing.T) { return nil }) assert.NoError(t, err) - testutils.Expect(t, watch, state.EventCommit{}) testutils.Expect(t, watch, api.EventUpdateTask{}) - testutils.Expect(t, watch, state.EventCommit{}) testutils.Expect(t, watch, api.EventUpdateTask{}) - testutils.Expect(t, watch, state.EventCommit{}) select { case <-watch: @@ -302,18 +324,59 @@ func TestOrchestratorRestartOnNone(t *testing.T) { }) assert.NoError(t, err) testutils.Expect(t, watch, api.EventUpdateTask{}) - testutils.Expect(t, watch, state.EventCommit{}) testutils.Expect(t, watch, api.EventUpdateTask{}) - testutils.Expect(t, watch, state.EventCommit{}) select { case <-watch: t.Fatal("got unexpected event") case <-time.After(100 * time.Millisecond): } + + // Update the service, but don't change anything in the spec. Neither + // instance should be restarted. + err = s.Update(func(tx store.Tx) error { + service := store.GetService(tx, "id1") + require.NotNil(t, service) + assert.NoError(t, store.UpdateService(tx, service)) + return nil + }) + assert.NoError(t, err) + + select { + case <-watch: + t.Fatal("got unexpected event") + case <-time.After(100 * time.Millisecond): + } + + // Update the service, and change the TaskSpec. Both instances should + // be restarted. + err = s.Update(func(tx store.Tx) error { + service := store.GetService(tx, "id1") + require.NotNil(t, service) + service.Spec.Task.ForceUpdate++ + assert.NoError(t, store.UpdateService(tx, service)) + return nil + }) + assert.NoError(t, err) + testutils.Expect(t, watch, api.EventCreateTask{}) + newTask := testutils.WatchTaskUpdate(t, watch) + assert.Equal(t, api.TaskStateRunning, newTask.DesiredState) + err = s.Update(func(tx store.Tx) error { + newTask := store.GetTask(tx, newTask.ID) + require.NotNil(t, newTask) + newTask.Status.State = api.TaskStateRunning + assert.NoError(t, store.UpdateTask(tx, newTask)) + return nil + }) + assert.NoError(t, err) + testutils.Expect(t, watch, api.EventUpdateTask{}) + + testutils.Expect(t, watch, api.EventCreateTask{}) } func TestOrchestratorRestartDelay(t *testing.T) { + t.Parallel() + ctx := context.Background() s := store.NewMemoryStore(nil) assert.NotNil(t, s) @@ -414,7 +477,7 @@ func TestOrchestratorRestartMaxAttempts(t *testing.T) { orchestrator := NewReplicatedOrchestrator(s) defer orchestrator.Stop() - watch, cancel := state.Watch(s.WatchQueue() /*api.EventCreateTask{}, api.EventUpdateTask{}*/) + watch, cancel := state.Watch(s.WatchQueue(), api.EventCreateTask{}, api.EventUpdateTask{}) defer cancel() // Create a service with two instances specified before the orchestrator is @@ -457,38 +520,65 @@ func TestOrchestratorRestartMaxAttempts(t *testing.T) { assert.NoError(t, orchestrator.Run(ctx)) }() - testRestart := func() { + failTask := func(task *api.Task, expectRestart bool) { + task = task.Copy() + task.Status = api.TaskStatus{State: api.TaskStateFailed} + err = s.Update(func(tx store.Tx) error { + assert.NoError(t, store.UpdateTask(tx, task)) + return nil + }) + assert.NoError(t, err) + testutils.Expect(t, watch, api.EventUpdateTask{}) + task = testutils.WatchShutdownTask(t, watch) + if expectRestart { + createdTask := testutils.WatchTaskCreate(t, watch) + assert.Equal(t, createdTask.Status.State, api.TaskStateNew) + assert.Equal(t, createdTask.DesiredState, api.TaskStateReady) + assert.Equal(t, createdTask.ServiceAnnotations.Name, "name1") + } + err = s.Update(func(tx store.Tx) error { + task := task.Copy() + task.Status.State = api.TaskStateShutdown + assert.NoError(t, store.UpdateTask(tx, task)) + return nil + }) + assert.NoError(t, err) + testutils.Expect(t, watch, api.EventUpdateTask{}) + } + + testRestart := func(serviceUpdated bool) { observedTask1 := testutils.WatchTaskCreate(t, watch) assert.Equal(t, observedTask1.Status.State, api.TaskStateNew) assert.Equal(t, observedTask1.ServiceAnnotations.Name, "name1") + if serviceUpdated { + runnableTask := testutils.WatchTaskUpdate(t, watch) + assert.Equal(t, observedTask1.ID, runnableTask.ID) + assert.Equal(t, api.TaskStateRunning, runnableTask.DesiredState) + err = s.Update(func(tx store.Tx) error { + task := runnableTask.Copy() + task.Status.State = api.TaskStateRunning + assert.NoError(t, store.UpdateTask(tx, task)) + return nil + }) + assert.NoError(t, err) + + testutils.Expect(t, watch, api.EventUpdateTask{}) + } + observedTask2 := testutils.WatchTaskCreate(t, watch) assert.Equal(t, observedTask2.Status.State, api.TaskStateNew) assert.Equal(t, observedTask2.ServiceAnnotations.Name, "name1") - testutils.Expect(t, watch, state.EventCommit{}) + if serviceUpdated { + testutils.Expect(t, watch, api.EventUpdateTask{}) + } // Fail the first task. Confirm that it gets restarted. - updatedTask1 := observedTask1.Copy() - updatedTask1.Status = api.TaskStatus{State: api.TaskStateFailed} before := time.Now() - err = s.Update(func(tx store.Tx) error { - assert.NoError(t, store.UpdateTask(tx, updatedTask1)) - return nil - }) - assert.NoError(t, err) - testutils.Expect(t, watch, api.EventUpdateTask{}) - testutils.Expect(t, watch, state.EventCommit{}) - testutils.Expect(t, watch, api.EventUpdateTask{}) - - observedTask3 := testutils.WatchTaskCreate(t, watch) - testutils.Expect(t, watch, state.EventCommit{}) - assert.Equal(t, observedTask3.Status.State, api.TaskStateNew) - assert.Equal(t, observedTask3.DesiredState, api.TaskStateReady) - assert.Equal(t, observedTask3.ServiceAnnotations.Name, "name1") + failTask(observedTask1, true) observedTask4 := testutils.WatchTaskUpdate(t, watch) - testutils.Expect(t, watch, state.EventCommit{}) after := time.Now() // At least 100 ms should have elapsed. Only check the lower bound, @@ -502,40 +592,15 @@ func TestOrchestratorRestartMaxAttempts(t *testing.T) { assert.Equal(t, observedTask4.ServiceAnnotations.Name, "name1") // Fail the second task. Confirm that it gets restarted. - updatedTask2 := observedTask2.Copy() - updatedTask2.Status = api.TaskStatus{State: api.TaskStateFailed} - err = s.Update(func(tx store.Tx) error { - assert.NoError(t, store.UpdateTask(tx, updatedTask2)) - return nil - }) - assert.NoError(t, err) - testutils.Expect(t, watch, api.EventUpdateTask{}) - testutils.Expect(t, watch, state.EventCommit{}) - testutils.Expect(t, watch, api.EventUpdateTask{}) - - observedTask5 := testutils.WatchTaskCreate(t, watch) - testutils.Expect(t, watch, state.EventCommit{}) - assert.Equal(t, observedTask5.Status.State, api.TaskStateNew) - assert.Equal(t, observedTask5.DesiredState, api.TaskStateReady) + failTask(observedTask2, true) observedTask6 := testutils.WatchTaskUpdate(t, watch) // task gets started after a delay - testutils.Expect(t, watch, state.EventCommit{}) assert.Equal(t, observedTask6.Status.State, api.TaskStateNew) assert.Equal(t, observedTask6.DesiredState, api.TaskStateRunning) assert.Equal(t, observedTask6.ServiceAnnotations.Name, "name1") // Fail the first instance again. It should not be restarted. - updatedTask1 = observedTask3.Copy() - updatedTask1.Status = api.TaskStatus{State: api.TaskStateFailed} - err = s.Update(func(tx store.Tx) error { - assert.NoError(t, store.UpdateTask(tx, updatedTask1)) - return nil - }) - assert.NoError(t, err) - testutils.Expect(t, watch, api.EventUpdateTask{}) - testutils.Expect(t, watch, state.EventCommit{}) - testutils.Expect(t, watch, api.EventUpdateTask{}) - testutils.Expect(t, watch, state.EventCommit{}) + failTask(observedTask4, false) select { case <-watch: @@ -544,17 +609,7 @@ func TestOrchestratorRestartMaxAttempts(t *testing.T) { } // Fail the second instance again. It should not be restarted. - updatedTask2 = observedTask5.Copy() - updatedTask2.Status = api.TaskStatus{State: api.TaskStateFailed} - err = s.Update(func(tx store.Tx) error { - assert.NoError(t, store.UpdateTask(tx, updatedTask2)) - return nil - }) - assert.NoError(t, err) - testutils.Expect(t, watch, api.EventUpdateTask{}) - testutils.Expect(t, watch, state.EventCommit{}) - testutils.Expect(t, watch, api.EventUpdateTask{}) - testutils.Expect(t, watch, state.EventCommit{}) + failTask(observedTask6, false) select { case <-watch: @@ -563,7 +618,7 @@ func TestOrchestratorRestartMaxAttempts(t *testing.T) { } } - testRestart() + testRestart(false) // Update the service spec err = s.Update(func(tx store.Tx) error { @@ -576,7 +631,7 @@ func TestOrchestratorRestartMaxAttempts(t *testing.T) { }) assert.NoError(t, err) - testRestart() + testRestart(true) } func TestOrchestratorRestartWindow(t *testing.T) { diff --git a/manager/orchestrator/replicated/update_test.go b/manager/orchestrator/replicated/update_test.go index 1599256fe8..3f61ee5469 100644 --- a/manager/orchestrator/replicated/update_test.go +++ b/manager/orchestrator/replicated/update_test.go @@ -284,10 +284,6 @@ func testUpdaterRollback(t *testing.T, rollbackFailureAction api.UpdateConfig_Fa assert.Equal(t, observedTask.Status.State, api.TaskStateNew) assert.Equal(t, observedTask.Spec.GetContainer().Image, "image1") - observedTask = testutils.WatchTaskCreate(t, watchCreate) - assert.Equal(t, observedTask.Status.State, api.TaskStateNew) - assert.Equal(t, observedTask.Spec.GetContainer().Image, "image1") - switch rollbackFailureAction { case api.UpdateConfig_PAUSE: // Should end up in ROLLBACK_PAUSED state diff --git a/manager/orchestrator/restart/restart.go b/manager/orchestrator/restart/restart.go index eed28f8202..a448f83240 100644 --- a/manager/orchestrator/restart/restart.go +++ b/manager/orchestrator/restart/restart.go @@ -131,14 +131,19 @@ func (r *Supervisor) Restart(ctx context.Context, tx store.Tx, cluster *api.Clus return errors.New("Restart called on task that was already shut down") } + shouldRestart := r.shouldRestart(ctx, &t, service) + t.DesiredState = api.TaskStateShutdown + if !shouldRestart { + t.DontRestart = true + } err := store.UpdateTask(tx, &t) if err != nil { log.G(ctx).WithError(err).Errorf("failed to set task desired state to dead") return err } - if !r.shouldRestart(ctx, &t, service) { + if !shouldRestart { return nil } @@ -337,7 +342,9 @@ func (r *Supervisor) DelayStart(ctx context.Context, _ store.Tx, oldTask *api.Ta var watch chan events.Event cancelWatch := func() {} - if waitStop && oldTask != nil { + waitForTask := waitStop && oldTask != nil && oldTask.Status.State <= api.TaskStateRunning + + if waitForTask { // Wait for either the old task to complete, or the old task's // node to become unavailable. watch, cancelWatch = state.Watch( @@ -378,7 +385,7 @@ func (r *Supervisor) DelayStart(ctx context.Context, _ store.Tx, oldTask *api.Ta } } - if waitStop && oldTask != nil { + if waitForTask { select { case <-watch: case <-oldTaskTimer.C: diff --git a/manager/orchestrator/slot.go b/manager/orchestrator/slot.go index ce347136ba..07c734898c 100644 --- a/manager/orchestrator/slot.go +++ b/manager/orchestrator/slot.go @@ -30,7 +30,7 @@ func GetRunnableAndDeadSlots(s *store.MemoryStore, serviceID string) (map[uint64 runningSlots := make(map[uint64]Slot) for _, t := range tasks { - if t.DesiredState <= api.TaskStateRunning { + if t.DesiredState <= api.TaskStateRunning || t.DontRestart { runningSlots[t.Slot] = append(runningSlots[t.Slot], t) } } diff --git a/manager/orchestrator/taskreaper/task_reaper.go b/manager/orchestrator/taskreaper/task_reaper.go index edb771c699..97e896a6ca 100644 --- a/manager/orchestrator/taskreaper/task_reaper.go +++ b/manager/orchestrator/taskreaper/task_reaper.go @@ -178,16 +178,25 @@ func (tr *TaskReaper) tick() { // TODO(aaronl): This could filter for non-running tasks and use quickselect // instead of sorting the whole slice. + // TODO(aaronl): This sort should really use lamport time instead of wall + // clock time. We should store a Version in the Status field. sort.Sort(tasksByTimestamp(historicTasks)) runningTasks := 0 - for _, t := range historicTasks { + for i, t := range historicTasks { if t.DesiredState <= api.TaskStateRunning || t.Status.State <= api.TaskStateRunning { // Don't delete running tasks runningTasks++ continue } + // If the most recent task has the DontRestart + // flag set, preserve that task to keep the + // flag visible to the orchestrator. + if i == len(historicTasks)-1 && t.DontRestart { + continue + } + deleteTasks[t.ID] = struct{}{} taskHistory++ diff --git a/manager/orchestrator/update/updater.go b/manager/orchestrator/update/updater.go index 349bccabb9..6b2d5044af 100644 --- a/manager/orchestrator/update/updater.go +++ b/manager/orchestrator/update/updater.go @@ -502,6 +502,9 @@ func (u *Updater) removeOldTasks(ctx context.Context, batch *store.Batch, remove return fmt.Errorf("task %s not found while trying to shut it down", original.ID) } if t.DesiredState > api.TaskStateRunning { + if t.DontRestart { + return nil + } return fmt.Errorf("task %s was already shut down when reached by updater", original.ID) } t.DesiredState = api.TaskStateShutdown @@ -584,28 +587,64 @@ func (u *Updater) rollbackUpdate(ctx context.Context, serviceID, message string) log.G(ctx).Debugf("starting rollback of service %s", serviceID) var service *api.Service - err := u.store.Update(func(tx store.Tx) error { - service = store.GetService(tx, serviceID) - if service == nil { - return nil + err := u.store.Batch(func(batch *store.Batch) error { + var serviceTasks []*api.Task + + err := batch.Update(func(tx store.Tx) error { + var err error + serviceTasks, err = store.FindTasks(tx, store.ByServiceID(serviceID)) + return err + }) + if err != nil { + return err } - if service.UpdateStatus == nil { - // The service was updated since we started this update - return nil + + // Clear DontRestart flag on all of this service's tasks. Since + // this is a rollback, we want a converged state instead of + // deferring to the restart policy. + for _, task := range serviceTasks { + if task.DontRestart { + err = batch.Update(func(tx store.Tx) error { + task.DontRestart = false + return store.UpdateTask(tx, task) + }) + if err != nil { + return err + } + } } - service.UpdateStatus.State = api.UpdateStatus_ROLLBACK_STARTED - service.UpdateStatus.Message = message + return batch.Update(func(tx store.Tx) error { + service = store.GetService(tx, serviceID) + if service == nil { + return nil + } + if service.UpdateStatus == nil { + // The service was updated since we started this update + return nil + } - if service.PreviousSpec == nil { - return errors.New("cannot roll back service because no previous spec is available") - } - service.Spec = *service.PreviousSpec - service.SpecVersion = service.PreviousSpecVersion.Copy() - service.PreviousSpec = nil - service.PreviousSpecVersion = nil + service.UpdateStatus.State = api.UpdateStatus_ROLLBACK_STARTED + service.UpdateStatus.Message = message - return store.UpdateService(tx, service) + if service.PreviousSpec == nil { + return errors.New("cannot roll back service because no previous spec is available") + } + + var err error + serviceTasks, err = store.FindTasks(tx, store.ByServiceID(serviceID)) + if err != nil { + return err + } + + service.Spec = *service.PreviousSpec + service.SpecVersion = service.PreviousSpecVersion.Copy() + service.PreviousSpec = nil + service.PreviousSpecVersion = nil + + return store.UpdateService(tx, service) + + }) }) if err != nil {