From b43b2eca8bef361f0fd62b128aa13c1620fd529c Mon Sep 17 00:00:00 2001 From: Miguel Angel Ajo Date: Thu, 27 Jan 2022 15:44:42 +0100 Subject: [PATCH] If a service stops with error request all the other services to stop Related-issue: #556 Signed-off-by: Miguel Angel Ajo --- pkg/servicemanager/manager.go | 15 ++++- pkg/servicemanager/manager_test.go | 104 +++++++++++++++++++++++++++++ 2 files changed, 118 insertions(+), 1 deletion(-) diff --git a/pkg/servicemanager/manager.go b/pkg/servicemanager/manager.go index 5263413835..cef680fadb 100644 --- a/pkg/servicemanager/manager.go +++ b/pkg/servicemanager/manager.go @@ -3,6 +3,7 @@ package servicemanager import ( "context" "fmt" + "syscall" "github.com/openshift/microshift/pkg/util/sigchannel" "k8s.io/klog/v2" @@ -97,9 +98,21 @@ func (m *ServiceManager) Run(ctx context.Context, ready chan<- struct{}, stopped func (m *ServiceManager) asyncRun(ctx context.Context, service Service) (<-chan struct{}, <-chan struct{}) { ready, stopped := make(chan struct{}), make(chan struct{}) go func() { + defer func() { + if r := recover(); r != nil { + klog.Errorf("%s panicked: %s", service.Name(), r) + klog.Error("Stopping MicroShift") + syscall.Kill(syscall.Getpid(), syscall.SIGTERM) + if !sigchannel.IsClosed(stopped) { + close(stopped) + } + } + }() + klog.Infof("Starting %s", service.Name()) if err := service.Run(ctx, ready, stopped); err != nil { - klog.Infof("%s stopped: %s", service.Name(), err) + klog.Errorf("service %s exited with error: %s, stopping MicroShift", service.Name(), err) + syscall.Kill(syscall.Getpid(), syscall.SIGTERM) } else { klog.Infof("%s completed", service.Name()) } diff --git a/pkg/servicemanager/manager_test.go b/pkg/servicemanager/manager_test.go index 12247a4416..f12f894596 100644 --- a/pkg/servicemanager/manager_test.go +++ b/pkg/servicemanager/manager_test.go @@ -3,7 +3,10 @@ package servicemanager import ( "context" "errors" + "os" + "os/signal" "sync" + "syscall" "testing" "time" @@ -125,3 +128,104 @@ 
func TestRunCancellation(t *testing.T) {
 
 	cancel()
 }
+
+// TestRunToServiceCrash runs the manager with a service that returns an
+// error one second after reporting ready, and expects Run to return an error.
+func TestRunToServiceCrash(t *testing.T) {
+	runWithFailingService(t, "bar-crash", func() error {
+		return errors.New("I'm crashing")
+	})
+}
+
+// TestRunToServicePanic runs the manager with a service that panics one
+// second after reporting ready, and expects Run to return an error.
+func TestRunToServicePanic(t *testing.T) {
+	runWithFailingService(t, "bar-panic", func() error {
+		panic("I'm in panic")
+	})
+}
+
+// runWithFailingService is the shared body of the crash and panic tests. It
+// registers two services with a new ServiceManager: "foo", which reports
+// ready and then blocks until ctx is cancelled, and a service called name
+// that depends on "foo", reports ready, sleeps for one second and then calls
+// fail (which either returns an error or panics).
+//
+// The helper reports a test failure if Run returns a nil error or leaves the
+// ready or stopped channel open.
+func runWithFailingService(t *testing.T, name string, fail func() error) {
+	t.Helper()
+
+	// Do not return while a service function is still running.
+	var wg sync.WaitGroup
+	defer wg.Wait()
+
+	// waitForContext reports ready at once and returns when ctx is done.
+	waitForContext := func(ctx context.Context, ready chan<- struct{}, stopped chan<- struct{}) error {
+		defer close(stopped)
+		close(ready)
+		<-ctx.Done()
+		wg.Done()
+		return nil
+	}
+
+	// failAfterDelay reports ready, then fails one second later via fail.
+	failAfterDelay := func(ctx context.Context, ready chan<- struct{}, stopped chan<- struct{}) error {
+		// Deferred so that stopped is closed even when fail panics.
+		defer close(stopped)
+		close(ready)
+		time.Sleep(time.Second)
+		wg.Done()
+		return fail()
+	}
+
+	m := NewServiceManager()
+	m.AddService(NewGenericService("foo", nil, waitForContext))
+	m.AddService(NewGenericService(name, []string{"foo"}, failAfterDelay))
+	wg.Add(2) // one Done per service function
+
+	ctx, cancel := context.WithCancel(context.Background())
+	defer cancel()
+
+	// asyncRun reacts to a failing or panicking service by sending SIGTERM to
+	// this process; turn that signal into a cancellation of ctx so that the
+	// "foo" service returns as well.
+	cancelOnSigTerm(cancel, ctx)
+
+	ready, stopped := make(chan struct{}), make(chan struct{})
+	if err := m.Run(ctx, ready, stopped); err == nil {
+		t.Errorf("an error from %s was expected %s: %v", name, m.Name(), err)
+	}
+
+	// Both channels handed to Run must be closed by the time it returns.
+	if !sigchannel.IsClosed(ready) {
+		t.Errorf("ready channel not closed after completing service manager")
+	}
+
+	if !sigchannel.IsClosed(stopped) {
+		t.Errorf("stopped channel not closed after completing service manager")
+	}
+}
+
+// cancelOnSigTerm starts a goroutine that calls cancel when the process
+// receives SIGTERM or os.Interrupt, and that exits without calling cancel
+// once ctx is done. Either way the signal registration is removed when the
+// goroutine exits, so the test binary is not left with these signals
+// intercepted after the test.
+//
+// NOTE(review): ctx would conventionally be the first parameter; the order
+// is kept so that any other caller of this helper keeps compiling.
+func cancelOnSigTerm(cancel context.CancelFunc, ctx context.Context) {
+	// Buffered: package signal does not block when sending to the channel.
+	sigTerm := make(chan os.Signal, 1)
+	signal.Notify(sigTerm, os.Interrupt, syscall.SIGTERM)
+	go func() {
+		defer signal.Stop(sigTerm)
+		select {
+		case <-sigTerm:
+			cancel()
+		case <-ctx.Done():
+			// Nothing to cancel; just release the signal registration.
+		}
+	}()
+}