diff --git a/internal/hcs/errors.go b/internal/hcs/errors.go index 644f0ab711..08a6631451 100644 --- a/internal/hcs/errors.go +++ b/internal/hcs/errors.go @@ -78,6 +78,9 @@ var ( // ErrNotSupported is an error encountered when hcs doesn't support the request ErrPlatformNotSupported = errors.New("unsupported platform request") + + // ErrProcessAlreadyStopped is returned by hcs if the process we're trying to kill has already been stopped. + ErrProcessAlreadyStopped = syscall.Errno(0x8037011f) ) type ErrorEvent struct { @@ -281,6 +284,7 @@ func IsTimeout(err error) bool { func IsAlreadyStopped(err error) bool { err = getInnerError(err) return err == ErrVmcomputeAlreadyStopped || + err == ErrProcessAlreadyStopped || err == ErrElementNotFound } diff --git a/internal/hcs/process.go b/internal/hcs/process.go index 3f97bb5e8c..f4605922ab 100644 --- a/internal/hcs/process.go +++ b/internal/hcs/process.go @@ -18,16 +18,17 @@ import ( // ContainerError is an error encountered in HCS type Process struct { - handleLock sync.RWMutex - handle vmcompute.HcsProcess - processID int - system *System - hasCachedStdio bool - stdioLock sync.Mutex - stdin io.WriteCloser - stdout io.ReadCloser - stderr io.ReadCloser - callbackNumber uintptr + handleLock sync.RWMutex + handle vmcompute.HcsProcess + processID int + system *System + hasCachedStdio bool + stdioLock sync.Mutex + stdin io.WriteCloser + stdout io.ReadCloser + stderr io.ReadCloser + callbackNumber uintptr + killSignalDelivered bool closedWaitOnce sync.Once waitBlock chan struct{} @@ -151,24 +152,45 @@ func (process *Process) Kill(ctx context.Context) (bool, error) { return false, makeProcessError(process, operation, ErrAlreadyClosed, nil) } - resultJSON, err := vmcompute.HcsTerminateProcess(ctx, process.handle) - if err != nil && errors.Is(err, os.ErrPermission) { - // HcsTerminateProcess ends up calling TerminateProcess in the context - // of a container. According to the TerminateProcess documentation: - // https://docs.microsoft.com/en-us/windows/win32/api/processthreadsapi/nf-processthreadsapi-terminateprocess#remarks - // After a process has terminated, call to TerminateProcess with open - // handles to the process fails with ERROR_ACCESS_DENIED (5) error code. - // It's safe to ignore this error here. HCS should always have permissions - // to kill processes inside any container. So an ERROR_ACCESS_DENIED - // is unlikely to be anything else than what the ending remarks in the - // documentation states. + if process.killSignalDelivered { + // A kill signal has already been sent to this process. Sending a second + // one offers no real benefit, as processes cannot stop themselves from + // being terminated, once a TerminateProcess has been issued. Sending a + // second kill may result in a number of errors (two of which detailed bellow) + // and which we can avoid handling. return true, nil } + + resultJSON, err := vmcompute.HcsTerminateProcess(ctx, process.handle) + if err != nil { + // We still need to check these two cases, as processes may still be killed by an + // external actor (human operator, OOM, random script etc). + if errors.Is(err, os.ErrPermission) || IsAlreadyStopped(err) { + // There are two cases where it should be safe to ignore an error returned + // by HcsTerminateProcess. The first one is cause by the fact that + // HcsTerminateProcess ends up calling TerminateProcess in the context + // of a container. According to the TerminateProcess documentation: + // https://docs.microsoft.com/en-us/windows/win32/api/processthreadsapi/nf-processthreadsapi-terminateprocess#remarks + // After a process has terminated, call to TerminateProcess with open + // handles to the process fails with ERROR_ACCESS_DENIED (5) error code. + // It's safe to ignore this error here. HCS should always have permissions + // to kill processes inside any container. So an ERROR_ACCESS_DENIED + // is unlikely to be anything else than what the ending remarks in the + // documentation states. + // + // The second case is generated by hcs itself, if for any reason HcsTerminateProcess + // is called twice in a very short amount of time. In such cases, hcs may return + // HCS_E_PROCESS_ALREADY_STOPPED. + return true, nil + } + } events := processHcsResult(ctx, resultJSON) delivered, err := process.processSignalResult(ctx, err) if err != nil { err = makeProcessError(process, operation, err, events) } + + process.killSignalDelivered = delivered return delivered, err } diff --git a/test/vendor/github.com/Microsoft/hcsshim/internal/hcs/errors.go b/test/vendor/github.com/Microsoft/hcsshim/internal/hcs/errors.go index 644f0ab711..08a6631451 100644 --- a/test/vendor/github.com/Microsoft/hcsshim/internal/hcs/errors.go +++ b/test/vendor/github.com/Microsoft/hcsshim/internal/hcs/errors.go @@ -78,6 +78,9 @@ var ( // ErrNotSupported is an error encountered when hcs doesn't support the request ErrPlatformNotSupported = errors.New("unsupported platform request") + + // ErrProcessAlreadyStopped is returned by hcs if the process we're trying to kill has already been stopped. + ErrProcessAlreadyStopped = syscall.Errno(0x8037011f) ) type ErrorEvent struct { @@ -281,6 +284,7 @@ func IsTimeout(err error) bool { func IsAlreadyStopped(err error) bool { err = getInnerError(err) return err == ErrVmcomputeAlreadyStopped || + err == ErrProcessAlreadyStopped || err == ErrElementNotFound } diff --git a/test/vendor/github.com/Microsoft/hcsshim/internal/hcs/process.go b/test/vendor/github.com/Microsoft/hcsshim/internal/hcs/process.go index 3f97bb5e8c..f4605922ab 100644 --- a/test/vendor/github.com/Microsoft/hcsshim/internal/hcs/process.go +++ b/test/vendor/github.com/Microsoft/hcsshim/internal/hcs/process.go @@ -18,16 +18,17 @@ import ( // ContainerError is an error encountered in HCS type Process struct { - handleLock sync.RWMutex - handle vmcompute.HcsProcess - processID int - system *System - hasCachedStdio bool - stdioLock sync.Mutex - stdin io.WriteCloser - stdout io.ReadCloser - stderr io.ReadCloser - callbackNumber uintptr + handleLock sync.RWMutex + handle vmcompute.HcsProcess + processID int + system *System + hasCachedStdio bool + stdioLock sync.Mutex + stdin io.WriteCloser + stdout io.ReadCloser + stderr io.ReadCloser + callbackNumber uintptr + killSignalDelivered bool closedWaitOnce sync.Once waitBlock chan struct{} @@ -151,24 +152,45 @@ func (process *Process) Kill(ctx context.Context) (bool, error) { return false, makeProcessError(process, operation, ErrAlreadyClosed, nil) } - resultJSON, err := vmcompute.HcsTerminateProcess(ctx, process.handle) - if err != nil && errors.Is(err, os.ErrPermission) { - // HcsTerminateProcess ends up calling TerminateProcess in the context - // of a container. According to the TerminateProcess documentation: - // https://docs.microsoft.com/en-us/windows/win32/api/processthreadsapi/nf-processthreadsapi-terminateprocess#remarks - // After a process has terminated, call to TerminateProcess with open - // handles to the process fails with ERROR_ACCESS_DENIED (5) error code. - // It's safe to ignore this error here. HCS should always have permissions - // to kill processes inside any container. So an ERROR_ACCESS_DENIED - // is unlikely to be anything else than what the ending remarks in the - // documentation states. + if process.killSignalDelivered { + // A kill signal has already been sent to this process. Sending a second + // one offers no real benefit, as processes cannot stop themselves from + // being terminated, once a TerminateProcess has been issued. Sending a + // second kill may result in a number of errors (two of which detailed bellow) + // and which we can avoid handling. return true, nil } + + resultJSON, err := vmcompute.HcsTerminateProcess(ctx, process.handle) + if err != nil { + // We still need to check these two cases, as processes may still be killed by an + // external actor (human operator, OOM, random script etc). + if errors.Is(err, os.ErrPermission) || IsAlreadyStopped(err) { + // There are two cases where it should be safe to ignore an error returned + // by HcsTerminateProcess. The first one is cause by the fact that + // HcsTerminateProcess ends up calling TerminateProcess in the context + // of a container. According to the TerminateProcess documentation: + // https://docs.microsoft.com/en-us/windows/win32/api/processthreadsapi/nf-processthreadsapi-terminateprocess#remarks + // After a process has terminated, call to TerminateProcess with open + // handles to the process fails with ERROR_ACCESS_DENIED (5) error code. + // It's safe to ignore this error here. HCS should always have permissions + // to kill processes inside any container. So an ERROR_ACCESS_DENIED + // is unlikely to be anything else than what the ending remarks in the + // documentation states. + // + // The second case is generated by hcs itself, if for any reason HcsTerminateProcess + // is called twice in a very short amount of time. In such cases, hcs may return + // HCS_E_PROCESS_ALREADY_STOPPED. + return true, nil + } + } events := processHcsResult(ctx, resultJSON) delivered, err := process.processSignalResult(ctx, err) if err != nil { err = makeProcessError(process, operation, err, events) } + + process.killSignalDelivered = delivered return delivered, err }