Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 6 additions & 1 deletion api/func-e_run_test.go
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
// Copyright 2025 Tetrate
// Copyright func-e contributors
// SPDX-License-Identifier: Apache-2.0

package api
Expand All @@ -25,3 +25,8 @@ func TestRun_InvalidConfig(t *testing.T) {
func TestRun_StaticFile(t *testing.T) {
e2e.TestRun_StaticFile(context.Background(), t, fakeFuncEFactory{})
}

func TestRun_CtrlCs(t *testing.T) {
// This doesn't call ctrl-c, rather cancels the context multiple times
e2e.TestRun_CtrlCs(context.Background(), t, fakeFuncEFactory{})
}
8 changes: 5 additions & 3 deletions api/func-e_test.go
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
// Copyright 2025 Tetrate
// Copyright func-e contributors
// SPDX-License-Identifier: Apache-2.0

package api
Expand Down Expand Up @@ -48,8 +48,10 @@ type fakeFuncE struct {

// Interrupt cancels the context created in Run as we don't want to actually interrupt the calling test!
func (f *fakeFuncE) Interrupt(context.Context) error {
f.cancelFunc()
f.cancelFunc = nil
if f.cancelFunc != nil {
f.cancelFunc()
// Don't set to nil in case interrupt is called multiple times (ctrl+c twice)
}
return nil
}

Expand Down
10 changes: 9 additions & 1 deletion e2e/func-e_run_test.go
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
// Copyright 2025 Tetrate
// Copyright func-e contributors
// SPDX-License-Identifier: Apache-2.0

package e2e
Expand All @@ -25,3 +25,11 @@ func TestRun_InvalidConfig(t *testing.T) {
func TestRun_StaticFile(t *testing.T) {
e2e.TestRun_StaticFile(context.Background(), t, funcEFactory{})
}

func TestRun_CtrlCs(t *testing.T) {
e2e.TestRun_CtrlCs(context.Background(), t, funcEFactory{})
}

func TestRun_Kill9(t *testing.T) {
e2e.TestRun_Kill9(context.Background(), t, funcEFactory{})
}
9 changes: 6 additions & 3 deletions internal/envoy/proc_helpers.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,9 +11,12 @@ import (

// interrupt attempts to interrupt the process. It doesn't necessarily kill it.
func interrupt(p *os.Process) error {
// Send SIGINT to the child PID directly
if err := p.Signal(syscall.SIGINT); err != nil && !errors.Is(err, os.ErrProcessDone) {
return err
// Send SIGINT to the process group (negative PID) to ensure all child processes receive it
if err := syscall.Kill(-p.Pid, syscall.SIGINT); err != nil && !errors.Is(err, os.ErrProcessDone) {
// Fallback to sending to the process directly if group signal fails
if err := p.Signal(syscall.SIGINT); err != nil && !errors.Is(err, os.ErrProcessDone) {
return err
}
}
return nil
}
Expand Down
38 changes: 30 additions & 8 deletions internal/envoy/shutdown.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ package envoy

import (
"context"
"errors"
"fmt"
"os"
"path/filepath"
Expand All @@ -15,13 +16,13 @@ import (
)

// RegisterShutdownHook registers the passed functions to be run after Envoy has started
// and just before func-e instructs Envoy to exit
// and just before func-e instructs Envoy to exit.
func (r *Runtime) RegisterShutdownHook(f func(context.Context) error) {
r.shutdownHooks = append(r.shutdownHooks, f)
}

func (r *Runtime) handleShutdown() {
// Ensure the SIGINT forwards to Envoy even if a shutdown hook panics
// Ensure the SIGINT forwards to Envoy even if a shutdown hook panics.
defer func() {
r.interruptEnvoy()
if r.cmd != nil && r.cmd.Process != nil {
Expand All @@ -30,23 +31,42 @@ func (r *Runtime) handleShutdown() {
}()

deadline := time.Now().Add(shutdownTimeout)
timeout, cancel := context.WithDeadline(context.Background(), deadline)
ctx, cancel := context.WithDeadline(context.Background(), deadline)
defer cancel()

fmt.Fprintf(r.Out, "invoking shutdown hooks with deadline %s\n", deadline.Format(dateFormat)) //nolint:errcheck

// Run each hook in parallel, logging each error
var (
mu sync.Mutex
errs []error
)

// Run each hook in parallel, collecting errors.
var wg sync.WaitGroup
wg.Add(len(r.shutdownHooks))
for _, f := range r.shutdownHooks {
go func(f func(context.Context) error) {
defer wg.Done()
if err := f(timeout); err != nil {
fmt.Fprintf(r.Out, "failed shutdown hook: %s\n", err) //nolint:errcheck
defer func() {
if p := recover(); p != nil {
mu.Lock()
errs = append(errs, fmt.Errorf("panic in shutdown hook: %v", p))
mu.Unlock()
}
}()
if err := f(ctx); err != nil && !errors.Is(err, context.Canceled) {
// Don't collect if context was cancelled (ctrl+c twice scenario).
mu.Lock()
errs = append(errs, fmt.Errorf("failed shutdown hook: %w", err))
mu.Unlock()
}
}(f)
}
wg.Wait()

if len(errs) > 0 {
fmt.Fprintf(r.Out, "shutdown errors: %v\n", errors.Join(errs...)) //nolint:errcheck
}
}

func (r *Runtime) interruptEnvoy() {
Expand All @@ -58,10 +78,12 @@ func (r *Runtime) interruptEnvoy() {
func (r *Runtime) archiveRunDir() error {
// Ensure logs are closed before we try to archive them.
if r.OutFile != nil {
r.OutFile.Close() //nolint
_ = r.OutFile.Sync()
_ = r.OutFile.Close()
}
if r.ErrFile != nil {
r.ErrFile.Close() //nolint
_ = r.ErrFile.Sync()
_ = r.ErrFile.Close()
}
if r.o.DontArchiveRunDir {
return nil
Expand Down
199 changes: 199 additions & 0 deletions internal/envoy/shutdown_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,199 @@
// Copyright func-e contributors
// SPDX-License-Identifier: Apache-2.0

package envoy

import (
"context"
"fmt"
"os"
"os/exec"
"syscall"
"testing"
"time"

"github.com/stretchr/testify/require"
)

var (
// testProcessTimeout is how long we wait for a process to exit in tests.
// Set to 40% of shutdownTimeout since the process should die quickly after
// handleShutdown completes (which already waited up to shutdownTimeout).
testProcessTimeout = shutdownTimeout * 2 / 5

// testSlowHookDuration simulates a hook that would exceed shutdownTimeout.
// Set to 6x shutdownTimeout to ensure it clearly exceeds the timeout.
testSlowHookDuration = shutdownTimeout * 6

// testShutdownBuffer is extra time beyond shutdownTimeout to account for
// goroutine scheduling and process termination overhead.
// Set to 40% of shutdownTimeout to be proportional but reasonable.
testShutdownBuffer = shutdownTimeout * 2 / 5
)

// TestHandleShutdown_PanicInHook tests that a panic in a shutdown hook
// doesn't prevent Envoy from being terminated
func TestHandleShutdown_PanicInHook(t *testing.T) {
// Create a test command that sleeps (simulating Envoy)
cmd := exec.Command("sleep", fmt.Sprintf("%d", int(testSlowHookDuration.Seconds())))
cmd.SysProcAttr = &syscall.SysProcAttr{Setpgid: true}

require.NoError(t, cmd.Start())

r := &Runtime{
cmd: cmd,
Out: os.Stdout,
logf: func(format string, args ...interface{}) {
t.Logf(format, args...)
},
}

// Register a hook that panics
r.RegisterShutdownHook(func(ctx context.Context) error {
panic("shutdown hook panic!")
})

// Register another hook to verify execution continues
hookExecuted := false
r.RegisterShutdownHook(func(ctx context.Context) error {
hookExecuted = true
return nil
})

// Call handleShutdown - it should recover from panic and still kill the process
r.handleShutdown()

// Wait for process to exit
done := make(chan struct{})
go func() {
_ = cmd.Wait()
close(done)
}()

select {
case <-done:
// Process exited as expected
case <-time.After(testProcessTimeout):
t.Fatal("Process didn't exit within timeout")
}

require.True(t, hookExecuted, "Other hooks should still execute despite panic")
}

// TestHandleShutdown_MultipleHooksPanic tests that multiple panicking hooks
// don't prevent termination
func TestHandleShutdown_MultipleHooksPanic(t *testing.T) {
cmd := exec.Command("sleep", fmt.Sprintf("%d", int(testSlowHookDuration.Seconds())))
cmd.SysProcAttr = &syscall.SysProcAttr{Setpgid: true}

require.NoError(t, cmd.Start())

r := &Runtime{
cmd: cmd,
Out: os.Stdout,
logf: func(format string, args ...interface{}) {
t.Logf(format, args...)
},
}

// Register multiple panicking hooks
for i := 0; i < 3; i++ {
idx := i
r.RegisterShutdownHook(func(ctx context.Context) error {
panic(fmt.Sprintf("hook %d panic!", idx))
})
}

// Call handleShutdown
r.handleShutdown()

// Wait for process to exit
done := make(chan struct{})
go func() {
_ = cmd.Wait()
close(done)
}()

select {
case <-done:
// Process exited as expected
case <-time.After(testProcessTimeout):
t.Fatal("Process didn't exit within timeout despite multiple panics")
}
}

// TestHandleShutdown_SlowHook tests that slow hooks don't prevent termination
func TestHandleShutdown_SlowHook(t *testing.T) {
cmd := exec.Command("sleep", fmt.Sprintf("%d", int(testSlowHookDuration.Seconds())))
cmd.SysProcAttr = &syscall.SysProcAttr{Setpgid: true}

require.NoError(t, cmd.Start())

r := &Runtime{
cmd: cmd,
Out: os.Stdout,
logf: func(format string, args ...interface{}) {
t.Logf(format, args...)
},
}

// Register a slow hook
r.RegisterShutdownHook(func(ctx context.Context) error {
select {
case <-ctx.Done():
return ctx.Err()
case <-time.After(testSlowHookDuration):
return nil
}
})

start := time.Now()
r.handleShutdown()
elapsed := time.Since(start)

// Should complete within shutdown timeout + buffer for overhead
require.Less(t, elapsed, shutdownTimeout+testShutdownBuffer, "Shutdown should respect timeout")

// Wait for process to exit
done := make(chan struct{})
go func() {
_ = cmd.Wait()
close(done)
}()

select {
case <-done:
// Process exited as expected
case <-time.After(testProcessTimeout):
t.Fatal("Process didn't exit within timeout even with slow hooks")
}
}

// TestEnsureProcessDone tests that EnsureProcessDone kills the process
func TestEnsureProcessDone(t *testing.T) {
cmd := exec.Command("sleep", fmt.Sprintf("%d", int(testSlowHookDuration.Seconds())))
require.NoError(t, cmd.Start())

// Ensure process is running
require.NoError(t, cmd.Process.Signal(syscall.Signal(0)))

// Call ensureProcessDone
err := ensureProcessDone(cmd.Process)
require.NoError(t, err)

// Wait for process to exit
done := make(chan struct{})
go func() {
_ = cmd.Wait()
close(done)
}()

select {
case <-done:
// Process exited as expected
case <-time.After(testProcessTimeout / 2):
// Use half of testProcessTimeout since EnsureProcessDone sends SIGKILL
// which should terminate the process immediately
t.Fatal("Process didn't exit after EnsureProcessDone")
}
}
Loading
Loading