From 139103077d9fb8ac64bc764b89dda83405a0c631 Mon Sep 17 00:00:00 2001 From: Richard Huang Date: Fri, 16 Jul 2021 11:13:25 -0700 Subject: [PATCH] v23/context: fix goroutine leak in WithRootCancel Whenever a child context is created via `WithRootCancel`, a goroutine spawns to gracefully handle closing the child context whenever the root context gets canceled. However, the current implementation leaks goroutines whenever the child context exits before the root context exits. This pull request looks to fix the problem by exiting the goroutine whenever the child context is done. See `TestRootCancelGoroutineLeak` for the reproduction use case and test that demonstrates the leak. This is also easily visible via `pprof` using the `goroutine` module. --- v23/context/context.go | 8 ++++++-- v23/context/context_test.go | 32 ++++++++++++++++++++++++++++++++ 2 files changed, 38 insertions(+), 2 deletions(-) diff --git a/v23/context/context.go b/v23/context/context.go index 34cad72e8..2c3618598 100644 --- a/v23/context/context.go +++ b/v23/context/context.go @@ -303,8 +303,12 @@ func WithRootCancel(parent *T) (*T, CancelFunc) { // Forward the cancelation from the root context to the newly // created context. go func() { - <-rootCtx.Done() - cancel() + select { + case <-rootCtx.Done(): + cancel() + case <-ctx.Done(): + cancel() + } }() } else if atomic.AddInt32(&nRootCancelWarning, 1) < 3 { vlog.Errorf("context.WithRootCancel: context %+v is not derived from root v23 context.\n", parent) diff --git a/v23/context/context_test.go b/v23/context/context_test.go index 02fbb0b18..167f9d2d6 100644 --- a/v23/context/context_test.go +++ b/v23/context/context_test.go @@ -9,6 +9,8 @@ import ( gocontext "context" "fmt" "os" + "runtime" + "strings" "sync" "testing" "time" @@ -320,6 +322,36 @@ func TestRootCancelChain(t *testing.T) { } } +func TestRootCancelGoroutineLeak(t *testing.T) { + rootCtx, rootcancel := context.RootContext() + const iterations = 1024 + for i := 0; i != iterations; i++ { + _, cancel := context.WithRootCancel(rootCtx) + cancel() + } + + // Arbitrary threshold to wait for the goroutines in the created contexts + // above to exit. This threshold was arbitrarily created after running + // `go test -count=10000 -run TestRootCancelGoroutineLeak$` and verifying + // that the tests did not fail flakily. + const waitThreshold = 8*time.Millisecond + time.Sleep(waitThreshold) + + // Verify that goroutines no longer exist in the runtime stack. + buf := make([]byte, 2<<20) + buf = buf[:runtime.Stack(buf, true)] + count := 0 + for _, g := range strings.Split(string(buf), "\n\n") { + if strings.Contains(g, "v.io/v23/context.WithRootCancel.func1") { + count++ + } + } + if count != 0 { + t.Errorf("expected 0 but got %d: goroutine leaking in WithRootCancel", count) + } + rootcancel() +} + func TestRootCancel_GoContext(t *testing.T) { root, rootcancel := context.RootContext()