From 3e335e536578b3d50e1d87fecd8db3b0ea746b37 Mon Sep 17 00:00:00 2001 From: Qiao Han Date: Fri, 6 Jan 2023 16:38:03 +0800 Subject: [PATCH 1/5] chore: add docker health checks --- internal/start/start.go | 36 ++++++++++++++++++++++++++++++++++++ 1 file changed, 36 insertions(+) diff --git a/internal/start/start.go b/internal/start/start.go index a7615c6511..3f9905c5e3 100644 --- a/internal/start/start.go +++ b/internal/start/start.go @@ -202,6 +202,12 @@ EOF container.Config{ Image: utils.GotrueImage, Env: env, + Healthcheck: &container.HealthConfig{ + Test: []string{"CMD", "wget", "--no-verbose", "--tries=1", "--spider", "http://localhost:9999/health"}, + Interval: 2 * time.Second, + Timeout: 2 * time.Second, + Retries: 10, + }, }, container.HostConfig{ RestartPolicy: container.RestartPolicy{Name: "always"}, @@ -263,6 +269,12 @@ EOF "/bin/sh", "-c", "/app/bin/migrate && /app/bin/realtime eval 'Realtime.Release.seeds(Realtime.Repo)' && /app/bin/server", }, + Healthcheck: &container.HealthConfig{ + Test: []string{"CMD", "bash", "-c", "printf \\0 > /dev/tcp/localhost/4000"}, + Interval: 2 * time.Second, + Timeout: 2 * time.Second, + Retries: 10, + }, }, container.HostConfig{ RestartPolicy: container.RestartPolicy{Name: "always"}, @@ -321,6 +333,12 @@ EOF "ENABLE_IMAGE_TRANSFORMATION=true", "IMGPROXY_URL=http://" + utils.ImgProxyId + ":5001", }, + Healthcheck: &container.HealthConfig{ + Test: []string{"CMD", "wget", "--no-verbose", "--tries=1", "--spider", "http://localhost:5000/status"}, + Interval: 2 * time.Second, + Timeout: 2 * time.Second, + Retries: 10, + }, }, container.HostConfig{ RestartPolicy: container.RestartPolicy{Name: "always"}, @@ -342,6 +360,12 @@ EOF "IMGPROXY_LOCAL_FILESYSTEM_ROOT=/", "IMGPROXY_USE_ETAG=/", }, + Healthcheck: &container.HealthConfig{ + Test: []string{"CMD", "imgproxy", "health"}, + Interval: 2 * time.Second, + Timeout: 2 * time.Second, + Retries: 10, + }, }, container.HostConfig{ VolumesFrom: []string{utils.StorageId}, @@ -363,6 +387,12
@@ EOF "PG_META_PORT=8080", "PG_META_DB_HOST=" + utils.DbId, }, + Healthcheck: &container.HealthConfig{ + Test: []string{"CMD", "node", "-e", "require('http').get('http://localhost:8080/health', (r) => {if (r.statusCode !== 200) throw new Error(r.statusCode)})"}, + Interval: 2 * time.Second, + Timeout: 2 * time.Second, + Retries: 10, + }, }, container.HostConfig{ RestartPolicy: container.RestartPolicy{Name: "always"}, @@ -389,6 +419,12 @@ EOF "SUPABASE_ANON_KEY=" + utils.AnonKey, "SUPABASE_SERVICE_KEY=" + utils.ServiceRoleKey, }, + Healthcheck: &container.HealthConfig{ + Test: []string{"CMD", "node", "-e", "require('http').get('http://localhost:3000/api/profile', (r) => {if (r.statusCode !== 200) throw new Error(r.statusCode)})"}, + Interval: 2 * time.Second, + Timeout: 2 * time.Second, + Retries: 10, + }, }, container.HostConfig{ PortBindings: nat.PortMap{"3000/tcp": []nat.PortBinding{{HostPort: strconv.FormatUint(uint64(utils.Config.Studio.Port), 10)}}}, From e36baa20bdcfcc7ddcf272cfcbd25ce5ef4cd7ae Mon Sep 17 00:00:00 2001 From: Qiao Han Date: Fri, 6 Jan 2023 16:39:29 +0800 Subject: [PATCH 2/5] fix: wait for started services to be healthy --- internal/db/reset/reset.go | 16 +++++++++----- internal/db/start/start.go | 2 +- internal/start/start.go | 45 +++++++++++++++++++++++++++++++++++++- 3 files changed, 56 insertions(+), 7 deletions(-) diff --git a/internal/db/reset/reset.go b/internal/db/reset/reset.go index 2524ec0731..e4203e182a 100644 --- a/internal/db/reset/reset.go +++ b/internal/db/reset/reset.go @@ -141,7 +141,7 @@ func RestartDatabase(ctx context.Context) { fmt.Fprintln(os.Stderr, "Failed to restart database:", err) return } - if !WaitForHealthyDatabase(ctx, healthTimeout) { + if !WaitForHealthyService(ctx, utils.DbId, healthTimeout) { fmt.Fprintln(os.Stderr, "Database is not healthy.") return } @@ -151,17 +151,23 @@ func RestartDatabase(ctx context.Context) { } } -func WaitForHealthyDatabase(ctx context.Context, timeout time.Duration) bool { - // 
Poll for container health status +func RetryWithBackoff(callback func() bool, timeout time.Duration) bool { now := time.Now() expiry := now.Add(timeout) ticker := time.NewTicker(time.Second) defer ticker.Stop() for t := now; t.Before(expiry); t = <-ticker.C { - if resp, err := utils.Docker.ContainerInspect(ctx, utils.DbId); err == nil && - resp.State.Health != nil && resp.State.Health.Status == "healthy" { + if callback() { return true } } return false } + +func WaitForHealthyService(ctx context.Context, container string, timeout time.Duration) bool { + return RetryWithBackoff(func() bool { + // Poll for container health status + resp, err := utils.Docker.ContainerInspect(ctx, container) + return err == nil && resp.State.Health != nil && resp.State.Health.Status == "healthy" + }, timeout) +} diff --git a/internal/db/start/start.go b/internal/db/start/start.go index 73a86c8d77..be93ef80a2 100644 --- a/internal/db/start/start.go +++ b/internal/db/start/start.go @@ -81,7 +81,7 @@ func StartDatabase(ctx context.Context, fsys afero.Fs, w io.Writer, options ...f } func initDatabase(ctx context.Context, fsys afero.Fs, w io.Writer, options ...func(*pgx.ConnConfig)) error { - if !reset.WaitForHealthyDatabase(ctx, 20*time.Second) { + if !reset.WaitForHealthyService(ctx, utils.DbId, 20*time.Second) { fmt.Fprintln(os.Stderr, "Database is not healthy.") } // Initialise globals diff --git a/internal/start/start.go b/internal/start/start.go index 3f9905c5e3..66a5470916 100644 --- a/internal/start/start.go +++ b/internal/start/start.go @@ -6,15 +6,18 @@ import ( _ "embed" "errors" "fmt" + "net/http" "os" "strconv" "strings" "text/template" + "time" "github.com/docker/docker/api/types/container" "github.com/docker/go-connections/nat" "github.com/jackc/pgx/v4" "github.com/spf13/afero" + "github.com/supabase/cli/internal/db/reset" "github.com/supabase/cli/internal/db/start" "github.com/supabase/cli/internal/utils" ) @@ -91,6 +94,7 @@ func run(p utils.Program, ctx context.Context, 
fsys afero.Fs, excludedContainers } p.Send(utils.StatusMsg("Starting containers...")) + var started []string // Start Kong. if !isContainerExcluded(utils.KongImage, excluded) { @@ -131,6 +135,7 @@ EOF ); err != nil { return err } + started = append(started, utils.KongId) } // Start GoTrue. @@ -216,6 +221,7 @@ EOF ); err != nil { return err } + started = append(started, utils.GotrueId) } // Start Inbucket. @@ -240,6 +246,7 @@ EOF ); err != nil { return err } + started = append(started, utils.InbucketId) } // Start Realtime. @@ -283,6 +290,7 @@ EOF ); err != nil { return err } + started = append(started, utils.RealtimeId) } // Start PostgREST. @@ -306,6 +314,7 @@ EOF ); err != nil { return err } + started = append(started, utils.RestId) } // Start Storage. @@ -347,6 +356,7 @@ EOF ); err != nil { return err } + started = append(started, utils.StorageId) } // Start Storage ImgProxy. @@ -375,6 +385,7 @@ EOF ); err != nil { return err } + started = append(started, utils.ImgProxyId) } // Start pg-meta. @@ -401,6 +412,7 @@ EOF ); err != nil { return err } + started = append(started, utils.PgmetaId) } // Start Studio. 
@@ -434,9 +446,10 @@ EOF ); err != nil { return err } + started = append(started, utils.StudioId) } - return nil + return waitForServiceReady(ctx, started) } func isContainerExcluded(imageName string, excluded map[string]bool) bool { @@ -454,3 +467,33 @@ func ExcludableContainers() []string { } return names } + +func waitForServiceReady(ctx context.Context, started []string) error { + timeout := 10 * time.Second + for _, container := range started { + var ready bool + if container == utils.RestId { + // PostgREST does not support native health checks + restUrl := fmt.Sprintf("http://localhost:%d/rest/v1/", utils.Config.Api.Port) + req, err := http.NewRequestWithContext(ctx, http.MethodHead, restUrl, nil) + if err != nil { + return err + } + req.Header.Add("apikey", utils.AnonKey) + ready = waitForStatusOK(req, timeout) + } else { + ready = reset.WaitForHealthyService(ctx, container, timeout) + } + if !ready { + fmt.Fprintln(os.Stderr, "Service not healthy:", container) + } + } + return nil +} + +func waitForStatusOK(req *http.Request, timeout time.Duration) bool { + return reset.RetryWithBackoff(func() bool { + resp, err := http.DefaultClient.Do(req) + return err == nil && resp.StatusCode == http.StatusOK + }, timeout) +} From 688d9bfa5c01861bf103957062deb575a0a4f393 Mon Sep 17 00:00:00 2001 From: Qiao Han Date: Fri, 6 Jan 2023 16:47:06 +0800 Subject: [PATCH 3/5] chore: rename retry function --- internal/db/reset/reset.go | 4 ++-- internal/start/start.go | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/internal/db/reset/reset.go b/internal/db/reset/reset.go index e4203e182a..f0c2d8557e 100644 --- a/internal/db/reset/reset.go +++ b/internal/db/reset/reset.go @@ -151,7 +151,7 @@ func RestartDatabase(ctx context.Context) { } } -func RetryWithBackoff(callback func() bool, timeout time.Duration) bool { +func RetryEverySecond(callback func() bool, timeout time.Duration) bool { now := time.Now() expiry := now.Add(timeout) ticker := 
time.NewTicker(time.Second) @@ -165,7 +165,7 @@ func RetryWithBackoff(callback func() bool, timeout time.Duration) bool { } func WaitForHealthyService(ctx context.Context, container string, timeout time.Duration) bool { - return RetryWithBackoff(func() bool { + return RetryEverySecond(func() bool { // Poll for container health status resp, err := utils.Docker.ContainerInspect(ctx, container) return err == nil && resp.State.Health != nil && resp.State.Health.Status == "healthy" diff --git a/internal/start/start.go b/internal/start/start.go index 66a5470916..f5fcad4881 100644 --- a/internal/start/start.go +++ b/internal/start/start.go @@ -492,7 +492,7 @@ func waitForServiceReady(ctx context.Context, started []string) error { } func waitForStatusOK(req *http.Request, timeout time.Duration) bool { - return reset.RetryWithBackoff(func() bool { + return reset.RetryEverySecond(func() bool { resp, err := http.DefaultClient.Do(req) return err == nil && resp.StatusCode == http.StatusOK }, timeout) From fabbb03834b870fb85b5a7f86a282ca2517ec366 Mon Sep 17 00:00:00 2001 From: Qiao Han Date: Fri, 6 Jan 2023 17:43:44 +0800 Subject: [PATCH 4/5] chore: check all containers at once --- internal/db/reset/reset.go | 14 +++++++---- internal/start/start.go | 48 +++++++++++++++++++++----------------- 2 files changed, 36 insertions(+), 26 deletions(-) diff --git a/internal/db/reset/reset.go b/internal/db/reset/reset.go index f0c2d8557e..a796d58329 100644 --- a/internal/db/reset/reset.go +++ b/internal/db/reset/reset.go @@ -164,10 +164,14 @@ func RetryEverySecond(callback func() bool, timeout time.Duration) bool { return false } +func IsContainerHealthy(ctx context.Context, container string) bool { + resp, err := utils.Docker.ContainerInspect(ctx, container) + return err == nil && resp.State.Health != nil && resp.State.Health.Status == "healthy" +} + func WaitForHealthyService(ctx context.Context, container string, timeout time.Duration) bool { - return RetryEverySecond(func() bool { - // 
Poll for container health status - resp, err := utils.Docker.ContainerInspect(ctx, container) - return err == nil && resp.State.Health != nil && resp.State.Health.Status == "healthy" - }, timeout) + probe := func() bool { + return IsContainerHealthy(ctx, container) + } + return RetryEverySecond(probe, timeout) } diff --git a/internal/start/start.go b/internal/start/start.go index f5fcad4881..895366d5b8 100644 --- a/internal/start/start.go +++ b/internal/start/start.go @@ -469,31 +469,37 @@ func ExcludableContainers() []string { } func waitForServiceReady(ctx context.Context, started []string) error { - timeout := 10 * time.Second - for _, container := range started { - var ready bool - if container == utils.RestId { - // PostgREST does not support native health checks - restUrl := fmt.Sprintf("http://localhost:%d/rest/v1/", utils.Config.Api.Port) - req, err := http.NewRequestWithContext(ctx, http.MethodHead, restUrl, nil) - if err != nil { - return err + probe := func() bool { + var unhealthy []string + for _, container := range started { + if !isServiceReady(ctx, container) { + unhealthy = append(unhealthy, container) } - req.Header.Add("apikey", utils.AnonKey) - ready = waitForStatusOK(req, timeout) - } else { - ready = reset.WaitForHealthyService(ctx, container, timeout) - } - if !ready { - fmt.Fprintln(os.Stderr, "Service not healthy:", container) } + started = unhealthy + return len(started) == 0 + } + if !reset.RetryEverySecond(probe, 10*time.Second) { + return fmt.Errorf("service not healthy: %v", started) } return nil } -func waitForStatusOK(req *http.Request, timeout time.Duration) bool { - return reset.RetryEverySecond(func() bool { - resp, err := http.DefaultClient.Do(req) - return err == nil && resp.StatusCode == http.StatusOK - }, timeout) +func isServiceReady(ctx context.Context, container string) bool { + if container == utils.RestId { + return IsPostgRESTHealthy(ctx) + } + return reset.IsContainerHealthy(ctx, container) +} + +func 
IsPostgRESTHealthy(ctx context.Context) bool { + // PostgREST does not support native health checks + restUrl := fmt.Sprintf("http://localhost:%d/rest/v1/", utils.Config.Api.Port) + req, err := http.NewRequestWithContext(ctx, http.MethodHead, restUrl, nil) + if err != nil { + return false + } + req.Header.Add("apikey", utils.AnonKey) + resp, err := http.DefaultClient.Do(req) + return err == nil && resp.Body.Close() == nil && resp.StatusCode == http.StatusOK } From ecfa87cc16d1ea2f40de74b0feaa9b484165011b Mon Sep 17 00:00:00 2001 From: Qiao Han Date: Fri, 6 Jan 2023 17:49:53 +0800 Subject: [PATCH 5/5] chore: update tests with health probes --- internal/start/start_test.go | 24 +++++++++++++++++------- 1 file changed, 17 insertions(+), 7 deletions(-) diff --git a/internal/start/start_test.go b/internal/start/start_test.go index f12e746cf1..6ab6c245a6 100644 --- a/internal/start/start_test.go +++ b/internal/start/start_test.go @@ -144,12 +144,6 @@ func TestDatabaseStart(t *testing.T) { utils.DbId = "test-postgres" utils.Config.Db.Port = 54322 apitest.MockDockerStart(utils.Docker, imageUrl, utils.DbId) - gock.New(utils.Docker.DaemonHost()). - Get("/v" + utils.Docker.ClientVersion() + "/containers/" + utils.DbId + "/json"). - Reply(http.StatusOK).
- JSON(types.ContainerJSON{ContainerJSONBase: &types.ContainerJSONBase{ - State: &types.ContainerState{Health: &types.Health{Status: "healthy"}}, - }}) // Start services utils.KongId = "test-kong" apitest.MockDockerStart(utils.Docker, utils.GetRegistryImageUrl(utils.KongImage), utils.KongId) @@ -168,7 +162,7 @@ func TestDatabaseStart(t *testing.T) { apitest.MockDockerStart(utils.Docker, utils.GetRegistryImageUrl(utils.PostgrestImage), utils.RestId) utils.StorageId = "test-storage" apitest.MockDockerStart(utils.Docker, utils.GetRegistryImageUrl(utils.StorageImage), utils.StorageId) - utils.StudioId = "test-imgproxy" + utils.ImgProxyId = "test-imgproxy" apitest.MockDockerStart(utils.Docker, utils.GetRegistryImageUrl(utils.ImageProxyImage), utils.ImgProxyId) utils.DifferId = "test-differ" apitest.MockDockerStart(utils.Docker, utils.GetRegistryImageUrl(utils.DifferImage), utils.DifferId) @@ -185,6 +179,22 @@ func TestDatabaseStart(t *testing.T) { Reply("CREATE SCHEMA"). Query(utils.InitialSchemaSql). Reply("CREATE SCHEMA") + // Setup health probes + started := []string{ + utils.DbId, utils.KongId, utils.GotrueId, utils.InbucketId, utils.RealtimeId, + utils.StorageId, utils.ImgProxyId, utils.PgmetaId, utils.StudioId, + } + for _, container := range started { + gock.New(utils.Docker.DaemonHost()). + Get("/v" + utils.Docker.ClientVersion() + "/containers/" + container + "/json"). + Reply(http.StatusOK). + JSON(types.ContainerJSON{ContainerJSONBase: &types.ContainerJSONBase{ + State: &types.ContainerState{Health: &types.Health{Status: "healthy"}}, + }}) + } + gock.New("localhost"). + Head("/rest/v1/"). + Reply(http.StatusOK) // Run test err := utils.RunProgram(context.Background(), func(p utils.Program, ctx context.Context) error { return run(p, context.Background(), fsys, []string{}, conn.Intercept)