Router graceful-shutdown patch (git-format unified diff).

What the hunks below do:
  * reload-haproxy: exit 0 without reloading once the terminate marker file
    exists.
  * shutdown-haproxy (new file): create the drain marker, sleep DRAIN_PERIOD
    seconds, send SIGUSR1 to haproxy processes for up to GRACE_PERIOD seconds,
    then create the terminate marker and send SIGTERM until pkill reports that
    no haproxy process is left.
  * metrics.IsDraining (new): healthz check that succeeds only when os.Stat on
    the drain marker reports "not exist".
  * metrics.Listener: LiveChecks/ReadyChecks are replaced by LBReadyChecks
    (installed with healthz.InstallHandler), PodLiveChecks (/healthz/live) and
    PodReadyChecks (/healthz/ready).

NOTE(review): line breaks, leading indentation and blank context lines were
reconstructed from the hunk line counts. Alignment inside context lines could
not be recovered, so apply with `git apply --ignore-whitespace` and confirm
against the target tree.
NOTE(review): the template.go hunk carries one review fix (the lbReadyChecks
append no longer can alias podReadyChecks), so the post-image blob id on its
index line is stale.
NOTE(review): IsDraining registers its check as "is-terminating" although it
tests the drain marker, and the Go side hard-codes the drain marker path while
shutdown-haproxy lets DRAIN_MARKER override it. Confirm both are intended.

diff --git a/images/router/haproxy/reload-haproxy b/images/router/haproxy/reload-haproxy
index 7e6855156..fed5478f8 100755
--- a/images/router/haproxy/reload-haproxy
+++ b/images/router/haproxy/reload-haproxy
@@ -66,6 +66,10 @@ function haproxyHealthCheck() {
 }
 
 
+if [[ -e ${TERMINATE_MARKER:-'/var/lib/haproxy/run/terminating'} ]]; then
+  exit 0
+fi
+
 old_pids=$(pidof haproxy)
 
 reload_status=0
diff --git a/images/router/haproxy/shutdown-haproxy b/images/router/haproxy/shutdown-haproxy
new file mode 100755
index 000000000..38c292be3
--- /dev/null
+++ b/images/router/haproxy/shutdown-haproxy
@@ -0,0 +1,49 @@
+#!/bin/bash
+
+set -euo pipefail
+
+# DRAIN_MARKER is the path to a file that indicates that proxies or load
+# balancers should be draining connections to the router. The router's health
+# check for load balancers checks for this file, and if the file is present, the
+# health check fails in order to cause load balancers to drain connections to
+# the router.
+: ${DRAIN_MARKER:='/var/lib/haproxy/run/draining'}
+
+# DRAIN_PERIOD is the number of seconds that this script will wait for
+# connections to drain before it sends the SIGUSR1 signal to terminate HAProxy
+# processes.
+: ${DRAIN_PERIOD:=90}
+
+# TERMINATE_MARKER is the path to a file that indicates that this router is
+# terminating. The reload-haproxy script exits immediately if this marker is
+# present.
+: ${TERMINATE_MARKER:='/var/lib/haproxy/run/terminating'}
+
+# GRACE_PERIOD is the number of seconds that this script will wait for HAProxy
+# processes to terminate after it sends the SIGUSR1 signal, before it sends
+# SIGTERM to any remaining HAProxy processes.
+: ${GRACE_PERIOD:=30}
+
+echo " - Setting drain marker..."
+: > "$DRAIN_MARKER"
+
+echo " - Sleeping $DRAIN_PERIOD seconds to let connections drain..."
+sleep "$DRAIN_PERIOD"
+
+echo " - Sending SIGUSR1 to HAProxy processes and waiting up to $GRACE_PERIOD seconds for processes to terminate..."
+stop=$((SECONDS + GRACE_PERIOD))
+while pkill -USR1 haproxy; rc=$?; [[ $rc -ne 1 ]] && ((SECONDS < stop)); do
+  sleep 1
+done
+
+if [[ "$rc" -eq 1 ]]; then
+  echo ' - Done. All processes have exited.'
+  exit 0
+fi
+
+: > "$TERMINATE_MARKER"
+echo ' - Sending SIGTERM to HAProxy processes...'
+while pkill -TERM haproxy; [[ $? -ne 1 ]]; do
+  sleep 1
+done
+echo ' - Done. All processes have been terminated.'
diff --git a/pkg/cmd/infra/router/template.go b/pkg/cmd/infra/router/template.go
index 6882c5fd4..b5f3f3857 100644
--- a/pkg/cmd/infra/router/template.go
+++ b/pkg/cmd/infra/router/template.go
@@ -374,10 +374,17 @@ func (o *TemplateRouterOptions) Run() error {
 		return err
 	}
 	checkController := metrics.ControllerLive()
-	liveChecks := []healthz.HealthzChecker{checkController}
+	podLiveChecks := []healthz.HealthzChecker{checkController}
 	if !(isTrue(env("ROUTER_BIND_PORTS_BEFORE_SYNC", ""))) {
-		liveChecks = append(liveChecks, checkBackend)
+		podLiveChecks = append(podLiveChecks, checkBackend)
 	}
+	podReadyChecks := []healthz.HealthzChecker{
+		checkBackend,
+		checkSync,
+	}
+	checkDraining := metrics.IsDraining()
+	// Cap the slice so append always copies and never shares podReadyChecks' backing array.
+	lbReadyChecks := append(podReadyChecks[:len(podReadyChecks):len(podReadyChecks)], checkDraining)
 
 	kubeconfig, _, err := o.Config.KubeConfig()
 	if err != nil {
@@ -420,8 +427,9 @@ func (o *TemplateRouterOptions) Run() error {
 			Resource: "routers",
 			Name: o.RouterName,
 		},
-		LiveChecks:  liveChecks,
-		ReadyChecks: []healthz.HealthzChecker{checkBackend, checkSync},
+		LBReadyChecks:  lbReadyChecks,
+		PodLiveChecks:  podLiveChecks,
+		PodReadyChecks: podReadyChecks,
 	}
 	if certFile := env("ROUTER_METRICS_TLS_CERT_FILE", ""); len(certFile) > 0 {
 		certificate, err := tls.LoadX509KeyPair(certFile, env("ROUTER_METRICS_TLS_KEY_FILE", ""))
diff --git a/pkg/router/metrics/health.go b/pkg/router/metrics/health.go
index 9f5f28cff..ce215d1f4 100644
--- a/pkg/router/metrics/health.go
+++ b/pkg/router/metrics/health.go
@@ -8,6 +8,7 @@ import (
 	"net"
 	"net/http"
 	"net/url"
+	"os"
 	"time"
 
 	"github.com/golang/glog"
@@ -53,6 +54,20 @@ func HasSynced(routerPtr **templateplugin.TemplatePlugin) (healthz.HealthzChecke
 	}), nil
 }
 
+// IsDraining returns a healthz check that fails if the "draining" marker is
+// present, indicating that load balancers should be draining connections to the
+// router.
+func IsDraining() healthz.HealthzChecker {
+	const drainMarker string = "/var/lib/haproxy/run/draining"
+
+	return healthz.NamedCheck("is-terminating", func(r *http.Request) error {
+		if _, err := os.Stat(drainMarker); os.IsNotExist(err) {
+			return nil
+		}
+		return fmt.Errorf("Terminating")
+	})
+}
+
 func ControllerLive() healthz.HealthzChecker {
 	return healthz.NamedCheck("controller", func(r *http.Request) error {
 		return nil
diff --git a/pkg/router/metrics/metrics.go b/pkg/router/metrics/metrics.go
index f9b72e79d..918859866 100644
--- a/pkg/router/metrics/metrics.go
+++ b/pkg/router/metrics/metrics.go
@@ -31,14 +31,16 @@ type Listener struct {
 	Authorizer authorizer.Authorizer
 	Record authorizer.AttributesRecord
 
-	LiveChecks  []healthz.HealthzChecker
-	ReadyChecks []healthz.HealthzChecker
+	LBReadyChecks  []healthz.HealthzChecker
+	PodLiveChecks  []healthz.HealthzChecker
+	PodReadyChecks []healthz.HealthzChecker
 }
 
 func (l Listener) handler() http.Handler {
 	mux := http.NewServeMux()
-	healthz.InstallHandler(mux, l.LiveChecks...)
-	healthz.InstallPathHandler(mux, "/healthz/ready", l.ReadyChecks...)
+	healthz.InstallHandler(mux, l.LBReadyChecks...)
+	healthz.InstallPathHandler(mux, "/healthz/live", l.PodLiveChecks...)
+	healthz.InstallPathHandler(mux, "/healthz/ready", l.PodReadyChecks...)
 
 	if l.Authenticator != nil {
 		protected := http.NewServeMux()