From 9af59fd772adf0715ed04467f0b84e5e0583304c Mon Sep 17 00:00:00 2001 From: Toshiaki Makita Date: Thu, 16 Mar 2017 14:22:56 +0900 Subject: [PATCH] Reduce downtime after LB backend removal When an LB backend is removed and the corresponding ipvs destination is deleted, ipvs maintains dangling connections pointing to the destination if the connections existed before the destination was removed. Packets sent on such a connection are then dropped in the kernel because the connection no longer has a destination. This continues until the connection expires (e.g. 60 seconds for SYN_RECV (tcp initial) state). In some cases this causes a TCP connection timeout if the connection is initiated between container failure and ipvs destination deletion. ipvs provides a parameter "expire_nodest_conn" to reduce the downtime. When the option is enabled, a stale connection expires immediately when a packet is received on it. Although this option is not suitable if flapping can happen, I think the user of the LB should ensure that it does not happen by taking enough time before determining that the container is unreachable. Reference: https://www.kernel.org/doc/Documentation/networking/ipvs-sysctl.txt Signed-off-by: Toshiaki Makita --- service_linux.go | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/service_linux.go b/service_linux.go index 2bcb6de5eb..82e7ca8379 100644 --- a/service_linux.go +++ b/service_linux.go @@ -644,6 +644,12 @@ func fwMarker() { logrus.Errorf("Failed to write to /proc/sys/net/ipv4/vs/conntrack: %v", err) os.Exit(8) } + + err = ioutil.WriteFile("/proc/sys/net/ipv4/vs/expire_nodest_conn", []byte{'1', '\n'}, 0644) + if err != nil { + logrus.Errorf("Failed to write to /proc/sys/net/ipv4/vs/expire_nodest_conn: %v", err) + os.Exit(10) + } } }