From 0f18fd1037e783e9b0d01e5a801f7675b6e7a2cc Mon Sep 17 00:00:00 2001 From: Flavio Crisciani Date: Fri, 5 Jan 2018 14:51:43 -0800 Subject: [PATCH 1/2] Remove watchMiss for swarm mode Swarm mode no longer really has a use for watchMiss. Peer entries are configured at configuration time. If the gc_thresh limit denies the insertion, the peerAdd will fail. Signed-off-by: Flavio Crisciani (cherry picked from commit 3d9a808983baf4ab3047210e161e7dcbeeac1ab6) --- drivers/overlay/ov_network.go | 37 ++++++++++++----------------------- 1 file changed, 12 insertions(+), 25 deletions(-) diff --git a/drivers/overlay/ov_network.go b/drivers/overlay/ov_network.go index 11314170b1..47e3d78848 100644 --- a/drivers/overlay/ov_network.go +++ b/drivers/overlay/ov_network.go @@ -13,7 +13,6 @@ import ( "strings" "sync" "syscall" - "time" "github.com/docker/docker/pkg/reexec" "github.com/docker/libnetwork/datastore" @@ -693,6 +692,12 @@ func (n *network) initSandbox(restore bool) error { n.driver.initSandboxPeerDB(n.id) } + // If we are in swarm mode, we no longer need the watchMiss routine. 
+ // This will save 1 thread and 1 netlink socket per network + if !n.driver.isSerfAlive() { + return nil + } + var nlSock *nl.NetlinkSocket sbox.InvokeFunc(func() { nlSock, err = nl.Subscribe(syscall.NETLINK_ROUTE, syscall.RTNLGRP_NEIGH) @@ -716,7 +721,6 @@ func (n *network) initSandbox(restore bool) error { } func (n *network) watchMiss(nlSock *nl.NetlinkSocket) { - t := time.Now() for { msgs, err := nlSock.Receive() if err != nil { @@ -772,30 +776,13 @@ func (n *network) watchMiss(nlSock *nl.NetlinkSocket) { continue } - if n.driver.isSerfAlive() { - logrus.Debugf("miss notification: dest IP %v, dest MAC %v", ip, mac) - mac, IPmask, vtep, err := n.driver.resolvePeer(n.id, ip) - if err != nil { - logrus.Errorf("could not resolve peer %q: %v", ip, err) - continue - } - n.driver.peerAdd(n.id, "dummy", ip, IPmask, mac, vtep, l2Miss, l3Miss, false) - } else if l3Miss && time.Since(t) > time.Second { - // All the local peers will trigger a miss notification but this one is expected and the local container will reply - // autonomously to the ARP request - // In case the gc_thresh3 values is low kernel might reject new entries during peerAdd. This will trigger the following - // extra logs that will inform of the possible issue. - // Entries created would not be deleted see documentation http://man7.org/linux/man-pages/man7/arp.7.html: - // Entries which are marked as permanent are never deleted by the garbage-collector. - // The time limit here is to guarantee that the dbSearch is not - // done too frequently causing a stall of the peerDB operations. 
- pKey, pEntry, err := n.driver.peerDbSearch(n.id, ip) - if err == nil && !pEntry.isLocal { - t = time.Now() - logrus.Warnf("miss notification for peer:%+v l3Miss:%t l2Miss:%t, if the problem persist check the gc_thresh on the host pKey:%+v pEntry:%+v err:%v", - neigh, l3Miss, l2Miss, *pKey, *pEntry, err) - } + logrus.Debugf("miss notification: dest IP %v, dest MAC %v", ip, mac) + mac, IPmask, vtep, err := n.driver.resolvePeer(n.id, ip) + if err != nil { + logrus.Errorf("could not resolve peer %q: %v", ip, err) + continue } + n.driver.peerAdd(n.id, "dummy", ip, IPmask, mac, vtep, l2Miss, l3Miss, false) } } } From a066a78bcddd1d933e02cb16f75161eb4b76ac2c Mon Sep 17 00:00:00 2001 From: Flavio Crisciani Date: Wed, 10 Jan 2018 13:49:19 -0800 Subject: [PATCH 2/2] Fix watchMiss thread context The netlink deserialize function fetches information from the link. This requires the goroutine to be in the correct namespace to succeed. Signed-off-by: Flavio Crisciani (cherry picked from commit 1717eee2221383427753ea71dc1c301458838be3) --- drivers/overlay/ov_network.go | 20 ++++++++++++++++++-- drivers/overlay/overlay_test.go | 4 +++- 2 files changed, 21 insertions(+), 3 deletions(-) diff --git a/drivers/overlay/ov_network.go b/drivers/overlay/ov_network.go index 47e3d78848..d33939beeb 100644 --- a/drivers/overlay/ov_network.go +++ b/drivers/overlay/ov_network.go @@ -711,7 +711,7 @@ func (n *network) initSandbox(restore bool) error { n.setNetlinkSocket(nlSock) if err == nil { - go n.watchMiss(nlSock) + go n.watchMiss(nlSock, key) } else { logrus.Errorf("failed to subscribe to neighbor group netlink messages for overlay network %s in sbox %s: %v", n.id, sbox.Key(), err) @@ -720,7 +720,23 @@ func (n *network) initSandbox(restore bool) error { return nil } -func (n *network) watchMiss(nlSock *nl.NetlinkSocket) { +func (n *network) watchMiss(nlSock *nl.NetlinkSocket, nsPath string) { + // With the new version of the netlink library the deserialize function makes + // requests about the 
interface of the netlink message. This can succeed only + // if this goroutine is in the target namespace. For this reason we + // lock the thread on that namespace below + runtime.LockOSThread() + defer runtime.UnlockOSThread() + newNs, err := netns.GetFromPath(nsPath) + if err != nil { + logrus.WithError(err).Errorf("failed to get the namespace %s", nsPath) + return + } + defer newNs.Close() + if err = netns.Set(newNs); err != nil { + logrus.WithError(err).Errorf("failed to enter the namespace %s", nsPath) + return + } for { msgs, err := nlSock.Receive() if err != nil { diff --git a/drivers/overlay/overlay_test.go b/drivers/overlay/overlay_test.go index 75c89da6bb..7e6fc169d7 100644 --- a/drivers/overlay/overlay_test.go +++ b/drivers/overlay/overlay_test.go @@ -2,7 +2,9 @@ package overlay import ( "context" + "fmt" "net" + "os" "syscall" "testing" "time" @@ -158,7 +160,7 @@ func TestNetlinkSocket(t *testing.T) { ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second) defer cancel() go func() { - n.watchMiss(nlSock) + n.watchMiss(nlSock, fmt.Sprintf("/proc/%d/task/%d/ns/net", os.Getpid(), syscall.Gettid())) ch <- nil }() time.Sleep(5 * time.Second)