diff --git a/drivers/overlay/ov_network.go b/drivers/overlay/ov_network.go index 11314170b1..d33939beeb 100644 --- a/drivers/overlay/ov_network.go +++ b/drivers/overlay/ov_network.go @@ -13,7 +13,6 @@ import ( "strings" "sync" "syscall" - "time" "github.com/docker/docker/pkg/reexec" "github.com/docker/libnetwork/datastore" @@ -693,6 +692,12 @@ func (n *network) initSandbox(restore bool) error { n.driver.initSandboxPeerDB(n.id) } + // If we are in swarm mode, we don't need anymore the watchMiss routine. + // This will save 1 thread and 1 netlink socket per network + if !n.driver.isSerfAlive() { + return nil + } + var nlSock *nl.NetlinkSocket sbox.InvokeFunc(func() { nlSock, err = nl.Subscribe(syscall.NETLINK_ROUTE, syscall.RTNLGRP_NEIGH) @@ -706,7 +711,7 @@ func (n *network) initSandbox(restore bool) error { n.setNetlinkSocket(nlSock) if err == nil { - go n.watchMiss(nlSock) + go n.watchMiss(nlSock, key) } else { logrus.Errorf("failed to subscribe to neighbor group netlink messages for overlay network %s in sbox %s: %v", n.id, sbox.Key(), err) @@ -715,8 +720,23 @@ func (n *network) initSandbox(restore bool) error { return nil } -func (n *network) watchMiss(nlSock *nl.NetlinkSocket) { - t := time.Now() +func (n *network) watchMiss(nlSock *nl.NetlinkSocket, nsPath string) { + // With the new version of the netlink library the deserialize function makes + // requests about the interface of the netlink message. This can succeed only + // if this go routine is in the target namespace. For this reason following we + // lock the thread on that namespace + runtime.LockOSThread() + defer runtime.UnlockOSThread() + newNs, err := netns.GetFromPath(nsPath) + if err != nil { + logrus.WithError(err).Errorf("failed to get the namespace %s", nsPath) + return + } + defer newNs.Close() + if err = netns.Set(newNs); err != nil { + logrus.WithError(err).Errorf("failed to enter the namespace %s", nsPath) + return + } for { msgs, err := nlSock.Receive() if err != nil { @@ -772,30 +792,13 @@ func (n *network) watchMiss(nlSock *nl.NetlinkSocket) { continue } - if n.driver.isSerfAlive() { - logrus.Debugf("miss notification: dest IP %v, dest MAC %v", ip, mac) - mac, IPmask, vtep, err := n.driver.resolvePeer(n.id, ip) - if err != nil { - logrus.Errorf("could not resolve peer %q: %v", ip, err) - continue - } - n.driver.peerAdd(n.id, "dummy", ip, IPmask, mac, vtep, l2Miss, l3Miss, false) - } else if l3Miss && time.Since(t) > time.Second { - // All the local peers will trigger a miss notification but this one is expected and the local container will reply - // autonomously to the ARP request - // In case the gc_thresh3 values is low kernel might reject new entries during peerAdd. This will trigger the following - // extra logs that will inform of the possible issue. - // Entries created would not be deleted see documentation http://man7.org/linux/man-pages/man7/arp.7.html: - // Entries which are marked as permanent are never deleted by the garbage-collector. - // The time limit here is to guarantee that the dbSearch is not - // done too frequently causing a stall of the peerDB operations. - pKey, pEntry, err := n.driver.peerDbSearch(n.id, ip) - if err == nil && !pEntry.isLocal { - t = time.Now() - logrus.Warnf("miss notification for peer:%+v l3Miss:%t l2Miss:%t, if the problem persist check the gc_thresh on the host pKey:%+v pEntry:%+v err:%v", - neigh, l3Miss, l2Miss, *pKey, *pEntry, err) - } + logrus.Debugf("miss notification: dest IP %v, dest MAC %v", ip, mac) + mac, IPmask, vtep, err := n.driver.resolvePeer(n.id, ip) + if err != nil { + logrus.Errorf("could not resolve peer %q: %v", ip, err) + continue } + n.driver.peerAdd(n.id, "dummy", ip, IPmask, mac, vtep, l2Miss, l3Miss, false) } } } diff --git a/drivers/overlay/overlay_test.go b/drivers/overlay/overlay_test.go index 75c89da6bb..7e6fc169d7 100644 --- a/drivers/overlay/overlay_test.go +++ b/drivers/overlay/overlay_test.go @@ -2,7 +2,9 @@ package overlay import ( "context" + "fmt" "net" + "os" "syscall" "testing" "time" @@ -158,7 +160,7 @@ func TestNetlinkSocket(t *testing.T) { ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second) defer cancel() go func() { - n.watchMiss(nlSock) + n.watchMiss(nlSock, fmt.Sprintf("/proc/%d/task/%d/ns/net", os.Getpid(), syscall.Gettid())) ch <- nil }() time.Sleep(5 * time.Second)