diff --git a/components/engine/integration-cli/docker_cli_swarm_test.go b/components/engine/integration-cli/docker_cli_swarm_test.go index 68addbb2157..3f32f37447a 100644 --- a/components/engine/integration-cli/docker_cli_swarm_test.go +++ b/components/engine/integration-cli/docker_cli_swarm_test.go @@ -1545,7 +1545,8 @@ func (s *DockerSwarmSuite) TestSwarmNetworkIPAMOptions(c *check.C) { out, err = d.Cmd("network", "inspect", "--format", "{{.IPAM.Options}}", "foo") c.Assert(err, checker.IsNil, check.Commentf(out)) - c.Assert(strings.TrimSpace(out), checker.Equals, "map[foo:bar]") + c.Assert(strings.TrimSpace(out), checker.Contains, "foo:bar") + c.Assert(strings.TrimSpace(out), checker.Contains, "com.docker.network.ipam.serial:true") out, err = d.Cmd("service", "create", "--detach", "--no-resolve-image", "--network=foo", "--name", "top", "busybox", "top") c.Assert(err, checker.IsNil, check.Commentf(out)) @@ -1555,7 +1556,8 @@ func (s *DockerSwarmSuite) TestSwarmNetworkIPAMOptions(c *check.C) { out, err = d.Cmd("network", "inspect", "--format", "{{.IPAM.Options}}", "foo") c.Assert(err, checker.IsNil, check.Commentf(out)) - c.Assert(strings.TrimSpace(out), checker.Equals, "map[foo:bar]") + c.Assert(strings.TrimSpace(out), checker.Contains, "foo:bar") + c.Assert(strings.TrimSpace(out), checker.Contains, "com.docker.network.ipam.serial:true") } func (s *DockerTrustedSwarmSuite) TestTrustedServiceCreate(c *check.C) { diff --git a/components/engine/vendor.conf b/components/engine/vendor.conf index 2cd06e717ed..22004d2916d 100644 --- a/components/engine/vendor.conf +++ b/components/engine/vendor.conf @@ -31,7 +31,7 @@ github.com/moby/buildkit aaff9d591ef128560018433fe61beb802e149de8 github.com/tonistiigi/fsutil 1dedf6e90084bd88c4c518a15e68a37ed1370203 #get libnetwork packages -github.com/docker/libnetwork 0f08d31bf0e640e0cdc6d5161227f87602d605c5 +github.com/docker/libnetwork 6c512920fef411945513d04a50740ebbf13f2fd8 github.com/docker/go-events 9461782956ad83b30282bf90e31fa6a70c255ba9 github.com/armon/go-radix e39d623f12e8e41c7b5529e9a9dd67a1e2261f80 github.com/armon/go-metrics eb0af217e5e9747e41dd5303755356b62d28e3ec @@ -110,7 +110,7 @@ github.com/tonistiigi/fifo 1405643975692217d6720f8b54aeee1bf2cd5cf4 github.com/stevvooe/continuity cd7a8e21e2b6f84799f5dd4b65faf49c8d3ee02d # cluster -github.com/docker/swarmkit 941a01844b89c56aa61086fecb167ab3af1de22b +github.com/docker/swarmkit 872861d2ae46958af7ead1d5fffb092c73afbaf0 github.com/gogo/protobuf v0.4 github.com/cloudflare/cfssl 7fb22c8cba7ecaf98e4082d22d65800cf45e042a github.com/google/certificate-transparency d90e65c3a07988180c5b1ece71791c0b6506826e diff --git a/components/engine/vendor/github.com/docker/libnetwork/agent.go b/components/engine/vendor/github.com/docker/libnetwork/agent.go index b0d66476e6e..f12006583b5 100644 --- a/components/engine/vendor/github.com/docker/libnetwork/agent.go +++ b/components/engine/vendor/github.com/docker/libnetwork/agent.go @@ -6,11 +6,9 @@ import ( "encoding/json" "fmt" "net" - "os" "sort" "sync" - "github.com/docker/docker/pkg/stringid" "github.com/docker/go-events" "github.com/docker/libnetwork/cluster" "github.com/docker/libnetwork/datastore" @@ -282,12 +280,8 @@ func (c *controller) agentInit(listenAddr, bindAddrOrInterface, advertiseAddr, d } keys, _ := c.getKeys(subsysGossip) - hostname, _ := os.Hostname() - nodeName := hostname + "-" + stringid.TruncateID(stringid.GenerateRandomID()) - logrus.Info("Gossip cluster hostname ", nodeName) netDBConf := networkdb.DefaultConfig() - netDBConf.NodeName = nodeName 
netDBConf.BindAddr = listenAddr netDBConf.AdvertiseAddr = advertiseAddr netDBConf.Keys = keys diff --git a/components/engine/vendor/github.com/docker/libnetwork/bitseq/sequence.go b/components/engine/vendor/github.com/docker/libnetwork/bitseq/sequence.go index 3946473d8b5..a1a9810dc5b 100644 --- a/components/engine/vendor/github.com/docker/libnetwork/bitseq/sequence.go +++ b/components/engine/vendor/github.com/docker/libnetwork/bitseq/sequence.go @@ -41,6 +41,7 @@ type Handle struct { id string dbIndex uint64 dbExists bool + curr uint64 store datastore.DataStore sync.Mutex } @@ -193,26 +194,27 @@ func (h *Handle) getCopy() *Handle { dbIndex: h.dbIndex, dbExists: h.dbExists, store: h.store, + curr: h.curr, } } // SetAnyInRange atomically sets the first unset bit in the specified range in the sequence and returns the corresponding ordinal -func (h *Handle) SetAnyInRange(start, end uint64) (uint64, error) { +func (h *Handle) SetAnyInRange(start, end uint64, serial bool) (uint64, error) { if end < start || end >= h.bits { return invalidPos, fmt.Errorf("invalid bit range [%d, %d]", start, end) } if h.Unselected() == 0 { return invalidPos, ErrNoBitAvailable } - return h.set(0, start, end, true, false) + return h.set(0, start, end, true, false, serial) } // SetAny atomically sets the first unset bit in the sequence and returns the corresponding ordinal -func (h *Handle) SetAny() (uint64, error) { +func (h *Handle) SetAny(serial bool) (uint64, error) { if h.Unselected() == 0 { return invalidPos, ErrNoBitAvailable } - return h.set(0, 0, h.bits-1, true, false) + return h.set(0, 0, h.bits-1, true, false, serial) } // Set atomically sets the corresponding bit in the sequence @@ -220,7 +222,7 @@ func (h *Handle) Set(ordinal uint64) error { if err := h.validateOrdinal(ordinal); err != nil { return err } - _, err := h.set(ordinal, 0, 0, false, false) + _, err := h.set(ordinal, 0, 0, false, false, false) return err } @@ -229,7 +231,7 @@ func (h *Handle) Unset(ordinal uint64) error { if err := h.validateOrdinal(ordinal); err != nil { return err } - _, err := h.set(ordinal, 0, 0, false, true) + _, err := h.set(ordinal, 0, 0, false, true, false) return err } @@ -298,7 +300,7 @@ func (h *Handle) CheckConsistency() error { } // set/reset the bit -func (h *Handle) set(ordinal, start, end uint64, any bool, release bool) (uint64, error) { +func (h *Handle) set(ordinal, start, end uint64, any bool, release bool, serial bool) (uint64, error) { var ( bitPos uint64 bytePos uint64 @@ -308,6 +310,7 @@ func (h *Handle) set(ordinal, start, end uint64, any bool, release bool) (uint64 for { var store datastore.DataStore + curr := uint64(0) h.Lock() store = h.store h.Unlock() @@ -318,15 +321,18 @@ func (h *Handle) set(ordinal, start, end uint64, any bool, release bool) (uint64 } h.Lock() + if serial { + curr = h.curr + } // Get position if available if release { bytePos, bitPos = ordinalToPos(ordinal) } else { if any { - bytePos, bitPos, err = getFirstAvailable(h.head, start) + bytePos, bitPos, err = getAvailableFromCurrent(h.head, start, curr, end) ret = posToOrdinal(bytePos, bitPos) - if end < ret { - err = ErrNoBitAvailable + if err == nil { + h.curr = ret + 1 } } else { bytePos, bitPos, err = checkIfAvailable(h.head, ordinal) @@ -515,6 +521,29 @@ func getFirstAvailable(head *sequence, start uint64) (uint64, uint64, error) { return invalidPos, invalidPos, ErrNoBitAvailable } +// getAvailableFromCurrent will look for available ordinal from the current ordinal. 
+// If none found then it will loop back to the start to check of the available bit. +// This can be further optimized to check from start till curr in case of a rollover +func getAvailableFromCurrent(head *sequence, start, curr, end uint64) (uint64, uint64, error) { + var bytePos, bitPos uint64 + if curr != 0 && curr > start { + bytePos, bitPos, _ = getFirstAvailable(head, curr) + ret := posToOrdinal(bytePos, bitPos) + if end < ret { + goto begin + } + return bytePos, bitPos, nil + } + +begin: + bytePos, bitPos, _ = getFirstAvailable(head, start) + ret := posToOrdinal(bytePos, bitPos) + if end < ret { + return invalidPos, invalidPos, ErrNoBitAvailable + } + return bytePos, bitPos, nil +} + // checkIfAvailable checks if the bit correspondent to the specified ordinal is unset // If the ordinal is beyond the sequence limits, a negative response is returned func checkIfAvailable(head *sequence, ordinal uint64) (uint64, uint64, error) { diff --git a/components/engine/vendor/github.com/docker/libnetwork/bitseq/store.go b/components/engine/vendor/github.com/docker/libnetwork/bitseq/store.go index 5448927eb1f..cdb7f04264e 100644 --- a/components/engine/vendor/github.com/docker/libnetwork/bitseq/store.go +++ b/components/engine/vendor/github.com/docker/libnetwork/bitseq/store.go @@ -87,6 +87,7 @@ func (h *Handle) CopyTo(o datastore.KVObject) error { dstH.dbIndex = h.dbIndex dstH.dbExists = h.dbExists dstH.store = h.store + dstH.curr = h.curr dstH.Unlock() return nil diff --git a/components/engine/vendor/github.com/docker/libnetwork/drivers/overlay/encryption.go b/components/engine/vendor/github.com/docker/libnetwork/drivers/overlay/encryption.go index f12d7a8c671..802d7bc36d5 100644 --- a/components/engine/vendor/github.com/docker/libnetwork/drivers/overlay/encryption.go +++ b/components/engine/vendor/github.com/docker/libnetwork/drivers/overlay/encryption.go @@ -21,7 +21,6 @@ import ( const ( r = 0xD0C4E3 - timeout = 30 pktExpansion = 26 // SPI(4) + SeqN(4) + IV(8) + PadLength(1) + NextHeader(1) + ICV(8) ) diff --git a/components/engine/vendor/github.com/docker/libnetwork/drivers/overlay/joinleave.go b/components/engine/vendor/github.com/docker/libnetwork/drivers/overlay/joinleave.go index a07838b7603..0770513e7d5 100644 --- a/components/engine/vendor/github.com/docker/libnetwork/drivers/overlay/joinleave.go +++ b/components/engine/vendor/github.com/docker/libnetwork/drivers/overlay/joinleave.go @@ -68,7 +68,7 @@ func (d *driver) Join(nid, eid string, sboxKey string, jinfo driverapi.JoinInfo, ep.ifName = containerIfName - if err := d.writeEndpointToStore(ep); err != nil { + if err = d.writeEndpointToStore(ep); err != nil { return fmt.Errorf("failed to update overlay endpoint %s to local data store: %v", ep.id[0:7], err) } @@ -86,7 +86,7 @@ func (d *driver) Join(nid, eid string, sboxKey string, jinfo driverapi.JoinInfo, return err } - if err := sbox.AddInterface(overlayIfName, "veth", + if err = sbox.AddInterface(overlayIfName, "veth", sbox.InterfaceOptions().Master(s.brName)); err != nil { return fmt.Errorf("could not add veth pair inside the network sandbox: %v", err) } @@ -100,7 +100,7 @@ func (d *driver) Join(nid, eid string, sboxKey string, jinfo driverapi.JoinInfo, return err } - if err := nlh.LinkSetHardwareAddr(veth, ep.mac); err != nil { + if err = nlh.LinkSetHardwareAddr(veth, ep.mac); err != nil { return fmt.Errorf("could not set mac address (%v) to the container interface: %v", ep.mac, err) } @@ -108,7 +108,7 @@ func (d *driver) Join(nid, eid string, sboxKey string, jinfo 
driverapi.JoinInfo, if sub == s { continue } - if err := jinfo.AddStaticRoute(sub.subnetIP, types.NEXTHOP, s.gwIP.IP); err != nil { + if err = jinfo.AddStaticRoute(sub.subnetIP, types.NEXTHOP, s.gwIP.IP); err != nil { logrus.Errorf("Adding subnet %s static route in network %q failed\n", s.subnetIP, n.id) } } @@ -122,7 +122,7 @@ func (d *driver) Join(nid, eid string, sboxKey string, jinfo driverapi.JoinInfo, d.peerAdd(nid, eid, ep.addr.IP, ep.addr.Mask, ep.mac, net.ParseIP(d.advertiseAddress), false, false, true) - if err := d.checkEncryption(nid, nil, n.vxlanID(s), true, true); err != nil { + if err = d.checkEncryption(nid, nil, n.vxlanID(s), true, true); err != nil { logrus.Warn(err) } @@ -200,7 +200,7 @@ func (d *driver) EventNotify(etype driverapi.EventType, nid, tableName, key stri } if etype == driverapi.Delete { - d.peerDelete(nid, eid, addr.IP, addr.Mask, mac, vtep) + d.peerDelete(nid, eid, addr.IP, addr.Mask, mac, vtep, false) return } @@ -232,11 +232,9 @@ func (d *driver) Leave(nid, eid string) error { } } - n.leaveSandbox() + d.peerDelete(nid, eid, ep.addr.IP, ep.addr.Mask, ep.mac, net.ParseIP(d.advertiseAddress), true) - if err := d.checkEncryption(nid, nil, 0, true, false); err != nil { - logrus.Warn(err) - } + n.leaveSandbox() return nil } diff --git a/components/engine/vendor/github.com/docker/libnetwork/drivers/overlay/ov_network.go b/components/engine/vendor/github.com/docker/libnetwork/drivers/overlay/ov_network.go index 0e9ca77866f..3fbfccf007f 100644 --- a/components/engine/vendor/github.com/docker/libnetwork/drivers/overlay/ov_network.go +++ b/components/engine/vendor/github.com/docker/libnetwork/drivers/overlay/ov_network.go @@ -119,7 +119,7 @@ func setDefaultVlan() { data := []byte{'0', '\n'} if err = ioutil.WriteFile(path, data, 0644); err != nil { - logrus.Errorf("endbling default vlan on bridge %s failed %v", brName, err) + logrus.Errorf("enabling default vlan on bridge %s failed %v", brName, err) os.Exit(1) } os.Exit(0) @@ -251,8 +251,9 @@ func (d *driver) DeleteNetwork(nid string) error { if err := d.deleteEndpointFromStore(ep); err != nil { logrus.Warnf("Failed to delete overlay endpoint %s from local store: %v", ep.id[0:7], err) } - } + // flush the peerDB entries + d.peerFlush(nid) d.deleteNetwork(nid) vnis, err := n.releaseVxlanID() @@ -505,11 +506,7 @@ func (n *network) restoreSubnetSandbox(s *subnet, brName, vxlanName string) erro vxlanIfaceOption := make([]osl.IfaceOption, 1) vxlanIfaceOption = append(vxlanIfaceOption, sbox.InterfaceOptions().Master(brName)) Ifaces[vxlanName+"+vxlan"] = vxlanIfaceOption - err = sbox.Restore(Ifaces, nil, nil, nil) - if err != nil { - return err - } - return nil + return sbox.Restore(Ifaces, nil, nil, nil) } func (n *network) setupSubnetSandbox(s *subnet, brName, vxlanName string) error { @@ -760,58 +757,38 @@ func (n *network) watchMiss(nlSock *nl.NetlinkSocket) { continue } - logrus.Debugf("miss notification: dest IP %v, dest MAC %v", ip, mac) - if neigh.State&(netlink.NUD_STALE|netlink.NUD_INCOMPLETE) == 0 { continue } if n.driver.isSerfAlive() { + logrus.Debugf("miss notification: dest IP %v, dest MAC %v", ip, mac) mac, IPmask, vtep, err := n.driver.resolvePeer(n.id, ip) if err != nil { logrus.Errorf("could not resolve peer %q: %v", ip, err) continue } n.driver.peerAdd(n.id, "dummy", ip, IPmask, mac, vtep, l2Miss, l3Miss, false) - } else { - // If the gc_thresh values are lower kernel might knock off the neighor entries. - // When we get a L3 miss check if its a valid peer and reprogram the neighbor - // entry again. 
Rate limit it to once attempt every 500ms, just in case a faulty - // container sends a flood of packets to invalid peers - if !l3Miss { - continue - } - if time.Since(t) > 500*time.Millisecond { + } else if l3Miss && time.Since(t) > time.Second { + // All the local peers will trigger a miss notification but this one is expected and the local container will reply + // autonomously to the ARP request + // In case the gc_thresh3 values is low kernel might reject new entries during peerAdd. This will trigger the following + // extra logs that will inform of the possible issue. + // Entries created would not be deleted see documentation http://man7.org/linux/man-pages/man7/arp.7.html: + // Entries which are marked as permanent are never deleted by the garbage-collector. + // The time limit here is to guarantee that the dbSearch is not + // done too frequently causing a stall of the peerDB operations. + pKey, pEntry, err := n.driver.peerDbSearch(n.id, ip) + if err == nil && !pEntry.isLocal { t = time.Now() - n.programNeighbor(ip) + logrus.Warnf("miss notification for peer:%+v l3Miss:%t l2Miss:%t, if the problem persist check the gc_thresh on the host pKey:%+v pEntry:%+v err:%v", + neigh, l3Miss, l2Miss, *pKey, *pEntry, err) } } } } } -func (n *network) programNeighbor(ip net.IP) { - peerMac, _, _, err := n.driver.peerDbSearch(n.id, ip) - if err != nil { - logrus.Errorf("Reprogramming on L3 miss failed for %s, no peer entry", ip) - return - } - s := n.getSubnetforIPAddr(ip) - if s == nil { - logrus.Errorf("Reprogramming on L3 miss failed for %s, not a valid subnet", ip) - return - } - sbox := n.sandbox() - if sbox == nil { - logrus.Errorf("Reprogramming on L3 miss failed for %s, overlay sandbox missing", ip) - return - } - if err := sbox.AddNeighbor(ip, peerMac, true, sbox.NeighborOptions().LinkName(s.vxlanName)); err != nil { - logrus.Errorf("Reprogramming on L3 miss failed for %s: %v", ip, err) - return - } -} - func (d *driver) addNetwork(n *network) { d.Lock() d.networks[n.id] = n @@ -1058,7 +1035,7 @@ func (n *network) obtainVxlanID(s *subnet) error { } if s.vni == 0 { - vxlanID, err := n.driver.vxlanIdm.GetID() + vxlanID, err := n.driver.vxlanIdm.GetID(true) if err != nil { return fmt.Errorf("failed to allocate vxlan id: %v", err) } @@ -1090,15 +1067,6 @@ func (n *network) contains(ip net.IP) bool { return false } -func (n *network) getSubnetforIPAddr(ip net.IP) *subnet { - for _, s := range n.subnets { - if s.subnetIP.Contains(ip) { - return s - } - } - return nil -} - // getSubnetforIP returns the subnet to which the given IP belongs func (n *network) getSubnetforIP(ip *net.IPNet) *subnet { for _, s := range n.subnets { diff --git a/components/engine/vendor/github.com/docker/libnetwork/drivers/overlay/ov_serf.go b/components/engine/vendor/github.com/docker/libnetwork/drivers/overlay/ov_serf.go index 6e034ada46b..f644799afd1 100644 --- a/components/engine/vendor/github.com/docker/libnetwork/drivers/overlay/ov_serf.go +++ b/components/engine/vendor/github.com/docker/libnetwork/drivers/overlay/ov_serf.go @@ -122,7 +122,7 @@ func (d *driver) processEvent(u serf.UserEvent) { case "join": d.peerAdd(nid, eid, net.ParseIP(ipStr), net.IPMask(net.ParseIP(maskStr).To4()), mac, net.ParseIP(vtepStr), false, false, false) case "leave": - d.peerDelete(nid, eid, net.ParseIP(ipStr), net.IPMask(net.ParseIP(maskStr).To4()), mac, net.ParseIP(vtepStr)) + d.peerDelete(nid, eid, net.ParseIP(ipStr), net.IPMask(net.ParseIP(maskStr).To4()), mac, net.ParseIP(vtepStr), false) } } @@ -135,13 +135,13 @@ func (d 
*driver) processQuery(q *serf.Query) { fmt.Printf("Failed to scan query payload string: %v\n", err) } - peerMac, peerIPMask, vtep, err := d.peerDbSearch(nid, net.ParseIP(ipStr)) + pKey, pEntry, err := d.peerDbSearch(nid, net.ParseIP(ipStr)) if err != nil { return } - logrus.Debugf("Sending peer query resp mac %s, mask %s, vtep %s", peerMac, net.IP(peerIPMask), vtep) - q.Respond([]byte(fmt.Sprintf("%s %s %s", peerMac.String(), net.IP(peerIPMask).String(), vtep.String()))) + logrus.Debugf("Sending peer query resp mac %v, mask %s, vtep %s", pKey.peerMac, net.IP(pEntry.peerIPMask).String(), pEntry.vtep) + q.Respond([]byte(fmt.Sprintf("%s %s %s", pKey.peerMac.String(), net.IP(pEntry.peerIPMask).String(), pEntry.vtep.String()))) } func (d *driver) resolvePeer(nid string, peerIP net.IP) (net.HardwareAddr, net.IPMask, net.IP, error) { diff --git a/components/engine/vendor/github.com/docker/libnetwork/drivers/overlay/overlay.go b/components/engine/vendor/github.com/docker/libnetwork/drivers/overlay/overlay.go index 2bae0823e1b..f029c5cce47 100644 --- a/components/engine/vendor/github.com/docker/libnetwork/drivers/overlay/overlay.go +++ b/components/engine/vendor/github.com/docker/libnetwork/drivers/overlay/overlay.go @@ -262,7 +262,7 @@ func (d *driver) nodeJoin(advertiseAddress, bindAddress string, self bool) { d.Unlock() // If containers are already running on this network update the - // advertiseaddress in the peerDB + // advertise address in the peerDB d.localJoinOnce.Do(func() { d.peerDBUpdateSelf() }) diff --git a/components/engine/vendor/github.com/docker/libnetwork/drivers/overlay/overlay.proto b/components/engine/vendor/github.com/docker/libnetwork/drivers/overlay/overlay.proto index 45b8c9de7e8..3133386e036 100644 --- a/components/engine/vendor/github.com/docker/libnetwork/drivers/overlay/overlay.proto +++ b/components/engine/vendor/github.com/docker/libnetwork/drivers/overlay/overlay.proto @@ -24,4 +24,4 @@ message PeerRecord { // which this container is running and can be reached by // building a tunnel to that host IP. 
string tunnel_endpoint_ip = 3 [(gogoproto.customname) = "TunnelEndpointIP"]; -} \ No newline at end of file +} diff --git a/components/engine/vendor/github.com/docker/libnetwork/drivers/overlay/ovmanager/ovmanager.go b/components/engine/vendor/github.com/docker/libnetwork/drivers/overlay/ovmanager/ovmanager.go index a80f3358924..58cc687d4fe 100644 --- a/components/engine/vendor/github.com/docker/libnetwork/drivers/overlay/ovmanager/ovmanager.go +++ b/components/engine/vendor/github.com/docker/libnetwork/drivers/overlay/ovmanager/ovmanager.go @@ -165,7 +165,7 @@ func (n *network) obtainVxlanID(s *subnet) error { n.Unlock() if vni == 0 { - vni, err = n.driver.vxlanIdm.GetIDInRange(vxlanIDStart, vxlanIDEnd) + vni, err = n.driver.vxlanIdm.GetIDInRange(vxlanIDStart, vxlanIDEnd, true) if err != nil { return err } diff --git a/components/engine/vendor/github.com/docker/libnetwork/drivers/overlay/peerdb.go b/components/engine/vendor/github.com/docker/libnetwork/drivers/overlay/peerdb.go index f953e3c872b..bdd3cb12af9 100644 --- a/components/engine/vendor/github.com/docker/libnetwork/drivers/overlay/peerdb.go +++ b/components/engine/vendor/github.com/docker/libnetwork/drivers/overlay/peerdb.go @@ -8,6 +8,7 @@ import ( "syscall" "github.com/docker/libnetwork/common" + "github.com/docker/libnetwork/osl" "github.com/sirupsen/logrus" ) @@ -22,16 +23,48 @@ type peerEntry struct { eid string vtep net.IP peerIPMask net.IPMask - inSandbox bool isLocal bool } +func (p *peerEntry) MarshalDB() peerEntryDB { + ones, bits := p.peerIPMask.Size() + return peerEntryDB{ + eid: p.eid, + vtep: p.vtep.String(), + peerIPMaskOnes: ones, + peerIPMaskBits: bits, + isLocal: p.isLocal, + } +} + +// This the structure saved into the set (SetMatrix), due to the implementation of it +// the value inserted in the set has to be Hashable so the []byte had to be converted into +// strings +type peerEntryDB struct { + eid string + vtep string + peerIPMaskOnes int + peerIPMaskBits int + isLocal bool +} + +func (p *peerEntryDB) UnMarshalDB() peerEntry { + return peerEntry{ + eid: p.eid, + vtep: net.ParseIP(p.vtep), + peerIPMask: net.CIDRMask(p.peerIPMaskOnes, p.peerIPMaskBits), + isLocal: p.isLocal, + } +} + type peerMap struct { - mp map[string]peerEntry + // set of peerEntry, note they have to be objects and not pointers to maintain the proper equality checks + mp common.SetMatrix sync.Mutex } type peerNetworkMap struct { + // map with key peerKey mp map[string]*peerMap sync.Mutex } @@ -54,11 +87,7 @@ func (pKey *peerKey) Scan(state fmt.ScanState, verb rune) error { } pKey.peerMac, err = net.ParseMAC(string(macB)) - if err != nil { - return err - } - - return nil + return err } func (d *driver) peerDbWalk(f func(string, *peerKey, *peerEntry) bool) error { @@ -87,10 +116,13 @@ func (d *driver) peerDbNetworkWalk(nid string, f func(*peerKey, *peerEntry) bool } mp := map[string]peerEntry{} - pMap.Lock() - for pKeyStr, pEntry := range pMap.mp { - mp[pKeyStr] = pEntry + for _, pKeyStr := range pMap.mp.Keys() { + entryDBList, ok := pMap.mp.Get(pKeyStr) + if ok { + peerEntryDB := entryDBList[0].(peerEntryDB) + mp[pKeyStr] = peerEntryDB.UnMarshalDB() + } } pMap.Unlock() @@ -107,45 +139,38 @@ func (d *driver) peerDbNetworkWalk(nid string, f func(*peerKey, *peerEntry) bool return nil } -func (d *driver) peerDbSearch(nid string, peerIP net.IP) (net.HardwareAddr, net.IPMask, net.IP, error) { - var ( - peerMac net.HardwareAddr - vtep net.IP - peerIPMask net.IPMask - found bool - ) - +func (d *driver) peerDbSearch(nid string, peerIP net.IP) 
(*peerKey, *peerEntry, error) { + var pKeyMatched *peerKey + var pEntryMatched *peerEntry err := d.peerDbNetworkWalk(nid, func(pKey *peerKey, pEntry *peerEntry) bool { if pKey.peerIP.Equal(peerIP) { - peerMac = pKey.peerMac - peerIPMask = pEntry.peerIPMask - vtep = pEntry.vtep - found = true - return found + pKeyMatched = pKey + pEntryMatched = pEntry + return true } - return found + return false }) if err != nil { - return nil, nil, nil, fmt.Errorf("peerdb search for peer ip %q failed: %v", peerIP, err) + return nil, nil, fmt.Errorf("peerdb search for peer ip %q failed: %v", peerIP, err) } - if !found { - return nil, nil, nil, fmt.Errorf("peer ip %q not found in peerdb", peerIP) + if pKeyMatched == nil || pEntryMatched == nil { + return nil, nil, fmt.Errorf("peer ip %q not found in peerdb", peerIP) } - return peerMac, peerIPMask, vtep, nil + return pKeyMatched, pEntryMatched, nil } func (d *driver) peerDbAdd(nid, eid string, peerIP net.IP, peerIPMask net.IPMask, - peerMac net.HardwareAddr, vtep net.IP, isLocal bool) { + peerMac net.HardwareAddr, vtep net.IP, isLocal bool) (bool, int) { d.peerDb.Lock() pMap, ok := d.peerDb.mp[nid] if !ok { d.peerDb.mp[nid] = &peerMap{ - mp: make(map[string]peerEntry), + mp: common.NewSetMatrix(), } pMap = d.peerDb.mp[nid] @@ -165,18 +190,24 @@ func (d *driver) peerDbAdd(nid, eid string, peerIP net.IP, peerIPMask net.IPMask } pMap.Lock() - pMap.mp[pKey.String()] = pEntry - pMap.Unlock() + defer pMap.Unlock() + b, i := pMap.mp.Insert(pKey.String(), pEntry.MarshalDB()) + if i != 1 { + // Transient case, there is more than one endpoint that is using the same IP,MAC pair + s, _ := pMap.mp.String(pKey.String()) + logrus.Warnf("peerDbAdd transient condition - Key:%s cardinality:%d db state:%s", pKey.String(), i, s) + } + return b, i } func (d *driver) peerDbDelete(nid, eid string, peerIP net.IP, peerIPMask net.IPMask, - peerMac net.HardwareAddr, vtep net.IP) peerEntry { + peerMac net.HardwareAddr, vtep net.IP, isLocal bool) (bool, int) { d.peerDb.Lock() pMap, ok := d.peerDb.mp[nid] if !ok { d.peerDb.Unlock() - return peerEntry{} + return false, 0 } d.peerDb.Unlock() @@ -185,22 +216,22 @@ func (d *driver) peerDbDelete(nid, eid string, peerIP net.IP, peerIPMask net.IPM peerMac: peerMac, } - pMap.Lock() - - pEntry, ok := pMap.mp[pKey.String()] - if ok { - // Mismatched endpoint ID(possibly outdated). 
Do not - // delete peerdb - if pEntry.eid != eid { - pMap.Unlock() - return pEntry - } + pEntry := peerEntry{ + eid: eid, + vtep: vtep, + peerIPMask: peerIPMask, + isLocal: isLocal, } - delete(pMap.mp, pKey.String()) - pMap.Unlock() - - return pEntry + pMap.Lock() + defer pMap.Unlock() + b, i := pMap.mp.Remove(pKey.String(), pEntry.MarshalDB()) + if i != 0 { + // Transient case, there is more than one endpoint that is using the same IP,MAC pair + s, _ := pMap.mp.String(pKey.String()) + logrus.Warnf("peerDbDelete transient condition - Key:%s cardinality:%d db state:%s", pKey.String(), i, s) + } + return b, i } // The overlay uses a lazy initialization approach, this means that when a network is created @@ -224,6 +255,7 @@ const ( peerOperationINIT peerOperationType = iota peerOperationADD peerOperationDELETE + peerOperationFLUSH ) type peerOperation struct { @@ -253,7 +285,9 @@ func (d *driver) peerOpRoutine(ctx context.Context, ch chan *peerOperation) { case peerOperationADD: err = d.peerAddOp(op.networkID, op.endpointID, op.peerIP, op.peerIPMask, op.peerMac, op.vtepIP, op.l2Miss, op.l3Miss, true, op.localPeer) case peerOperationDELETE: - err = d.peerDeleteOp(op.networkID, op.endpointID, op.peerIP, op.peerIPMask, op.peerMac, op.vtepIP) + err = d.peerDeleteOp(op.networkID, op.endpointID, op.peerIP, op.peerIPMask, op.peerMac, op.vtepIP, op.localPeer) + case peerOperationFLUSH: + err = d.peerFlushOp(op.networkID) } if err != nil { logrus.Warnf("Peer operation failed:%s op:%v", err, op) @@ -286,7 +320,6 @@ func (d *driver) peerInitOp(nid string) error { func (d *driver) peerAdd(nid, eid string, peerIP net.IP, peerIPMask net.IPMask, peerMac net.HardwareAddr, vtep net.IP, l2Miss, l3Miss, localPeer bool) { - callerName := common.CallerName(1) d.peerOpCh <- &peerOperation{ opType: peerOperationADD, networkID: nid, @@ -298,24 +331,32 @@ func (d *driver) peerAdd(nid, eid string, peerIP net.IP, peerIPMask net.IPMask, l2Miss: l2Miss, l3Miss: l3Miss, localPeer: localPeer, - callerName: callerName, + callerName: common.CallerName(1), } } func (d *driver) peerAddOp(nid, eid string, peerIP net.IP, peerIPMask net.IPMask, - peerMac net.HardwareAddr, vtep net.IP, l2Miss, l3Miss, updateDB, updateOnlyDB bool) error { + peerMac net.HardwareAddr, vtep net.IP, l2Miss, l3Miss, updateDB, localPeer bool) error { if err := validateID(nid, eid); err != nil { return err } + var dbEntries int + var inserted bool if updateDB { - d.peerDbAdd(nid, eid, peerIP, peerIPMask, peerMac, vtep, false) - if updateOnlyDB { - return nil + inserted, dbEntries = d.peerDbAdd(nid, eid, peerIP, peerIPMask, peerMac, vtep, localPeer) + if !inserted { + logrus.Warnf("Entry already present in db: nid:%s eid:%s peerIP:%v peerMac:%v isLocal:%t vtep:%v", + nid, eid, peerIP, peerMac, localPeer, vtep) } } + // Local peers do not need any further configuration + if localPeer { + return nil + } + n := d.network(nid) if n == nil { return nil @@ -353,21 +394,26 @@ func (d *driver) peerAddOp(nid, eid string, peerIP net.IP, peerIPMask net.IPMask // Add neighbor entry for the peer IP if err := sbox.AddNeighbor(peerIP, peerMac, l3Miss, sbox.NeighborOptions().LinkName(s.vxlanName)); err != nil { - return fmt.Errorf("could not add neighbor entry into the sandbox: %v", err) + if _, ok := err.(osl.NeighborSearchError); ok && dbEntries > 1 { + // We are in the transient case so only the first configuration is programmed into the kernel + // Upon deletion if the active configuration is deleted the next one from the database will be restored + // Note we are 
skipping also the next configuration + return nil + } + return fmt.Errorf("could not add neighbor entry for nid:%s eid:%s into the sandbox:%v", nid, eid, err) } // Add fdb entry to the bridge for the peer mac if err := sbox.AddNeighbor(vtep, peerMac, l2Miss, sbox.NeighborOptions().LinkName(s.vxlanName), sbox.NeighborOptions().Family(syscall.AF_BRIDGE)); err != nil { - return fmt.Errorf("could not add fdb entry into the sandbox: %v", err) + return fmt.Errorf("could not add fdb entry for nid:%s eid:%s into the sandbox:%v", nid, eid, err) } return nil } func (d *driver) peerDelete(nid, eid string, peerIP net.IP, peerIPMask net.IPMask, - peerMac net.HardwareAddr, vtep net.IP) { - callerName := common.CallerName(1) + peerMac net.HardwareAddr, vtep net.IP, localPeer bool) { d.peerOpCh <- &peerOperation{ opType: peerOperationDELETE, networkID: nid, @@ -376,18 +422,23 @@ func (d *driver) peerDelete(nid, eid string, peerIP net.IP, peerIPMask net.IPMas peerIPMask: peerIPMask, peerMac: peerMac, vtepIP: vtep, - callerName: callerName, + callerName: common.CallerName(1), + localPeer: localPeer, } } func (d *driver) peerDeleteOp(nid, eid string, peerIP net.IP, peerIPMask net.IPMask, - peerMac net.HardwareAddr, vtep net.IP) error { + peerMac net.HardwareAddr, vtep net.IP, localPeer bool) error { if err := validateID(nid, eid); err != nil { return err } - pEntry := d.peerDbDelete(nid, eid, peerIP, peerIPMask, peerMac, vtep) + deleted, dbEntries := d.peerDbDelete(nid, eid, peerIP, peerIPMask, peerMac, vtep, localPeer) + if !deleted { + logrus.Warnf("Entry was not in db: nid:%s eid:%s peerIP:%v peerMac:%v isLocal:%t vtep:%v", + nid, eid, peerIP, peerMac, localPeer, vtep) + } n := d.network(nid) if n == nil { @@ -399,30 +450,59 @@ func (d *driver) peerDeleteOp(nid, eid string, peerIP net.IP, peerIPMask net.IPM return nil } - // Delete fdb entry to the bridge for the peer mac only if the - // entry existed in local peerdb. If it is a stale delete - // request, still call DeleteNeighbor but only to cleanup any - // leftover sandbox neighbor cache and not actually delete the - // kernel state. 
- if (eid == pEntry.eid && vtep.Equal(pEntry.vtep)) || - (eid != pEntry.eid && !vtep.Equal(pEntry.vtep)) { - if err := sbox.DeleteNeighbor(vtep, peerMac, - eid == pEntry.eid && vtep.Equal(pEntry.vtep)); err != nil { - return fmt.Errorf("could not delete fdb entry into the sandbox: %v", err) - } + if err := d.checkEncryption(nid, vtep, 0, localPeer, false); err != nil { + logrus.Warn(err) } - // Delete neighbor entry for the peer IP - if eid == pEntry.eid { + // Local peers do not have any local configuration to delete + if !localPeer { + // Remove fdb entry to the bridge for the peer mac + if err := sbox.DeleteNeighbor(vtep, peerMac, true); err != nil { + if _, ok := err.(osl.NeighborSearchError); ok && dbEntries > 0 { + // We fall in here if there is a transient state and if the neighbor that is being deleted + // was never been configured into the kernel (we allow only 1 configuration at the time per mapping) + return nil + } + return fmt.Errorf("could not delete fdb entry for nid:%s eid:%s into the sandbox:%v", nid, eid, err) + } + + // Delete neighbor entry for the peer IP if err := sbox.DeleteNeighbor(peerIP, peerMac, true); err != nil { - return fmt.Errorf("could not delete neighbor entry into the sandbox: %v", err) + return fmt.Errorf("could not delete neighbor entry for nid:%s eid:%s into the sandbox:%v", nid, eid, err) } } - if err := d.checkEncryption(nid, vtep, 0, false, false); err != nil { - logrus.Warn(err) + if dbEntries == 0 { + return nil + } + + // If there is still an entry into the database and the deletion went through without errors means that there is now no + // configuration active in the kernel. + // Restore one configuration for the directly from the database, note that is guaranteed that there is one + peerKey, peerEntry, err := d.peerDbSearch(nid, peerIP) + if err != nil { + logrus.Errorf("peerDeleteOp unable to restore a configuration for nid:%s ip:%v mac:%v err:%s", nid, peerIP, peerMac, err) + return err } + return d.peerAddOp(nid, peerEntry.eid, peerIP, peerEntry.peerIPMask, peerKey.peerMac, peerEntry.vtep, false, false, false, peerEntry.isLocal) +} +func (d *driver) peerFlush(nid string) { + d.peerOpCh <- &peerOperation{ + opType: peerOperationFLUSH, + networkID: nid, + callerName: common.CallerName(1), + } +} + +func (d *driver) peerFlushOp(nid string) error { + d.peerDb.Lock() + defer d.peerDb.Unlock() + _, ok := d.peerDb.mp[nid] + if !ok { + return fmt.Errorf("Unable to find the peerDB for nid:%s", nid) + } + delete(d.peerDb.mp, nid) return nil } diff --git a/components/engine/vendor/github.com/docker/libnetwork/drivers/solaris/overlay/ov_network.go b/components/engine/vendor/github.com/docker/libnetwork/drivers/solaris/overlay/ov_network.go index 4e3f9ae76f0..039dbd58b66 100644 --- a/components/engine/vendor/github.com/docker/libnetwork/drivers/solaris/overlay/ov_network.go +++ b/components/engine/vendor/github.com/docker/libnetwork/drivers/solaris/overlay/ov_network.go @@ -718,7 +718,7 @@ func (n *network) obtainVxlanID(s *subnet) error { } if s.vni == 0 { - vxlanID, err := n.driver.vxlanIdm.GetID() + vxlanID, err := n.driver.vxlanIdm.GetID(true) if err != nil { return fmt.Errorf("failed to allocate vxlan id: %v", err) } diff --git a/components/engine/vendor/github.com/docker/libnetwork/idm/idm.go b/components/engine/vendor/github.com/docker/libnetwork/idm/idm.go index 7e449a0dc89..d5843d4a580 100644 --- a/components/engine/vendor/github.com/docker/libnetwork/idm/idm.go +++ b/components/engine/vendor/github.com/docker/libnetwork/idm/idm.go @@ -34,11 
+34,11 @@ func New(ds datastore.DataStore, id string, start, end uint64) (*Idm, error) { } // GetID returns the first available id in the set -func (i *Idm) GetID() (uint64, error) { +func (i *Idm) GetID(serial bool) (uint64, error) { if i.handle == nil { return 0, errors.New("ID set is not initialized") } - ordinal, err := i.handle.SetAny() + ordinal, err := i.handle.SetAny(serial) return i.start + ordinal, err } @@ -56,7 +56,7 @@ func (i *Idm) GetSpecificID(id uint64) error { } // GetIDInRange returns the first available id in the set within a [start,end] range -func (i *Idm) GetIDInRange(start, end uint64) (uint64, error) { +func (i *Idm) GetIDInRange(start, end uint64, serial bool) (uint64, error) { if i.handle == nil { return 0, errors.New("ID set is not initialized") } @@ -65,7 +65,7 @@ func (i *Idm) GetIDInRange(start, end uint64) (uint64, error) { return 0, errors.New("Requested range does not belong to the set") } - ordinal, err := i.handle.SetAnyInRange(start-i.start, end-i.start) + ordinal, err := i.handle.SetAnyInRange(start-i.start, end-i.start, serial) return i.start + ordinal, err } diff --git a/components/engine/vendor/github.com/docker/libnetwork/ipam/allocator.go b/components/engine/vendor/github.com/docker/libnetwork/ipam/allocator.go index c4ed9a0e329..5beb429dfc5 100644 --- a/components/engine/vendor/github.com/docker/libnetwork/ipam/allocator.go +++ b/components/engine/vendor/github.com/docker/libnetwork/ipam/allocator.go @@ -457,7 +457,15 @@ func (a *Allocator) RequestAddress(poolID string, prefAddress net.IP, opts map[s return nil, nil, types.InternalErrorf("could not find bitmask in datastore for %s on address %v request from pool %s: %v", k.String(), prefAddress, poolID, err) } - ip, err := a.getAddress(p.Pool, bm, prefAddress, p.Range) + // In order to request for a serial ip address allocation, callers can pass in the option to request + // IP allocation serially or first available IP in the subnet + var serial bool + if opts != nil { + if val, ok := opts[ipamapi.AllocSerialPrefix]; ok { + serial = (val == "true") + } + } + ip, err := a.getAddress(p.Pool, bm, prefAddress, p.Range, serial) if err != nil { return nil, nil, err } @@ -522,7 +530,7 @@ func (a *Allocator) ReleaseAddress(poolID string, address net.IP) error { return bm.Unset(ipToUint64(h)) } -func (a *Allocator) getAddress(nw *net.IPNet, bitmask *bitseq.Handle, prefAddress net.IP, ipr *AddressRange) (net.IP, error) { +func (a *Allocator) getAddress(nw *net.IPNet, bitmask *bitseq.Handle, prefAddress net.IP, ipr *AddressRange, serial bool) (net.IP, error) { var ( ordinal uint64 err error @@ -535,7 +543,7 @@ func (a *Allocator) getAddress(nw *net.IPNet, bitmask *bitseq.Handle, prefAddres return nil, ipamapi.ErrNoAvailableIPs } if ipr == nil && prefAddress == nil { - ordinal, err = bitmask.SetAny() + ordinal, err = bitmask.SetAny(serial) } else if prefAddress != nil { hostPart, e := types.GetHostPartIP(prefAddress, base.Mask) if e != nil { @@ -544,7 +552,7 @@ func (a *Allocator) getAddress(nw *net.IPNet, bitmask *bitseq.Handle, prefAddres ordinal = ipToUint64(types.GetMinimalIP(hostPart)) err = bitmask.Set(ordinal) } else { - ordinal, err = bitmask.SetAnyInRange(ipr.Start, ipr.End) + ordinal, err = bitmask.SetAnyInRange(ipr.Start, ipr.End, serial) } switch err { diff --git a/components/engine/vendor/github.com/docker/libnetwork/ipamapi/labels.go b/components/engine/vendor/github.com/docker/libnetwork/ipamapi/labels.go new file mode 100644 index 00000000000..e5c7d1cc7ef --- /dev/null +++ 
b/components/engine/vendor/github.com/docker/libnetwork/ipamapi/labels.go @@ -0,0 +1,10 @@ +package ipamapi + +const ( + // Prefix constant marks the reserved label space for libnetwork + Prefix = "com.docker.network" + + // AllocSerialPrefix constant marks the reserved label space for libnetwork ipam + // allocation ordering.(serial/first available) + AllocSerialPrefix = Prefix + ".ipam.serial" +) diff --git a/components/engine/vendor/github.com/docker/libnetwork/iptables/iptables.go b/components/engine/vendor/github.com/docker/libnetwork/iptables/iptables.go index 3e120059def..5518fcb2782 100644 --- a/components/engine/vendor/github.com/docker/libnetwork/iptables/iptables.go +++ b/components/engine/vendor/github.com/docker/libnetwork/iptables/iptables.go @@ -456,7 +456,7 @@ func RawCombinedOutputNative(args ...string) error { // ExistChain checks if a chain exists func ExistChain(chain string, table Table) bool { - if _, err := Raw("-t", string(table), "-L", chain); err == nil { + if _, err := Raw("-t", string(table), "-nL", chain); err == nil { return true } return false diff --git a/components/engine/vendor/github.com/docker/libnetwork/networkdb/broadcast.go b/components/engine/vendor/github.com/docker/libnetwork/networkdb/broadcast.go index 8317ed03f6c..174023b22b8 100644 --- a/components/engine/vendor/github.com/docker/libnetwork/networkdb/broadcast.go +++ b/components/engine/vendor/github.com/docker/libnetwork/networkdb/broadcast.go @@ -32,7 +32,7 @@ func (nDB *NetworkDB) sendNetworkEvent(nid string, event NetworkEvent_Type, ltim nEvent := NetworkEvent{ Type: event, LTime: ltime, - NodeName: nDB.config.NodeName, + NodeName: nDB.config.NodeID, NetworkID: nid, } @@ -44,7 +44,7 @@ func (nDB *NetworkDB) sendNetworkEvent(nid string, event NetworkEvent_Type, ltim nDB.networkBroadcasts.QueueBroadcast(&networkEventMessage{ msg: raw, id: nid, - node: nDB.config.NodeName, + node: nDB.config.NodeID, }) return nil } @@ -72,7 +72,7 @@ func (nDB *NetworkDB) sendNodeEvent(event NodeEvent_Type) error { nEvent := NodeEvent{ Type: event, LTime: nDB.networkClock.Increment(), - NodeName: nDB.config.NodeName, + NodeName: nDB.config.NodeID, } raw, err := encodeMessage(MessageTypeNodeEvent, &nEvent) @@ -129,7 +129,7 @@ func (nDB *NetworkDB) sendTableEvent(event TableEvent_Type, nid string, tname st tEvent := TableEvent{ Type: event, LTime: entry.ltime, - NodeName: nDB.config.NodeName, + NodeName: nDB.config.NodeID, NetworkID: nid, TableName: tname, Key: key, @@ -145,7 +145,7 @@ func (nDB *NetworkDB) sendTableEvent(event TableEvent_Type, nid string, tname st var broadcastQ *memberlist.TransmitLimitedQueue nDB.RLock() - thisNodeNetworks, ok := nDB.networks[nDB.config.NodeName] + thisNodeNetworks, ok := nDB.networks[nDB.config.NodeID] if ok { // The network may have been removed network, networkOk := thisNodeNetworks[nid] @@ -168,7 +168,7 @@ func (nDB *NetworkDB) sendTableEvent(event TableEvent_Type, nid string, tname st id: nid, tname: tname, key: key, - node: nDB.config.NodeName, + node: nDB.config.NodeID, }) return nil } diff --git a/components/engine/vendor/github.com/docker/libnetwork/networkdb/cluster.go b/components/engine/vendor/github.com/docker/libnetwork/networkdb/cluster.go index af6f5d9f7b1..06a7aff4bdb 100644 --- a/components/engine/vendor/github.com/docker/libnetwork/networkdb/cluster.go +++ b/components/engine/vendor/github.com/docker/libnetwork/networkdb/cluster.go @@ -106,7 +106,7 @@ func (nDB *NetworkDB) clusterInit() error { nDB.lastHealthTimestamp = nDB.lastStatsTimestamp config := 
memberlist.DefaultLANConfig() - config.Name = nDB.config.NodeName + config.Name = nDB.config.NodeID config.BindAddr = nDB.config.BindAddr config.AdvertiseAddr = nDB.config.AdvertiseAddr config.UDPBufferSize = nDB.config.PacketBufferSize @@ -329,7 +329,7 @@ func (nDB *NetworkDB) reapTableEntries() { var nodeNetworks []string // This is best effort, if the list of network changes will be picked up in the next cycle nDB.RLock() - for nid := range nDB.networks[nDB.config.NodeName] { + for nid := range nDB.networks[nDB.config.NodeID] { nodeNetworks = append(nodeNetworks, nid) } nDB.RUnlock() @@ -376,7 +376,7 @@ func (nDB *NetworkDB) reapTableEntries() { func (nDB *NetworkDB) gossip() { networkNodes := make(map[string][]string) nDB.RLock() - thisNodeNetworks := nDB.networks[nDB.config.NodeName] + thisNodeNetworks := nDB.networks[nDB.config.NodeID] for nid := range thisNodeNetworks { networkNodes[nid] = nDB.networkNodes[nid] @@ -388,7 +388,7 @@ func (nDB *NetworkDB) gossip() { if printHealth { healthScore := nDB.memberlist.GetHealthScore() if healthScore != 0 { - logrus.Warnf("NetworkDB stats - healthscore:%d (connectivity issues)", healthScore) + logrus.Warnf("NetworkDB stats %v(%v) - healthscore:%d (connectivity issues)", nDB.config.Hostname, nDB.config.NodeID, healthScore) } nDB.lastHealthTimestamp = time.Now() } @@ -419,7 +419,8 @@ func (nDB *NetworkDB) gossip() { // Collect stats and print the queue info, note this code is here also to have a view of the queues empty network.qMessagesSent += len(msgs) if printStats { - logrus.Infof("NetworkDB stats - netID:%s leaving:%t netPeers:%d entries:%d Queue qLen:%d netMsg/s:%d", + logrus.Infof("NetworkDB stats %v(%v) - netID:%s leaving:%t netPeers:%d entries:%d Queue qLen:%d netMsg/s:%d", + nDB.config.Hostname, nDB.config.NodeID, nid, network.leaving, broadcastQ.NumNodes(), network.entriesNumber, broadcastQ.NumQueued(), network.qMessagesSent/int((nDB.config.StatsPrintPeriod/time.Second))) network.qMessagesSent = 0 @@ -456,7 +457,7 @@ func (nDB *NetworkDB) gossip() { func (nDB *NetworkDB) bulkSyncTables() { var networks []string nDB.RLock() - for nid, network := range nDB.networks[nDB.config.NodeName] { + for nid, network := range nDB.networks[nDB.config.NodeID] { if network.leaving { continue } @@ -522,10 +523,10 @@ func (nDB *NetworkDB) bulkSync(nodes []string, all bool) ([]string, error) { var err error var networks []string for _, node := range nodes { - if node == nDB.config.NodeName { + if node == nDB.config.NodeID { continue } - logrus.Debugf("%s: Initiating bulk sync with node %v", nDB.config.NodeName, node) + logrus.Debugf("%v(%v): Initiating bulk sync with node %v", nDB.config.Hostname, nDB.config.NodeID, node) networks = nDB.findCommonNetworks(node) err = nDB.bulkSyncNode(networks, node, true) // if its periodic bulksync stop after the first successful sync @@ -556,7 +557,8 @@ func (nDB *NetworkDB) bulkSyncNode(networks []string, node string, unsolicited b unsolMsg = "unsolicited" } - logrus.Debugf("%s: Initiating %s bulk sync for networks %v with node %s", nDB.config.NodeName, unsolMsg, networks, node) + logrus.Debugf("%v(%v): Initiating %s bulk sync for networks %v with node %s", + nDB.config.Hostname, nDB.config.NodeID, unsolMsg, networks, node) nDB.RLock() mnode := nDB.nodes[node] @@ -608,7 +610,7 @@ func (nDB *NetworkDB) bulkSyncNode(networks []string, node string, unsolicited b bsm := BulkSyncMessage{ LTime: nDB.tableClock.Time(), Unsolicited: unsolicited, - NodeName: nDB.config.NodeName, + NodeName: nDB.config.NodeID, Networks: 
networks, Payload: compound, } @@ -640,7 +642,7 @@ func (nDB *NetworkDB) bulkSyncNode(networks []string, node string, unsolicited b case <-t.C: logrus.Errorf("Bulk sync to node %s timed out", node) case <-ch: - logrus.Debugf("%s: Bulk sync to node %s took %s", nDB.config.NodeName, node, time.Since(startTime)) + logrus.Debugf("%v(%v): Bulk sync to node %s took %s", nDB.config.Hostname, nDB.config.NodeID, node, time.Since(startTime)) } t.Stop() } @@ -677,7 +679,7 @@ OUTER: idx := randomOffset(n) node := nodes[idx] - if node == nDB.config.NodeName { + if node == nDB.config.NodeID { continue } diff --git a/components/engine/vendor/github.com/docker/libnetwork/networkdb/delegate.go b/components/engine/vendor/github.com/docker/libnetwork/networkdb/delegate.go index 28919cf3d22..6553810064e 100644 --- a/components/engine/vendor/github.com/docker/libnetwork/networkdb/delegate.go +++ b/components/engine/vendor/github.com/docker/libnetwork/networkdb/delegate.go @@ -2,7 +2,6 @@ package networkdb import ( "net" - "strings" "time" "github.com/gogo/protobuf/proto" @@ -58,29 +57,6 @@ func (nDB *NetworkDB) checkAndGetNode(nEvent *NodeEvent) *node { return nil } -func (nDB *NetworkDB) purgeSameNode(n *node) { - nDB.Lock() - defer nDB.Unlock() - - prefix := strings.Split(n.Name, "-")[0] - for _, nodes := range []map[string]*node{ - nDB.failedNodes, - nDB.leftNodes, - nDB.nodes, - } { - var nodeNames []string - for name, node := range nodes { - if strings.HasPrefix(name, prefix) && n.Addr.Equal(node.Addr) { - nodeNames = append(nodeNames, name) - } - } - - for _, name := range nodeNames { - delete(nodes, name) - } - } -} - func (nDB *NetworkDB) handleNodeEvent(nEvent *NodeEvent) bool { // Update our local clock if the received messages has newer // time. @@ -108,7 +84,6 @@ func (nDB *NetworkDB) handleNodeEvent(nEvent *NodeEvent) bool { return false } - nDB.purgeSameNode(n) n.ltime = nEvent.LTime switch nEvent.Type { @@ -140,7 +115,7 @@ func (nDB *NetworkDB) handleNetworkEvent(nEvent *NetworkEvent) bool { nDB.Lock() defer nDB.Unlock() - if nEvent.NodeName == nDB.config.NodeName { + if nEvent.NodeName == nDB.config.NodeID { return false } @@ -203,7 +178,7 @@ func (nDB *NetworkDB) handleTableEvent(tEvent *TableEvent) bool { // Ignore the table events for networks that are in the process of going away nDB.RLock() - networks := nDB.networks[nDB.config.NodeName] + networks := nDB.networks[nDB.config.NodeID] network, ok := networks[tEvent.NetworkID] // Check if the owner of the event is still part of the network nodes := nDB.networkNodes[tEvent.NetworkID] @@ -253,7 +228,8 @@ func (nDB *NetworkDB) handleTableEvent(tEvent *TableEvent) bool { // If it is a delete event and we did not have a state for it, don't propagate to the application // If the residual reapTime is lower or equal to 1/6 of the total reapTime don't bother broadcasting it around // most likely the cluster is already aware of it, if not who will sync with this node will catch the state too. - return e.reapTime > reapPeriod/6 + // This also avoids that deletion of entries close to their garbage collection ends up circuling around forever + return e.reapTime > reapEntryInterval/6 } var op opType @@ -292,7 +268,7 @@ func (nDB *NetworkDB) handleTableMessage(buf []byte, isBulkSync bool) { } // Ignore messages that this node generated. 
- if tEvent.NodeName == nDB.config.NodeName { + if tEvent.NodeName == nDB.config.NodeID { return } @@ -305,7 +281,7 @@ func (nDB *NetworkDB) handleTableMessage(buf []byte, isBulkSync bool) { } nDB.RLock() - n, ok := nDB.networks[nDB.config.NodeName][tEvent.NetworkID] + n, ok := nDB.networks[nDB.config.NodeID][tEvent.NetworkID] nDB.RUnlock() // if the network is not there anymore, OR we are leaving the network OR the broadcast queue is not present @@ -424,7 +400,7 @@ func (nDB *NetworkDB) handleMessage(buf []byte, isBulkSync bool) { case MessageTypeCompound: nDB.handleCompound(data, isBulkSync) default: - logrus.Errorf("%s: unknown message type %d", nDB.config.NodeName, mType) + logrus.Errorf("%v(%v): unknown message type %d", nDB.config.Hostname, nDB.config.NodeID, mType) } } @@ -457,7 +433,7 @@ func (d *delegate) LocalState(join bool) []byte { pp := NetworkPushPull{ LTime: d.nDB.networkClock.Time(), - NodeName: d.nDB.config.NodeName, + NodeName: d.nDB.config.NodeID, } for name, nn := range d.nDB.networks { diff --git a/components/engine/vendor/github.com/docker/libnetwork/networkdb/networkdb.go b/components/engine/vendor/github.com/docker/libnetwork/networkdb/networkdb.go index afdf32e2c28..02502cb483a 100644 --- a/components/engine/vendor/github.com/docker/libnetwork/networkdb/networkdb.go +++ b/components/engine/vendor/github.com/docker/libnetwork/networkdb/networkdb.go @@ -11,6 +11,7 @@ import ( "time" "github.com/armon/go-radix" + "github.com/docker/docker/pkg/stringid" "github.com/docker/go-events" "github.com/docker/libnetwork/types" "github.com/hashicorp/memberlist" @@ -151,8 +152,11 @@ type network struct { // Config represents the configuration of the networdb instance and // can be passed by the caller. type Config struct { - // NodeName is the cluster wide unique name for this node. - NodeName string + // NodeID is the node unique identifier of the node when is part of the cluster + NodeID string + + // Hostname is the node hostname. + Hostname string // BindAddr is the IP on which networkdb listens. It can be // 0.0.0.0 to listen on all addresses on the host. @@ -210,7 +214,8 @@ type entry struct { func DefaultConfig() *Config { hostname, _ := os.Hostname() return &Config{ - NodeName: hostname, + NodeID: stringid.TruncateID(stringid.GenerateRandomID()), + Hostname: hostname, BindAddr: "0.0.0.0", PacketBufferSize: 1400, StatsPrintPeriod: 5 * time.Minute, @@ -236,6 +241,7 @@ func New(c *Config) (*NetworkDB, error) { nDB.indexes[byTable] = radix.New() nDB.indexes[byNetwork] = radix.New() + logrus.Debugf("New memberlist node - Node:%v will use memberlist nodeID:%v", c.Hostname, c.NodeID) if err := nDB.clusterInit(); err != nil { return nil, err } @@ -259,8 +265,11 @@ func (nDB *NetworkDB) Join(members []string) error { // stopping timers, canceling goroutines etc. func (nDB *NetworkDB) Close() { if err := nDB.clusterLeave(); err != nil { - logrus.Errorf("Could not close DB %s: %v", nDB.config.NodeName, err) + logrus.Errorf("%v(%v) Could not close DB: %v", nDB.config.Hostname, nDB.config.NodeID, err) } + + //Avoid (*Broadcaster).run goroutine leak + nDB.broadcaster.Close() } // ClusterPeers returns all the gossip cluster peers. 
@@ -334,7 +343,7 @@ func (nDB *NetworkDB) CreateEntry(tname, nid, key string, value []byte) error { entry := &entry{ ltime: nDB.tableClock.Increment(), - node: nDB.config.NodeName, + node: nDB.config.NodeID, value: value, } @@ -360,7 +369,7 @@ func (nDB *NetworkDB) UpdateEntry(tname, nid, key string, value []byte) error { entry := &entry{ ltime: nDB.tableClock.Increment(), - node: nDB.config.NodeName, + node: nDB.config.NodeID, value: value, } @@ -402,7 +411,7 @@ func (nDB *NetworkDB) DeleteEntry(tname, nid, key string) error { entry := &entry{ ltime: nDB.tableClock.Increment(), - node: nDB.config.NodeName, + node: nDB.config.NodeID, value: value, deleting: true, reapTime: reapEntryInterval, @@ -451,7 +460,7 @@ func (nDB *NetworkDB) deleteNetworkEntriesForNode(deletedNode string) { // entries owned by remote nodes, we will accept them and we notify the application func (nDB *NetworkDB) deleteNodeNetworkEntries(nid, node string) { // Indicates if the delete is triggered for the local node - isNodeLocal := node == nDB.config.NodeName + isNodeLocal := node == nDB.config.NodeID nDB.indexes[byNetwork].WalkPrefix(fmt.Sprintf("/%s", nid), func(path string, v interface{}) bool { @@ -496,7 +505,10 @@ func (nDB *NetworkDB) deleteNodeNetworkEntries(nid, node string) { nDB.deleteEntry(nid, tname, key) } - nDB.broadcaster.Write(makeEvent(opDelete, tname, nid, key, entry.value)) + // Notify to the upper layer only entries not already marked for deletion + if !oldEntry.deleting { + nDB.broadcaster.Write(makeEvent(opDelete, tname, nid, key, entry.value)) + } return false }) } @@ -552,10 +564,10 @@ func (nDB *NetworkDB) JoinNetwork(nid string) error { ltime := nDB.networkClock.Increment() nDB.Lock() - nodeNetworks, ok := nDB.networks[nDB.config.NodeName] + nodeNetworks, ok := nDB.networks[nDB.config.NodeID] if !ok { nodeNetworks = make(map[string]*network) - nDB.networks[nDB.config.NodeName] = nodeNetworks + nDB.networks[nDB.config.NodeID] = nodeNetworks } n, ok := nodeNetworks[nid] var entries int @@ -571,8 +583,7 @@ func (nDB *NetworkDB) JoinNetwork(nid string) error { }, RetransmitMult: 4, } - - nDB.addNetworkNode(nid, nDB.config.NodeName) + nDB.addNetworkNode(nid, nDB.config.NodeID) networkNodes := nDB.networkNodes[nid] nDB.Unlock() @@ -580,7 +591,7 @@ func (nDB *NetworkDB) JoinNetwork(nid string) error { return fmt.Errorf("failed to send leave network event for %s: %v", nid, err) } - logrus.Debugf("%s: joined network %s", nDB.config.NodeName, nid) + logrus.Debugf("%v(%v): joined network %s", nDB.config.Hostname, nDB.config.NodeID, nid) if _, err := nDB.bulkSync(networkNodes, true); err != nil { logrus.Errorf("Error bulk syncing while joining network %s: %v", nid, err) } @@ -604,12 +615,12 @@ func (nDB *NetworkDB) LeaveNetwork(nid string) error { defer nDB.Unlock() // Remove myself from the list of the nodes participating to the network - nDB.deleteNetworkNode(nid, nDB.config.NodeName) + nDB.deleteNetworkNode(nid, nDB.config.NodeID) // Update all the local entries marking them for deletion and delete all the remote entries - nDB.deleteNodeNetworkEntries(nid, nDB.config.NodeName) + nDB.deleteNodeNetworkEntries(nid, nDB.config.NodeID) - nodeNetworks, ok := nDB.networks[nDB.config.NodeName] + nodeNetworks, ok := nDB.networks[nDB.config.NodeID] if !ok { return fmt.Errorf("could not find self node for network %s while trying to leave", nid) } @@ -619,7 +630,7 @@ func (nDB *NetworkDB) LeaveNetwork(nid string) error { return fmt.Errorf("could not find network %s while trying to leave", nid) } - 
logrus.Debugf("%s: leaving network %s", nDB.config.NodeName, nid) + logrus.Debugf("%v(%v): leaving network %s", nDB.config.Hostname, nDB.config.NodeID, nid) n.ltime = ltime n.reapTime = reapNetworkInterval n.leaving = true @@ -665,7 +676,7 @@ func (nDB *NetworkDB) findCommonNetworks(nodeName string) []string { defer nDB.RUnlock() var networks []string - for nid := range nDB.networks[nDB.config.NodeName] { + for nid := range nDB.networks[nDB.config.NodeID] { if n, ok := nDB.networks[nodeName][nid]; ok { if !n.leaving { networks = append(networks, nid) @@ -681,7 +692,7 @@ func (nDB *NetworkDB) updateLocalNetworkTime() { defer nDB.Unlock() ltime := nDB.networkClock.Increment() - for _, n := range nDB.networks[nDB.config.NodeName] { + for _, n := range nDB.networks[nDB.config.NodeID] { n.ltime = ltime } } @@ -693,7 +704,7 @@ func (nDB *NetworkDB) createOrUpdateEntry(nid, tname, key string, entry interfac _, okNetwork := nDB.indexes[byNetwork].Insert(fmt.Sprintf("/%s/%s/%s", nid, tname, key), entry) if !okNetwork { // Add only if it is an insert not an update - n, ok := nDB.networks[nDB.config.NodeName][nid] + n, ok := nDB.networks[nDB.config.NodeID][nid] if ok { n.entriesNumber++ } @@ -708,7 +719,7 @@ func (nDB *NetworkDB) deleteEntry(nid, tname, key string) (bool, bool) { _, okNetwork := nDB.indexes[byNetwork].Delete(fmt.Sprintf("/%s/%s/%s", nid, tname, key)) if okNetwork { // Remove only if the delete is successful - n, ok := nDB.networks[nDB.config.NodeName][nid] + n, ok := nDB.networks[nDB.config.NodeID][nid] if ok { n.entriesNumber-- } diff --git a/components/engine/vendor/github.com/docker/libnetwork/osl/neigh_linux.go b/components/engine/vendor/github.com/docker/libnetwork/osl/neigh_linux.go index 4e479489fa3..6bf1c16dc5a 100644 --- a/components/engine/vendor/github.com/docker/libnetwork/osl/neigh_linux.go +++ b/components/engine/vendor/github.com/docker/libnetwork/osl/neigh_linux.go @@ -9,6 +9,17 @@ import ( "github.com/vishvananda/netlink" ) +// NeighborSearchError indicates that the neighbor is already present +type NeighborSearchError struct { + ip net.IP + mac net.HardwareAddr + present bool +} + +func (n NeighborSearchError) Error() string { + return fmt.Sprintf("Search neighbor failed for IP %v, mac %v, present in db:%t", n.ip, n.mac, n.present) +} + // NeighOption is a function option type to set interface options type NeighOption func(nh *neigh) @@ -41,7 +52,7 @@ func (n *networkNamespace) DeleteNeighbor(dstIP net.IP, dstMac net.HardwareAddr, nh := n.findNeighbor(dstIP, dstMac) if nh == nil { - return fmt.Errorf("could not find the neighbor entry to delete") + return NeighborSearchError{dstIP, dstMac, false} } if osDelete { @@ -103,26 +114,27 @@ func (n *networkNamespace) DeleteNeighbor(dstIP net.IP, dstMac net.HardwareAddr, } } n.Unlock() - logrus.Debugf("Neighbor entry deleted for IP %v, mac %v", dstIP, dstMac) + logrus.Debugf("Neighbor entry deleted for IP %v, mac %v osDelete:%t", dstIP, dstMac, osDelete) return nil } func (n *networkNamespace) AddNeighbor(dstIP net.IP, dstMac net.HardwareAddr, force bool, options ...NeighOption) error { var ( - iface netlink.Link - err error + iface netlink.Link + err error + neighborAlreadyPresent bool ) // If the namespace already has the neighbor entry but the AddNeighbor is called // because of a miss notification (force flag) program the kernel anyway. 
 	nh := n.findNeighbor(dstIP, dstMac)
 	if nh != nil {
+		neighborAlreadyPresent = true
+		logrus.Warnf("Neighbor entry already present for IP %v, mac %v neighbor:%+v forceUpdate:%t", dstIP, dstMac, nh, force)
 		if !force {
-			logrus.Warnf("Neighbor entry already present for IP %v, mac %v", dstIP, dstMac)
-			return nil
+			return NeighborSearchError{dstIP, dstMac, true}
 		}
-		logrus.Warnf("Force kernel update, Neighbor entry already present for IP %v, mac %v", dstIP, dstMac)
 	}
 
 	nh = &neigh{
@@ -146,8 +158,7 @@ func (n *networkNamespace) AddNeighbor(dstIP net.IP, dstMac net.HardwareAddr, fo
 	if nh.linkDst != "" {
 		iface, err = nlh.LinkByName(nh.linkDst)
 		if err != nil {
-			return fmt.Errorf("could not find interface with destination name %s: %v",
-				nh.linkDst, err)
+			return fmt.Errorf("could not find interface with destination name %s: %v", nh.linkDst, err)
 		}
 	}
 
@@ -167,13 +178,17 @@ func (n *networkNamespace) AddNeighbor(dstIP net.IP, dstMac net.HardwareAddr, fo
 	}
 
 	if err := nlh.NeighSet(nlnh); err != nil {
-		return fmt.Errorf("could not add neighbor entry: %v", err)
+		return fmt.Errorf("could not add neighbor entry:%+v error:%v", nlnh, err)
+	}
+
+	if neighborAlreadyPresent {
+		return nil
 	}
 
 	n.Lock()
 	n.neighbors = append(n.neighbors, nh)
 	n.Unlock()
-	logrus.Debugf("Neighbor entry added for IP %v, mac %v", dstIP, dstMac)
+	logrus.Debugf("Neighbor entry added for IP:%v, mac:%v on ifc:%s", dstIP, dstMac, nh.linkName)
 
 	return nil
 }
diff --git a/components/engine/vendor/github.com/docker/swarmkit/manager/allocator/cnmallocator/networkallocator.go b/components/engine/vendor/github.com/docker/swarmkit/manager/allocator/cnmallocator/networkallocator.go
index ab1b6d6a474..53f9ffbeee6 100644
--- a/components/engine/vendor/github.com/docker/swarmkit/manager/allocator/cnmallocator/networkallocator.go
+++ b/components/engine/vendor/github.com/docker/swarmkit/manager/allocator/cnmallocator/networkallocator.go
@@ -574,6 +574,7 @@ func (na *cnmNetworkAllocator) releaseEndpoints(networks []*api.NetworkAttachmen
 // allocate virtual IP for a single endpoint attachment of the service.
 func (na *cnmNetworkAllocator) allocateVIP(vip *api.Endpoint_VirtualIP) error {
+	var opts map[string]string
 	localNet := na.getNetwork(vip.NetworkID)
 	if localNet == nil {
 		return errors.New("networkallocator: could not find local network state")
 	}
@@ -603,9 +604,13 @@ func (na *cnmNetworkAllocator) allocateVIP(vip *api.Endpoint_VirtualIP) error {
 			return err
 		}
 	}
+	if localNet.nw.IPAM != nil && localNet.nw.IPAM.Driver != nil {
+		// set ipam allocation method to serial
+		opts = setIPAMSerialAlloc(localNet.nw.IPAM.Driver.Options)
+	}
 
 	for _, poolID := range localNet.pools {
-		ip, _, err := ipam.RequestAddress(poolID, addr, nil)
+		ip, _, err := ipam.RequestAddress(poolID, addr, opts)
 		if err != nil && err != ipamapi.ErrNoAvailableIPs && err != ipamapi.ErrIPOutOfRange {
 			return errors.Wrap(err, "could not allocate VIP from IPAM")
 		}
@@ -657,6 +662,7 @@ func (na *cnmNetworkAllocator) deallocateVIP(vip *api.Endpoint_VirtualIP) error
 // allocate the IP addresses for a single network attachment of the task.
 func (na *cnmNetworkAllocator) allocateNetworkIPs(nAttach *api.NetworkAttachment) error {
 	var ip *net.IPNet
+	var opts map[string]string
 
 	ipam, _, _, err := na.resolveIPAM(nAttach.Network)
 	if err != nil {
 		return errors.Wrap(err, "failed to resolve IPAM while allocating")
 	}
@@ -686,11 +692,16 @@
 			}
 		}
 	}
+	// Set the ipam options if the network has an ipam driver.
+	if localNet.nw.IPAM != nil && localNet.nw.IPAM.Driver != nil {
+		// set ipam allocation method to serial
+		opts = setIPAMSerialAlloc(localNet.nw.IPAM.Driver.Options)
+	}
 
 	for _, poolID := range localNet.pools {
 		var err error
 
-		ip, _, err = ipam.RequestAddress(poolID, addr, nil)
+		ip, _, err = ipam.RequestAddress(poolID, addr, opts)
 		if err != nil && err != ipamapi.ErrNoAvailableIPs && err != ipamapi.ErrIPOutOfRange {
 			return errors.Wrap(err, "could not allocate IP from IPAM")
 		}
@@ -918,8 +929,16 @@ func (na *cnmNetworkAllocator) allocatePools(n *api.Network) (map[string]string,
 			}
 			gwIP.IP = ip
 		}
+		if dOptions == nil {
+			dOptions = make(map[string]string)
+		}
+		dOptions[ipamapi.RequestAddressType] = netlabel.Gateway
+		// set ipam allocation method to serial
+		dOptions = setIPAMSerialAlloc(dOptions)
+		defer delete(dOptions, ipamapi.RequestAddressType)
+
 		if ic.Gateway != "" || gwIP == nil {
-			gwIP, _, err = ipam.RequestAddress(poolID, net.ParseIP(ic.Gateway), map[string]string{ipamapi.RequestAddressType: netlabel.Gateway})
+			gwIP, _, err = ipam.RequestAddress(poolID, net.ParseIP(ic.Gateway), dOptions)
 			if err != nil {
 				// Rollback by releasing all the resources allocated so far.
 				releasePools(ipam, ipamConfigs[:i], pools)
@@ -980,3 +999,14 @@ func IsBuiltInDriver(name string) bool {
 	}
 	return false
 }
+
+// setIPAMSerialAlloc sets the ipam allocation method to serial
+func setIPAMSerialAlloc(opts map[string]string) map[string]string {
+	if opts == nil {
+		opts = make(map[string]string)
+	}
+	if _, ok := opts[ipamapi.AllocSerialPrefix]; !ok {
+		opts[ipamapi.AllocSerialPrefix] = "true"
+	}
+	return opts
+}
diff --git a/components/engine/vendor/github.com/docker/swarmkit/manager/allocator/cnmallocator/portallocator.go b/components/engine/vendor/github.com/docker/swarmkit/manager/allocator/cnmallocator/portallocator.go
index b09ac47c798..19dcbec7726 100644
--- a/components/engine/vendor/github.com/docker/swarmkit/manager/allocator/cnmallocator/portallocator.go
+++ b/components/engine/vendor/github.com/docker/swarmkit/manager/allocator/cnmallocator/portallocator.go
@@ -382,7 +382,7 @@ func (ps *portSpace) allocate(p *api.PortConfig) (err error) {
 	}
 
 	// Check out an arbitrary port from dynamic port space.
-	swarmPort, err := ps.dynamicPortSpace.GetID()
+	swarmPort, err := ps.dynamicPortSpace.GetID(true)
 	if err != nil {
 		return
 	}
diff --git a/components/engine/vendor/github.com/docker/swarmkit/manager/state/raft/raft.go b/components/engine/vendor/github.com/docker/swarmkit/manager/state/raft/raft.go
index ec2c9c0f5e7..afdf2ca4eba 100644
--- a/components/engine/vendor/github.com/docker/swarmkit/manager/state/raft/raft.go
+++ b/components/engine/vendor/github.com/docker/swarmkit/manager/state/raft/raft.go
@@ -542,6 +542,7 @@ func (n *Node) Run(ctx context.Context) error {
 		n.done()
 	}()
 
+	// Flag that indicates if this manager node is *currently* the raft leader.
 	wasLeader := false
 	transferLeadershipLimit := rate.NewLimiter(rate.Every(time.Minute), 1)
 
@@ -563,10 +564,13 @@ func (n *Node) Run(ctx context.Context) error {
 				return errors.Wrap(err, "failed to save entries to storage")
 			}
 
+			// If the memory store lock has been held for too long,
+			// transferring leadership is an easy way to break out of it.
 			if wasLeader &&
 				(rd.SoftState == nil || rd.SoftState.RaftState == raft.StateLeader) &&
 				n.memoryStore.Wedged() &&
 				transferLeadershipLimit.Allow() {
+				log.G(ctx).Error("Attempting to transfer leadership")
 				if !n.opts.DisableStackDump {
 					signal.DumpStacks("")
 				}
@@ -612,6 +616,8 @@ func (n *Node) Run(ctx context.Context) error {
 			if rd.SoftState != nil {
 				if wasLeader && rd.SoftState.RaftState != raft.StateLeader {
 					wasLeader = false
+					log.G(ctx).Error("soft state changed, node no longer a leader, resetting and cancelling all waits")
+
 					if atomic.LoadUint32(&n.signalledLeadership) == 1 {
 						atomic.StoreUint32(&n.signalledLeadership, 0)
 						n.leadershipBroadcast.Publish(IsFollower)
@@ -630,6 +636,7 @@ func (n *Node) Run(ctx context.Context) error {
 					// cancelAll, or by its own check of signalledLeadership.
 					n.wait.cancelAll()
 				} else if !wasLeader && rd.SoftState.RaftState == raft.StateLeader {
+					// Node just became a leader.
 					wasLeader = true
 				}
 			}
@@ -1478,7 +1485,7 @@ func (n *Node) registerNode(node *api.RaftMember) error {
 	return nil
 }
 
-// ProposeValue calls Propose on the raft and waits
+// ProposeValue calls Propose on the underlying raft library (etcd/raft) and waits
 // on the commit log action before returning a result
 func (n *Node) ProposeValue(ctx context.Context, storeAction []api.StoreAction, cb func()) error {
 	ctx, cancel := n.WithContext(ctx)
@@ -1654,11 +1661,14 @@ func (n *Node) saveToStorage(
 	return nil
 }
 
-// processInternalRaftRequest sends a message to nodes participating
-// in the raft to apply a log entry and then waits for it to be applied
-// on the server. It will block until the update is performed, there is
-// an error or until the raft node finalizes all the proposals on node
-// shutdown.
+// processInternalRaftRequest proposes a value to be appended to the raft log.
+// It calls Propose() on etcd/raft, which calls back into the raft FSM,
+// which then sends a message to each of the participating nodes
+// in the raft group to apply a log entry and then waits for it to be applied
+// on this node. It will block until this node:
+// 1. Gets the necessary replies back from the participating nodes and also performs the commit itself, or
+// 2. There is an error, or
+// 3. Until the raft node finalizes all the proposals on node shutdown.
 func (n *Node) processInternalRaftRequest(ctx context.Context, r *api.InternalRaftRequest, cb func()) (proto.Message, error) {
 	n.stopMu.RLock()
 	if !n.IsMember() {
@@ -1679,6 +1689,7 @@ func (n *Node) processInternalRaftRequest(ctx context.Context, r *api.InternalRa
 
 	// Do this check after calling register to avoid a race.
 	if atomic.LoadUint32(&n.signalledLeadership) != 1 {
+		log.G(ctx).Error("node is no longer leader, aborting propose")
 		n.wait.cancel(r.ID)
 		return nil, ErrLostLeadership
 	}
@@ -1703,14 +1714,23 @@ func (n *Node) processInternalRaftRequest(ctx context.Context, r *api.InternalRa
 	select {
 	case x, ok := <-ch:
 		if !ok {
+			// Wait notification channel was closed. This should only happen if the wait was cancelled.
+			log.G(ctx).Error("wait cancelled")
+			if atomic.LoadUint32(&n.signalledLeadership) == 1 {
+				log.G(ctx).Error("wait cancelled but node is still a leader")
+			}
 			return nil, ErrLostLeadership
 		}
 		return x.(proto.Message), nil
 	case <-waitCtx.Done():
 		n.wait.cancel(r.ID)
-		// if channel is closed, wait item was canceled, otherwise it was triggered
+		// If we can read from the channel, wait item was triggered. Otherwise it was cancelled.
 		x, ok := <-ch
 		if !ok {
+			log.G(ctx).WithError(waitCtx.Err()).Error("wait context cancelled")
+			if atomic.LoadUint32(&n.signalledLeadership) == 1 {
+				log.G(ctx).Error("wait context cancelled but node is still a leader")
+			}
 			return nil, ErrLostLeadership
 		}
 		return x.(proto.Message), nil
@@ -1779,21 +1799,26 @@ func (n *Node) processEntry(ctx context.Context, entry raftpb.Entry) error {
 	}
 
 	if !n.wait.trigger(r.ID, r) {
+		log.G(ctx).Errorf("wait not found for raft request id %x", r.ID)
+
 		// There was no wait on this ID, meaning we don't have a
 		// transaction in progress that would be committed to the
 		// memory store by the "trigger" call. Either a different node
 		// wrote this to raft, or we wrote it before losing the leader
-		// position and cancelling the transaction. Create a new
-		// transaction to commit the data.
+		// position and cancelling the transaction. This entry still needs
+		// to be committed since other nodes have already committed it.
+		// Create a new transaction to commit this entry.
 
 		// It should not be possible for processInternalRaftRequest
 		// to be running in this situation, but out of caution we
 		// cancel any current invocations to avoid a deadlock.
+		// TODO(anshul) This call is likely redundant, remove after consideration.
 		n.wait.cancelAll()
 		err := n.memoryStore.ApplyStoreActions(r.Action)
 		if err != nil {
 			log.G(ctx).WithError(err).Error("failed to apply actions from raft")
+			// TODO(anshul) return err here ?
 		}
 	}
 
 	return nil
diff --git a/components/engine/vendor/github.com/docker/swarmkit/manager/state/store/memory.go b/components/engine/vendor/github.com/docker/swarmkit/manager/state/store/memory.go
index 62ab9279d4b..01245a6966b 100644
--- a/components/engine/vendor/github.com/docker/swarmkit/manager/state/store/memory.go
+++ b/components/engine/vendor/github.com/docker/swarmkit/manager/state/store/memory.go
@@ -83,8 +83,7 @@ func register(os ObjectStoreConfig) {
 	schema.Tables[os.Table.Name] = os.Table
 }
 
-// timedMutex wraps a sync.Mutex, and keeps track of how long it has been
-// locked.
+// timedMutex wraps a sync.Mutex, and keeps track of when it was locked.
 type timedMutex struct {
 	sync.Mutex
 	lockedAt atomic.Value
diff --git a/components/engine/vendor/github.com/docker/swarmkit/vendor.conf b/components/engine/vendor/github.com/docker/swarmkit/vendor.conf
index abb1a6c2476..8949ea01fc4 100644
--- a/components/engine/vendor/github.com/docker/swarmkit/vendor.conf
+++ b/components/engine/vendor/github.com/docker/swarmkit/vendor.conf
@@ -24,7 +24,7 @@ github.com/docker/go-connections 3ede32e2033de7505e6500d6c868c2b9ed9f169d
 github.com/docker/go-events 9461782956ad83b30282bf90e31fa6a70c255ba9
 github.com/docker/go-units 954fed01cc617c55d838fa2230073f2cb17386c8
 github.com/docker/libkv 9fd56606e928ff1f309808f5d5a0b7a2ef73f9a8
-github.com/docker/libnetwork 19ac3ea7f52bb46e0eb10669756cdae0c441a5b1
+github.com/docker/libnetwork 21544598c53fa36a3c771a8725c643dd2340f845
 github.com/docker/libtrust 9cbd2a1374f46905c68a4eb3694a130610adc62a
 github.com/opencontainers/runc d40db12e72a40109dfcf28539f5ee0930d2f0277
 github.com/opencontainers/go-digest 21dfd564fd89c944783d00d069f33e3e7123c448
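
Editorial note on the serial IPAM allocation change above: the vendored swarmkit allocator now threads an options map built by setIPAMSerialAlloc into every ipam.RequestAddress call (allocateVIP, allocateNetworkIPs, and the gateway request in allocatePools), so the IPAM driver is asked to hand out addresses serially. The standalone Go sketch below mirrors that helper outside of the vendored tree; it is illustrative only, and the package layout, the withSerialAlloc name, and the literal option-key value are assumptions, not part of the patch.

    package main

    import "fmt"

    // allocSerialKey stands in for libnetwork's ipamapi.AllocSerialPrefix option
    // key; the literal value here is an assumption for illustration.
    const allocSerialKey = "com.docker.network.ipam.serial"

    // withSerialAlloc mirrors the vendored setIPAMSerialAlloc helper: it lazily
    // creates the options map and only sets the serial flag when the caller has
    // not already chosen a value for it.
    func withSerialAlloc(opts map[string]string) map[string]string {
        if opts == nil {
            opts = make(map[string]string)
        }
        if _, ok := opts[allocSerialKey]; !ok {
            opts[allocSerialKey] = "true"
        }
        return opts
    }

    func main() {
        // A nil map is allocated and flagged.
        fmt.Println(withSerialAlloc(nil))

        // An explicit caller-provided value is preserved, not overwritten.
        fmt.Println(withSerialAlloc(map[string]string{allocSerialKey: "false"}))
    }

The same "set only if unset" behaviour is why the patch can call the helper on driver-supplied option maps without clobbering a user's explicit choice.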