From d2e7a59b6b2b6143d621a040b1af8387ea2b4572 Mon Sep 17 00:00:00 2001 From: Santhosh Manohar Date: Thu, 13 Jul 2017 16:50:56 -0700 Subject: [PATCH 1/4] Attempt neighbor entry reprogramming on L3 miss When the gc_thresh3 value is reached kenel might remove existing neighbor entries. On an l3 miss try to reprogram the neighbor entry if the peer is valid. Its a best effort attempt because if the arp table is still at gc_thresh3 value, addition will fail. Signed-off-by: Santhosh Manohar (cherry picked from commit 48f0c2851431ae34a480a3fa3d84a5c918b133ec) --- drivers/overlay/ov_network.go | 65 +++++++++++++++++++++++++++++------ 1 file changed, 54 insertions(+), 11 deletions(-) diff --git a/drivers/overlay/ov_network.go b/drivers/overlay/ov_network.go index 6be88d9179..68d3eebefc 100644 --- a/drivers/overlay/ov_network.go +++ b/drivers/overlay/ov_network.go @@ -12,6 +12,7 @@ import ( "strings" "sync" "syscall" + "time" "github.com/Sirupsen/logrus" "github.com/docker/docker/pkg/reexec" @@ -705,6 +706,7 @@ func (n *network) initSandbox(restore bool) error { } func (n *network) watchMiss(nlSock *nl.NetlinkSocket) { + t := time.Now() for { msgs, err := nlSock.Receive() if err != nil { @@ -757,23 +759,55 @@ func (n *network) watchMiss(nlSock *nl.NetlinkSocket) { continue } - if !n.driver.isSerfAlive() { - continue - } - - mac, IPmask, vtep, err := n.driver.resolvePeer(n.id, ip) - if err != nil { - logrus.Errorf("could not resolve peer %q: %v", ip, err) - continue - } + if n.driver.isSerfAlive() { + mac, IPmask, vtep, err := n.driver.resolvePeer(n.id, ip) + if err != nil { + logrus.Errorf("could not resolve peer %q: %v", ip, err) + continue + } - if err := n.driver.peerAdd(n.id, "dummy", ip, IPmask, mac, vtep, true, l2Miss, l3Miss); err != nil { - logrus.Errorf("could not add neighbor entry for missed peer %q: %v", ip, err) + if err := n.driver.peerAdd(n.id, "dummy", ip, IPmask, mac, vtep, true, l2Miss, l3Miss); err != nil { + logrus.Errorf("could not add neighbor entry for missed peer %q: %v", ip, err) + } + } else { + // If the gc_thresh values are lower kernel might knock off the neighor entries. + // When we get a L3 miss check if its a valid peer and reprogram the neighbor + // entry again. Rate limit it to once attempt every 500ms, just in case a faulty + // container sends a flood of packets to invalid peers + if !l3Miss { + continue + } + if time.Since(t) > 500*time.Millisecond { + t = time.Now() + n.programNeighbor(ip) + } } } } } +func (n *network) programNeighbor(ip net.IP) { + peerMac, _, _, err := n.driver.peerDbSearch(n.id, ip) + if err != nil { + logrus.Errorf("Reprogramming on L3 miss failed for %s, no peer entry", ip) + return + } + s := n.getSubnetforIPAddr(ip) + if s == nil { + logrus.Errorf("Reprogramming on L3 miss failed for %s, not a valid subnet", ip) + return + } + sbox := n.sandbox() + if sbox == nil { + logrus.Errorf("Reprogramming on L3 miss failed for %s, overlay sandbox missing", ip) + return + } + if err := sbox.AddNeighbor(ip, peerMac, true, sbox.NeighborOptions().LinkName(s.vxlanName)); err != nil { + logrus.Errorf("Reprogramming on L3 miss failed for %s: %v", ip, err) + return + } +} + func (d *driver) addNetwork(n *network) { d.Lock() d.networks[n.id] = n @@ -1057,6 +1091,15 @@ func (n *network) contains(ip net.IP) bool { return false } +func (n *network) getSubnetforIPAddr(ip net.IP) *subnet { + for _, s := range n.subnets { + if s.subnetIP.Contains(ip) { + return s + } + } + return nil +} + // getSubnetforIP returns the subnet to which the given IP belongs func (n *network) getSubnetforIP(ip *net.IPNet) *subnet { for _, s := range n.subnets { From fcd07689f7689372e5175a4af341f7c44a6751fd Mon Sep 17 00:00:00 2001 From: Flavio Crisciani Date: Sat, 5 Aug 2017 12:04:20 -0700 Subject: [PATCH 2/4] Revert "Avoid peerUpdate logic in swarm mode" This reverts commit de61c2f25d20647fa46aa1a0a3651ebe02d94e18. Signed-off-by: Flavio Crisciani (cherry picked from commit 8d6355b5c234dfd2d06ea9f3e20b5f3e7d6812c8) --- drivers/overlay/peerdb.go | 6 ------ 1 file changed, 6 deletions(-) diff --git a/drivers/overlay/peerdb.go b/drivers/overlay/peerdb.go index 96b383ce60..6551e7bcad 100644 --- a/drivers/overlay/peerdb.go +++ b/drivers/overlay/peerdb.go @@ -207,12 +207,6 @@ func (d *driver) peerDbDelete(nid, eid string, peerIP net.IP, peerIPMask net.IPM } func (d *driver) peerDbUpdateSandbox(nid string) { - // The following logic is useful only in non swarm mode - // In swarm mode instead the programmig will come directly from networkDB - if !d.isSerfAlive() { - return - } - d.peerDb.Lock() pMap, ok := d.peerDb.mp[nid] if !ok { From c90514a7f79922cf4965b3109f13177138b56c51 Mon Sep 17 00:00:00 2001 From: Flavio Crisciani Date: Thu, 27 Jul 2017 11:43:13 -0700 Subject: [PATCH 3/4] Funnel peerAdd and peerDelete in a channel Remove the need for the wait group and avoid new locks Added utility to print the method name and the caller name Signed-off-by: Flavio Crisciani (cherry picked from commit 0a9aaf80ce2d3c5a65e710f4120b806791afc33b) --- common/caller.go | 29 +++++++++++ common/caller_test.go | 49 ++++++++++++++++++ drivers/overlay/joinleave.go | 5 +- drivers/overlay/ov_network.go | 5 +- drivers/overlay/ov_serf.go | 11 ++-- drivers/overlay/overlay.go | 20 ++++++-- drivers/overlay/peerdb.go | 96 +++++++++++++++++++++++++++-------- 7 files changed, 175 insertions(+), 40 deletions(-) create mode 100644 common/caller.go create mode 100644 common/caller_test.go diff --git a/common/caller.go b/common/caller.go new file mode 100644 index 0000000000..0dec3bc0bc --- /dev/null +++ b/common/caller.go @@ -0,0 +1,29 @@ +package common + +import ( + "runtime" + "strings" +) + +func callerInfo(i int) string { + ptr, _, _, ok := runtime.Caller(i) + fName := "unknown" + if ok { + f := runtime.FuncForPC(ptr) + if f != nil { + // f.Name() is like: github.com/docker/libnetwork/common.MethodName + tmp := strings.Split(f.Name(), ".") + if len(tmp) > 0 { + fName = tmp[len(tmp)-1] + } + } + } + + return fName +} + +// CallerName returns the name of the function at the specified level +// level == 0 means current method name +func CallerName(level int) string { + return callerInfo(2 + level) +} diff --git a/common/caller_test.go b/common/caller_test.go new file mode 100644 index 0000000000..babfbb7bdb --- /dev/null +++ b/common/caller_test.go @@ -0,0 +1,49 @@ +package common + +import "testing" + +func fun1() string { + return CallerName(0) +} + +func fun2() string { + return CallerName(1) +} + +func fun3() string { + return fun4() +} + +func fun4() string { + return CallerName(0) +} + +func fun5() string { + return fun6() +} + +func fun6() string { + return CallerName(1) +} + +func TestCaller(t *testing.T) { + funName := fun1() + if funName != "fun1" { + t.Fatalf("error on fun1 caller %s", funName) + } + + funName = fun2() + if funName != "TestCaller" { + t.Fatalf("error on fun2 caller %s", funName) + } + + funName = fun3() + if funName != "fun4" { + t.Fatalf("error on fun2 caller %s", funName) + } + + funName = fun5() + if funName != "fun5" { + t.Fatalf("error on fun5 caller %s", funName) + } +} diff --git a/drivers/overlay/joinleave.go b/drivers/overlay/joinleave.go index cdbb428281..31c311f4fc 100644 --- a/drivers/overlay/joinleave.go +++ b/drivers/overlay/joinleave.go @@ -120,8 +120,7 @@ func (d *driver) Join(nid, eid string, sboxKey string, jinfo driverapi.JoinInfo, } } - d.peerDbAdd(nid, eid, ep.addr.IP, ep.addr.Mask, ep.mac, - net.ParseIP(d.advertiseAddress), true) + d.peerAdd(nid, eid, ep.addr.IP, ep.addr.Mask, ep.mac, net.ParseIP(d.advertiseAddress), true, false, false, true) if err := d.checkEncryption(nid, nil, n.vxlanID(s), true, true); err != nil { logrus.Warn(err) @@ -205,7 +204,7 @@ func (d *driver) EventNotify(etype driverapi.EventType, nid, tableName, key stri return } - d.peerAdd(nid, eid, addr.IP, addr.Mask, mac, vtep, true, false, false) + d.peerAdd(nid, eid, addr.IP, addr.Mask, mac, vtep, true, false, false, false) } // Leave method is invoked when a Sandbox detaches from an endpoint. diff --git a/drivers/overlay/ov_network.go b/drivers/overlay/ov_network.go index 68d3eebefc..71b20c9ffb 100644 --- a/drivers/overlay/ov_network.go +++ b/drivers/overlay/ov_network.go @@ -765,10 +765,7 @@ func (n *network) watchMiss(nlSock *nl.NetlinkSocket) { logrus.Errorf("could not resolve peer %q: %v", ip, err) continue } - - if err := n.driver.peerAdd(n.id, "dummy", ip, IPmask, mac, vtep, true, l2Miss, l3Miss); err != nil { - logrus.Errorf("could not add neighbor entry for missed peer %q: %v", ip, err) - } + n.driver.peerAdd(n.id, "dummy", ip, IPmask, mac, vtep, true, l2Miss, l3Miss, false) } else { // If the gc_thresh values are lower kernel might knock off the neighor entries. // When we get a L3 miss check if its a valid peer and reprogram the neighbor diff --git a/drivers/overlay/ov_serf.go b/drivers/overlay/ov_serf.go index 9002bce6b7..20954ef237 100644 --- a/drivers/overlay/ov_serf.go +++ b/drivers/overlay/ov_serf.go @@ -120,15 +120,10 @@ func (d *driver) processEvent(u serf.UserEvent) { switch action { case "join": - if err := d.peerAdd(nid, eid, net.ParseIP(ipStr), net.IPMask(net.ParseIP(maskStr).To4()), mac, - net.ParseIP(vtepStr), true, false, false); err != nil { - logrus.Errorf("Peer add failed in the driver: %v\n", err) - } + d.peerAdd(nid, eid, net.ParseIP(ipStr), net.IPMask(net.ParseIP(maskStr).To4()), mac, net.ParseIP(vtepStr), + true, false, false, false) case "leave": - if err := d.peerDelete(nid, eid, net.ParseIP(ipStr), net.IPMask(net.ParseIP(maskStr).To4()), mac, - net.ParseIP(vtepStr), true); err != nil { - logrus.Errorf("Peer delete failed in the driver: %v\n", err) - } + d.peerDelete(nid, eid, net.ParseIP(ipStr), net.IPMask(net.ParseIP(maskStr).To4()), mac, net.ParseIP(vtepStr), true) } } diff --git a/drivers/overlay/overlay.go b/drivers/overlay/overlay.go index 8d19b2e1d4..11eda6781b 100644 --- a/drivers/overlay/overlay.go +++ b/drivers/overlay/overlay.go @@ -3,6 +3,7 @@ package overlay //go:generate protoc -I.:../../Godeps/_workspace/src/github.com/gogo/protobuf --gogo_out=import_path=github.com/docker/libnetwork/drivers/overlay,Mgogoproto/gogo.proto=github.com/gogo/protobuf/gogoproto:. overlay.proto import ( + "context" "fmt" "net" "sync" @@ -50,6 +51,8 @@ type driver struct { joinOnce sync.Once localJoinOnce sync.Once keys []*key + peerOpCh chan *peerOperation + peerOpCancel context.CancelFunc sync.Mutex } @@ -64,10 +67,16 @@ func Init(dc driverapi.DriverCallback, config map[string]interface{}) error { peerDb: peerNetworkMap{ mp: map[string]*peerMap{}, }, - secMap: &encrMap{nodes: map[string][]*spi{}}, - config: config, + secMap: &encrMap{nodes: map[string][]*spi{}}, + config: config, + peerOpCh: make(chan *peerOperation), } + // Launch the go routine for processing peer operations + ctx, cancel := context.WithCancel(context.Background()) + d.peerOpCancel = cancel + go d.peerOpRoutine(ctx, d.peerOpCh) + if data, ok := config[netlabel.GlobalKVClient]; ok { var err error dsc, ok := data.(discoverapi.DatastoreConfigData) @@ -161,7 +170,7 @@ func (d *driver) restoreEndpoints() error { } n.incEndpointCount() - d.peerDbAdd(ep.nid, ep.id, ep.addr.IP, ep.addr.Mask, ep.mac, net.ParseIP(d.advertiseAddress), true) + d.peerAdd(ep.nid, ep.id, ep.addr.IP, ep.addr.Mask, ep.mac, net.ParseIP(d.advertiseAddress), true, false, false, true) } return nil } @@ -170,6 +179,11 @@ func (d *driver) restoreEndpoints() error { func Fini(drv driverapi.Driver) { d := drv.(*driver) + // Notify the peer go routine to return + if d.peerOpCancel != nil { + d.peerOpCancel() + } + if d.exitCh != nil { waitCh := make(chan struct{}) diff --git a/drivers/overlay/peerdb.go b/drivers/overlay/peerdb.go index 6551e7bcad..47e30f1b27 100644 --- a/drivers/overlay/peerdb.go +++ b/drivers/overlay/peerdb.go @@ -1,12 +1,14 @@ package overlay import ( + "context" "fmt" "net" "sync" "syscall" "github.com/Sirupsen/logrus" + "github.com/docker/libnetwork/common" ) const ovPeerTable = "overlay_peer_table" @@ -59,8 +61,6 @@ func (pKey *peerKey) Scan(state fmt.ScanState, verb rune) error { return nil } -var peerDbWg sync.WaitGroup - func (d *driver) peerDbWalk(f func(string, *peerKey, *peerEntry) bool) error { d.peerDb.Lock() nids := []string{} @@ -141,8 +141,6 @@ func (d *driver) peerDbSearch(nid string, peerIP net.IP) (net.HardwareAddr, net. func (d *driver) peerDbAdd(nid, eid string, peerIP net.IP, peerIPMask net.IPMask, peerMac net.HardwareAddr, vtep net.IP, isLocal bool) { - peerDbWg.Wait() - d.peerDb.Lock() pMap, ok := d.peerDb.mp[nid] if !ok { @@ -173,7 +171,6 @@ func (d *driver) peerDbAdd(nid, eid string, peerIP net.IP, peerIPMask net.IPMask func (d *driver) peerDbDelete(nid, eid string, peerIP net.IP, peerIPMask net.IPMask, peerMac net.HardwareAddr, vtep net.IP) peerEntry { - peerDbWg.Wait() d.peerDb.Lock() pMap, ok := d.peerDb.mp[nid] @@ -215,9 +212,6 @@ func (d *driver) peerDbUpdateSandbox(nid string) { } d.peerDb.Unlock() - peerDbWg.Add(1) - - var peerOps []func() pMap.Lock() for pKeyStr, pEntry := range pMap.mp { var pKey peerKey @@ -233,28 +227,67 @@ func (d *driver) peerDbUpdateSandbox(nid string) { // pointing to the same memory location for every iteration. Make // a copy of pEntry before capturing it in the following closure. entry := pEntry - op := func() { - if err := d.peerAdd(nid, entry.eid, pKey.peerIP, entry.peerIPMask, - pKey.peerMac, entry.vtep, - false, false, false); err != nil { - fmt.Printf("peerdbupdate in sandbox failed for ip %s and mac %s: %v", - pKey.peerIP, pKey.peerMac, err) - } - } - peerOps = append(peerOps, op) + d.peerAdd(nid, entry.eid, pKey.peerIP, entry.peerIPMask, pKey.peerMac, entry.vtep, false, false, false, false) } pMap.Unlock() +} - for _, op := range peerOps { - op() - } +type peerOperation struct { + isAdd bool + networkID string + endpointID string + peerIP net.IP + peerIPMask net.IPMask + peerMac net.HardwareAddr + vtepIP net.IP + updateDB bool + l2Miss bool + l3Miss bool + localPeer bool + callerName string +} - peerDbWg.Done() +func (d *driver) peerOpRoutine(ctx context.Context, ch chan *peerOperation) { + var err error + for { + select { + case <-ctx.Done(): + return + case op := <-ch: + if op.isAdd { + err = d.peerAddOp(op.networkID, op.endpointID, op.peerIP, op.peerIPMask, op.peerMac, op.vtepIP, op.updateDB, op.l2Miss, op.l3Miss, op.localPeer) + } else { + err = d.peerDeleteOp(op.networkID, op.endpointID, op.peerIP, op.peerIPMask, op.peerMac, op.vtepIP, op.localPeer) + } + if err != nil { + logrus.Warnf("Peer operation failed:%s op:%v", err, op) + } + } + } } func (d *driver) peerAdd(nid, eid string, peerIP net.IP, peerIPMask net.IPMask, - peerMac net.HardwareAddr, vtep net.IP, updateDb, l2Miss, l3Miss bool) error { + peerMac net.HardwareAddr, vtep net.IP, updateDb, l2Miss, l3Miss, localPeer bool) { + callerName := common.CallerName(1) + d.peerOpCh <- &peerOperation{ + isAdd: true, + networkID: nid, + endpointID: eid, + peerIP: peerIP, + peerIPMask: peerIPMask, + peerMac: peerMac, + vtepIP: vtep, + updateDB: updateDb, + l2Miss: l2Miss, + l3Miss: l3Miss, + localPeer: localPeer, + callerName: callerName, + } +} + +func (d *driver) peerAddOp(nid, eid string, peerIP net.IP, peerIPMask net.IPMask, + peerMac net.HardwareAddr, vtep net.IP, updateDb, l2Miss, l3Miss, updateOnlyDB bool) error { if err := validateID(nid, eid); err != nil { return err @@ -262,6 +295,9 @@ func (d *driver) peerAdd(nid, eid string, peerIP net.IP, peerIPMask net.IPMask, if updateDb { d.peerDbAdd(nid, eid, peerIP, peerIPMask, peerMac, vtep, false) + if updateOnlyDB { + return nil + } } n := d.network(nid) @@ -311,6 +347,22 @@ func (d *driver) peerAdd(nid, eid string, peerIP net.IP, peerIPMask net.IPMask, } func (d *driver) peerDelete(nid, eid string, peerIP net.IP, peerIPMask net.IPMask, + peerMac net.HardwareAddr, vtep net.IP, updateDb bool) { + callerName := common.CallerName(1) + d.peerOpCh <- &peerOperation{ + isAdd: false, + networkID: nid, + endpointID: eid, + peerIP: peerIP, + peerIPMask: peerIPMask, + peerMac: peerMac, + vtepIP: vtep, + updateDB: updateDb, + callerName: callerName, + } +} + +func (d *driver) peerDeleteOp(nid, eid string, peerIP net.IP, peerIPMask net.IPMask, peerMac net.HardwareAddr, vtep net.IP, updateDb bool) error { if err := validateID(nid, eid); err != nil { From 178a02e89b40fbeb87b2a55c7469eefcea32e105 Mon Sep 17 00:00:00 2001 From: Flavio Crisciani Date: Sat, 5 Aug 2017 08:42:20 -0700 Subject: [PATCH 4/4] PeerInit for the sandbox init Move the sandbox init logic into the go routine that handles peer operations. This is to avoid deadlocks in the use of the pMap.Lock for the network Signed-off-by: Flavio Crisciani (cherry picked from commit 37502aca3cdbdbc798eef6e24cc3d8707571ca3a) --- agent.go | 7 +-- drivers/overlay/ov_network.go | 4 +- drivers/overlay/peerdb.go | 85 ++++++++++++++++++++++------------- networkdb/networkdb.go | 4 +- 4 files changed, 62 insertions(+), 38 deletions(-) diff --git a/agent.go b/agent.go index a46a4f34d5..8d46a00c48 100644 --- a/agent.go +++ b/agent.go @@ -738,11 +738,12 @@ func (n *network) addDriverWatches() { return } - agent.networkDB.WalkTable(table.name, func(nid, key string, value []byte) bool { - if nid == n.ID() { + agent.networkDB.WalkTable(table.name, func(nid, key string, value []byte, deleted bool) bool { + // skip the entries that are mark for deletion, this is safe because this function is + // called at initialization time so there is no state to delete + if nid == n.ID() && !deleted { d.EventNotify(driverapi.Create, nid, table.name, key, value) } - return false }) } diff --git a/drivers/overlay/ov_network.go b/drivers/overlay/ov_network.go index 71b20c9ffb..2e3f90b1b2 100644 --- a/drivers/overlay/ov_network.go +++ b/drivers/overlay/ov_network.go @@ -683,10 +683,12 @@ func (n *network) initSandbox(restore bool) error { return fmt.Errorf("could not get network sandbox (oper %t): %v", restore, err) } + // this is needed to let the peerAdd configure the sandbox n.setSandbox(sbox) if !restore { - n.driver.peerDbUpdateSandbox(n.id) + // Initialize the sandbox with all the peers previously received from networkdb + n.driver.initSandboxPeerDB(n.id) } var nlSock *nl.NetlinkSocket diff --git a/drivers/overlay/peerdb.go b/drivers/overlay/peerdb.go index 47e30f1b27..4b41bf7658 100644 --- a/drivers/overlay/peerdb.go +++ b/drivers/overlay/peerdb.go @@ -203,38 +203,31 @@ func (d *driver) peerDbDelete(nid, eid string, peerIP net.IP, peerIPMask net.IPM return pEntry } -func (d *driver) peerDbUpdateSandbox(nid string) { - d.peerDb.Lock() - pMap, ok := d.peerDb.mp[nid] - if !ok { - d.peerDb.Unlock() - return - } - d.peerDb.Unlock() - - pMap.Lock() - for pKeyStr, pEntry := range pMap.mp { - var pKey peerKey - if _, err := fmt.Sscan(pKeyStr, &pKey); err != nil { - fmt.Printf("peer key scan failed: %v", err) - } - - if pEntry.isLocal { - continue - } +// The overlay uses a lazy initialization approach, this means that when a network is created +// and the driver registered the overlay does not allocate resources till the moment that a +// sandbox is actually created. +// At the moment of this call, that happens when a sandbox is initialized, is possible that +// networkDB has already delivered some events of peers already available on remote nodes, +// these peers are saved into the peerDB and this function is used to properly configure +// the network sandbox with all those peers that got previously notified. +// Note also that this method sends a single message on the channel and the go routine on the +// other side, will atomically loop on the whole table of peers and will program their state +// in one single atomic operation. This is fundamental to guarantee consistency, and avoid that +// new peerAdd or peerDelete gets reordered during the sandbox init. +func (d *driver) initSandboxPeerDB(nid string) { + d.peerInit(nid) +} - // Go captures variables by reference. The pEntry could be - // pointing to the same memory location for every iteration. Make - // a copy of pEntry before capturing it in the following closure. - entry := pEntry +type peerOperationType int32 - d.peerAdd(nid, entry.eid, pKey.peerIP, entry.peerIPMask, pKey.peerMac, entry.vtep, false, false, false, false) - } - pMap.Unlock() -} +const ( + peerOperationINIT peerOperationType = iota + peerOperationADD + peerOperationDELETE +) type peerOperation struct { - isAdd bool + opType peerOperationType networkID string endpointID string peerIP net.IP @@ -255,9 +248,12 @@ func (d *driver) peerOpRoutine(ctx context.Context, ch chan *peerOperation) { case <-ctx.Done(): return case op := <-ch: - if op.isAdd { + switch op.opType { + case peerOperationINIT: + err = d.peerInitOp(op.networkID) + case peerOperationADD: err = d.peerAddOp(op.networkID, op.endpointID, op.peerIP, op.peerIPMask, op.peerMac, op.vtepIP, op.updateDB, op.l2Miss, op.l3Miss, op.localPeer) - } else { + case peerOperationDELETE: err = d.peerDeleteOp(op.networkID, op.endpointID, op.peerIP, op.peerIPMask, op.peerMac, op.vtepIP, op.localPeer) } if err != nil { @@ -267,11 +263,33 @@ func (d *driver) peerOpRoutine(ctx context.Context, ch chan *peerOperation) { } } +func (d *driver) peerInit(nid string) { + callerName := common.CallerName(1) + d.peerOpCh <- &peerOperation{ + opType: peerOperationINIT, + networkID: nid, + callerName: callerName, + } +} + +func (d *driver) peerInitOp(nid string) error { + return d.peerDbNetworkWalk(nid, func(pKey *peerKey, pEntry *peerEntry) bool { + // Local entries do not need to be added + if pEntry.isLocal { + return false + } + + d.peerAddOp(nid, pEntry.eid, pKey.peerIP, pEntry.peerIPMask, pKey.peerMac, pEntry.vtep, false, false, false, false) + // return false to loop on all entries + return false + }) +} + func (d *driver) peerAdd(nid, eid string, peerIP net.IP, peerIPMask net.IPMask, peerMac net.HardwareAddr, vtep net.IP, updateDb, l2Miss, l3Miss, localPeer bool) { callerName := common.CallerName(1) d.peerOpCh <- &peerOperation{ - isAdd: true, + opType: peerOperationADD, networkID: nid, endpointID: eid, peerIP: peerIP, @@ -307,6 +325,9 @@ func (d *driver) peerAddOp(nid, eid string, peerIP net.IP, peerIPMask net.IPMask sbox := n.sandbox() if sbox == nil { + // We are hitting this case for all the events that are arriving before that the sandbox + // is being created. The peer got already added into the database and the sanbox init will + // call the peerDbUpdateSandbox that will configure all these peers from the database return nil } @@ -350,7 +371,7 @@ func (d *driver) peerDelete(nid, eid string, peerIP net.IP, peerIPMask net.IPMas peerMac net.HardwareAddr, vtep net.IP, updateDb bool) { callerName := common.CallerName(1) d.peerOpCh <- &peerOperation{ - isAdd: false, + opType: peerOperationDELETE, networkID: nid, endpointID: eid, peerIP: peerIP, diff --git a/networkdb/networkdb.go b/networkdb/networkdb.go index 69be8e99a8..4d6b35e88b 100644 --- a/networkdb/networkdb.go +++ b/networkdb/networkdb.go @@ -467,7 +467,7 @@ func (nDB *NetworkDB) deleteNodeTableEntries(node string) { // WalkTable walks a single table in NetworkDB and invokes the passed // function for each entry in the table passing the network, key, // value. The walk stops if the passed function returns a true. -func (nDB *NetworkDB) WalkTable(tname string, fn func(string, string, []byte) bool) error { +func (nDB *NetworkDB) WalkTable(tname string, fn func(string, string, []byte, bool) bool) error { nDB.RLock() values := make(map[string]interface{}) nDB.indexes[byTable].WalkPrefix(fmt.Sprintf("/%s", tname), func(path string, v interface{}) bool { @@ -480,7 +480,7 @@ func (nDB *NetworkDB) WalkTable(tname string, fn func(string, string, []byte) bo params := strings.Split(k[1:], "/") nid := params[1] key := params[2] - if fn(nid, key, v.(*entry).value) { + if fn(nid, key, v.(*entry).value, v.(*entry).deleting) { return nil } }