Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 8 additions & 7 deletions drivers/overlay/encryption.go
Original file line number Diff line number Diff line change
Expand Up @@ -438,7 +438,7 @@ func (d *driver) setKeys(keys []*key) error {
d.keys = keys
d.secMap = &encrMap{nodes: map[string][]*spi{}}
d.Unlock()
logrus.Debugf("Initial encryption keys: %v", d.keys)
logrus.Debugf("Initial encryption keys: %v", keys)
return nil
}

Expand All @@ -458,6 +458,8 @@ func (d *driver) updateKeys(newKey, primary, pruneKey *key) error {
)

d.Lock()
defer d.Unlock()

// add new
if newKey != nil {
d.keys = append(d.keys, newKey)
Expand All @@ -471,7 +473,6 @@ func (d *driver) updateKeys(newKey, primary, pruneKey *key) error {
delIdx = i
}
}
d.Unlock()

if (newKey != nil && newIdx == -1) ||
(primary != nil && priIdx == -1) ||
Expand All @@ -480,17 +481,18 @@ func (d *driver) updateKeys(newKey, primary, pruneKey *key) error {
"(newIdx,priIdx,delIdx):(%d, %d, %d)", newIdx, priIdx, delIdx)
}

if priIdx != -1 && priIdx == delIdx {
return types.BadRequestErrorf("attempting to both make a key (index %d) primary and delete it", priIdx)
}

d.secMapWalk(func(rIPs string, spis []*spi) ([]*spi, bool) {
rIP := net.ParseIP(rIPs)
return updateNodeKey(lIP, aIP, rIP, spis, d.keys, newIdx, priIdx, delIdx), false
})

d.Lock()
// swap primary
if priIdx != -1 {
swp := d.keys[0]
d.keys[0] = d.keys[priIdx]
d.keys[priIdx] = swp
d.keys[0], d.keys[priIdx] = d.keys[priIdx], d.keys[0]
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Does this change give any specific improvement other than not using a swap variable?

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

No, just that this is more idiomatic Go as far as I'm aware.

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks.

}
// prune
if delIdx != -1 {
Expand All @@ -499,7 +501,6 @@ func (d *driver) updateKeys(newKey, primary, pruneKey *key) error {
}
d.keys = append(d.keys[:delIdx], d.keys[delIdx+1:]...)
}
d.Unlock()

logrus.Debugf("Updated: %v", d.keys)

Expand Down
55 changes: 32 additions & 23 deletions drivers/overlay/ov_network.go
Original file line number Diff line number Diff line change
Expand Up @@ -203,6 +203,12 @@ func (d *driver) CreateNetwork(id string, option map[string]interface{}, nInfo d
n.subnets = append(n.subnets, s)
}

d.Lock()
defer d.Unlock()
if d.networks[n.id] != nil {
return fmt.Errorf("attempt to create overlay network %v that already exists", n.id)
}

if err := n.writeToStore(); err != nil {
return fmt.Errorf("failed to update data store for network %v: %v", n.id, err)
}
Expand All @@ -217,11 +223,13 @@ func (d *driver) CreateNetwork(id string, option map[string]interface{}, nInfo d

if nInfo != nil {
if err := nInfo.TableEventRegister(ovPeerTable, driverapi.EndpointObject); err != nil {
// XXX Undo writeToStore? No method to so. Why?
return err
}
}

d.addNetwork(n)
d.networks[id] = n

return nil
}

Expand All @@ -235,7 +243,15 @@ func (d *driver) DeleteNetwork(nid string) error {
return err
}

n := d.network(nid)
d.Lock()
defer d.Unlock()

// This is similar to d.network(), but we need to keep holding the lock
// until we are done removing this network.
n, ok := d.networks[nid]
if !ok {
n = d.restoreNetworkFromStore(nid)
}
if n == nil {
return fmt.Errorf("could not find network with id %s", nid)
}
Expand All @@ -255,7 +271,7 @@ func (d *driver) DeleteNetwork(nid string) error {
}
// flush the peerDB entries
d.peerFlush(nid)
d.deleteNetwork(nid)
delete(d.networks, nid)

vnis, err := n.releaseVxlanID()
if err != nil {
Expand Down Expand Up @@ -805,32 +821,25 @@ func (n *network) watchMiss(nlSock *nl.NetlinkSocket, nsPath string) {
}
}

func (d *driver) addNetwork(n *network) {
d.Lock()
d.networks[n.id] = n
d.Unlock()
}

func (d *driver) deleteNetwork(nid string) {
d.Lock()
delete(d.networks, nid)
d.Unlock()
// Restore a network from the store to the driver if it is present.
// Must be called with the driver locked!
func (d *driver) restoreNetworkFromStore(nid string) *network {
n := d.getNetworkFromStore(nid)
if n != nil {
n.driver = d
n.endpoints = endpointTable{}
n.once = &sync.Once{}
d.networks[nid] = n
}
return n
}

func (d *driver) network(nid string) *network {
d.Lock()
defer d.Unlock()
n, ok := d.networks[nid]
d.Unlock()
if !ok {
n = d.getNetworkFromStore(nid)
if n != nil {
n.driver = d
n.endpoints = endpointTable{}
n.once = &sync.Once{}
d.Lock()
d.networks[nid] = n
d.Unlock()
}
n = d.restoreNetworkFromStore(nid)
}

return n
Expand Down
10 changes: 6 additions & 4 deletions drivers/overlay/ovmanager/ovmanager.go
Original file line number Diff line number Diff line change
Expand Up @@ -125,8 +125,12 @@ func (d *driver) NetworkAllocate(id string, option map[string]string, ipV4Data,
opts[netlabel.OverlayVxlanIDList] = val

d.Lock()
defer d.Unlock()
if _, ok := d.networks[id]; ok {
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is it possible to do this check (and error out) up above in the beginning of the function rather than towards the end?

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

That's a good suggestion. Hrm... What prefaces the lock itself is basically input checking. Some of it (the VxlanID allocation and possibly cleanup on error) would access the store and could be lengthy, so holding the lock would slow down other network create operations on different networks. But moving the check to the top would prevent concurrent attempts to create the same network from going through needless effort (and seems more sane stylistically).

So ... a bit nicer hygiene vs. potentially increased parallelism (assuming that the client is actually using said parallelism). I could go either way on this. What do you think?

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I was mainly looking at it from the stylistically sane perspective of erroring out early. I agree with the concern about a potential hit to parallelism too. So it is perhaps okay to leave it the way you have it now.

n.releaseVxlanID()
return nil, fmt.Errorf("network %s already exists", id)
}
d.networks[id] = n
d.Unlock()

return opts, nil
}
Expand All @@ -137,8 +141,8 @@ func (d *driver) NetworkFree(id string) error {
}

d.Lock()
defer d.Unlock()
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The actual implementation locks and unlocks only at particular places, whereas the current change locks at the beginning of the function and unlocks when the function exits. Like you mentioned, will there be any issue for other threads, which will be waiting a little longer than they used to? Just trying to see if there will be any trade-off in performance or some other issue.

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

There could be a tradeoff in performance here. The only extra operation that gets protected by the lock is network.releaseVxlanID() which does two things:

  1. locks itself temporarily to build a coherent list of VNIs to free
  2. invokes the idm.Release() method for each VNI

The first would be protected against concurrency by the lock anyway. The second, however, invokes the IDM which uses the bitseq package which can end up writing to store. That last bit is the only long blocking operation I can think of here.

Even so, this is an "ovmanager.driver" instance and the only operations it locks on are creation and deletion. I'd assume that our desired rate for creation/deletion would be measured in the order of thousands per second, in which case I highly doubt a serial write-per-op would hurt us.

But (full transparency), I chafe at bad concurrency patterns and want to make it easy to reason about the correctness of the code. As I said in the commit log for this section: I doubt this race is serious.

n, ok := d.networks[id]
d.Unlock()

if !ok {
return fmt.Errorf("overlay network with id %s not found", id)
Expand All @@ -147,9 +151,7 @@ func (d *driver) NetworkFree(id string) error {
// Release all vxlan IDs in one shot.
n.releaseVxlanID()

d.Lock()
delete(d.networks, id)
d.Unlock()

return nil
}
Expand Down