Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
37 changes: 37 additions & 0 deletions manager/manager.go
Original file line number Diff line number Diff line change
Expand Up @@ -875,8 +875,39 @@ func (m *Manager) rotateRootCAKEK(ctx context.Context, clusterID string) error {
})
}

// getLeaderNodeID is a small helper function returning a string with the
// leader's node ID. it is only used for logging, and should not be relied on
// to give a node ID for actual operational purposes (because it returns errors
// as nicely decorated strings)
func (m *Manager) getLeaderNodeID() string {
// get the current leader ID. this variable tracks the leader *only* for
// the purposes of logging leadership changes, and should not be relied on
// for other purposes
leader, leaderErr := m.raftNode.Leader()
switch leaderErr {
case raft.ErrNoRaftMember:
// this is an unlikely case, but we have to handle it. this means this
// node is not a member of the raft quorum. this won't look very pretty
// in logs ("leadership changed from aslkdjfa to ErrNoRaftMember") but
// it also won't be very common
return "not yet part of a raft cluster"
case raft.ErrNoClusterLeader:
return "no cluster leader"
default:
id, err := m.raftNode.GetNodeIDByRaftID(leader)
// the only possible error here is "ErrMemberUnknown"
if err != nil {
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Change to err != ErrMemberUnknown ?

Copy link
Copy Markdown
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

not worth fixing.

Copy link
Copy Markdown
Contributor

@anshulpundir anshulpundir Mar 8, 2018

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

care to elaborate ? @dperny

return "an unknown node"
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is not very useful in the logs either. Any way to make this somewhat less confusing ?

Copy link
Copy Markdown
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

probably not. it's an unlikely case that this happens, and if it does happen, you'll need to look at the logs output by the raft library.

Copy link
Copy Markdown
Contributor

@anshulpundir anshulpundir Mar 8, 2018

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Let me clarify: my suggestion is to find out what is the scenario when we can get a ErrMemberUnknown and try to return a string that explains that scenario. @dperny

}
return id
}
}

// handleLeadershipEvents handles the is leader event or is follower event.
func (m *Manager) handleLeadershipEvents(ctx context.Context, leadershipCh chan events.Event) {
// get the current leader and save it for logging leadership changes in
// this loop
oldLeader := m.getLeaderNodeID()
for {
select {
case leadershipEvent := <-leadershipCh:
Expand All @@ -895,6 +926,12 @@ func (m *Manager) handleLeadershipEvents(ctx context.Context, leadershipCh chan
leaderMetric.Set(0)
}
m.mu.Unlock()

newLeader := m.getLeaderNodeID()
// maybe we should use logrus fields for old and new leader, so
// that users are better able to ingest leadership changes into log
// aggregators?
log.G(ctx).Infof("leadership changed from %v to %v", oldLeader, newLeader)
case <-ctx.Done():
return
}
Expand Down
12 changes: 12 additions & 0 deletions manager/state/raft/raft.go
Original file line number Diff line number Diff line change
Expand Up @@ -1703,6 +1703,18 @@ func (n *Node) GetMemberByNodeID(nodeID string) *membership.Member {
return nil
}

// GetNodeIDByRaftID returns the generic Node ID of a member given its raft ID.
// It returns ErrMemberUnknown if the raft ID is unknown.
func (n *Node) GetNodeIDByRaftID(raftID uint64) (string, error) {
if member, ok := n.cluster.Members()[raftID]; ok {
return member.NodeID, nil
}
// this is the only possible error value that should be returned; the
// manager code depends on this. if you need to add more errors later, make
// sure that you update the callers of this method accordingly
return "", ErrMemberUnknown
}

// IsMember checks if the raft node has effectively joined
// a cluster of existing members.
func (n *Node) IsMember() bool {
Expand Down
39 changes: 39 additions & 0 deletions manager/state/raft/raft_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -1024,3 +1024,42 @@ func TestStreamRaftMessage(t *testing.T) {
errStr = fmt.Sprintf("Raft message chunk is not of type %d", raftpb.MsgSnap)
assert.Equal(t, errStr, s.Message())
}

// TestGetNodeIDByRaftID tests the GetNodeIDByRaftID function. It's a very
// simple test but those are the kind that make a difference over time
func TestGetNodeIDByRaftID(t *testing.T) {
t.Parallel()

nodes, _ := raftutils.NewRaftCluster(t, tc)
defer raftutils.TeardownCluster(nodes)

// get the member list
members := nodes[1].GetMemberlist()
// get all of the raft ids
raftIDs := make([]uint64, 0, len(members))
for _, member := range members {
raftIDs = append(raftIDs, member.RaftID)
}

// now go and get the nodeID of every raftID
for _, id := range raftIDs {
nodeid, err := nodes[1].GetNodeIDByRaftID(id)
assert.NoError(t, err, "raft ID %v should give us a node ID", id)
// now go through the member manually list and make sure this is
// correct
for _, member := range members {
assert.True(t,
// either both should match, or both should not match. if they
// are different, then there is an error
(member.RaftID == id) == (member.NodeID == nodeid),
"member with id %v has node id %v, but we expected member with id %v to have node id %v",
member.RaftID, member.NodeID, id, nodeid,
)
}
}

// now expect a nonexistent raft member to return ErrNoMember
id, err := nodes[1].GetNodeIDByRaftID(8675309)
assert.Equal(t, err, raft.ErrMemberUnknown)
assert.Empty(t, id)
}