From 5e7b5737d7aeb0a8131db4ca1eb336be37e634d1 Mon Sep 17 00:00:00 2001 From: Colin Walters Date: Wed, 8 May 2019 12:49:47 +0000 Subject: [PATCH 1/2] status: Log when a pool completes Continuing on the quest to make the controller logs more useful. --- pkg/controller/node/status.go | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/pkg/controller/node/status.go b/pkg/controller/node/status.go index 4ef5b48d64..e20812b939 100644 --- a/pkg/controller/node/status.go +++ b/pkg/controller/node/status.go @@ -34,6 +34,8 @@ func (ctrl *Controller) syncStatusOnly(pool *mcfgv1.MachineConfigPool) error { } func calculateStatus(pool *mcfgv1.MachineConfigPool, nodes []*corev1.Node) mcfgv1.MachineConfigPoolStatus { + previousUpdated := mcfgv1.IsMachineConfigPoolConditionTrue(pool.Status.Conditions, mcfgv1.MachineConfigPoolUpdated) + machineCount := int32(len(nodes)) updatedMachines := getUpdatedMachines(pool.Status.Configuration.Name, nodes) @@ -75,8 +77,12 @@ func calculateStatus(pool *mcfgv1.MachineConfigPool, nodes []*corev1.Node) mcfgv readyMachineCount == machineCount && unavailableMachineCount == 0 { //TODO: update api to only have one condition regarding status of update. 
- supdated := mcfgv1.NewMachineConfigPoolCondition(mcfgv1.MachineConfigPoolUpdated, corev1.ConditionTrue, fmt.Sprintf("All nodes are updated with %s", pool.Status.Configuration.Name), "") + updatedMsg := fmt.Sprintf("All nodes are updated with %s", pool.Status.Configuration.Name) + supdated := mcfgv1.NewMachineConfigPoolCondition(mcfgv1.MachineConfigPoolUpdated, corev1.ConditionTrue, updatedMsg, "") mcfgv1.SetMachineConfigPoolCondition(&status, *supdated) + if !previousUpdated { + glog.Infof("Pool %s: %s", pool.Name, updatedMsg) + } supdating := mcfgv1.NewMachineConfigPoolCondition(mcfgv1.MachineConfigPoolUpdating, corev1.ConditionFalse, "", "") mcfgv1.SetMachineConfigPoolCondition(&status, *supdating) } else { From ad37518f2374b2742ad51e492eb8f811d2e21be9 Mon Sep 17 00:00:00 2001 From: Colin Walters Date: Wed, 8 May 2019 13:01:56 +0000 Subject: [PATCH 2/2] controller: Fix "changed" logic to account for nodeReady changes The `nodeChanged` logic was not accounting for the fact that the controller takes readiness into account too. However, rather than change that function, let's just remove it and extend our logic below it, which was already effectively doing change detection in order to log changes. This also removes the logging from `isNodeReady()` which got very noisy in status; we now consistently log just changes in the node controller. However, the change detection logic was also implicitly ignoring nodes which didn't appear to be managed by the MCD at all - think Windows nodes. Let's explicitly skip nodes that don't have a `currentConfig` annotation. 
--- pkg/controller/node/node_controller.go | 66 +++++++++++++++----------- pkg/controller/node/status.go | 32 +++++++++---- 2 files changed, 60 insertions(+), 38 deletions(-) diff --git a/pkg/controller/node/node_controller.go b/pkg/controller/node/node_controller.go index 04c991d616..030ce9efd8 100644 --- a/pkg/controller/node/node_controller.go +++ b/pkg/controller/node/node_controller.go @@ -188,13 +188,13 @@ func (ctrl *Controller) updateNode(old, cur interface{}) { oldNode := old.(*corev1.Node) curNode := cur.(*corev1.Node) - if !nodeChanged(oldNode, curNode) { + if !isNodeManaged(curNode) { return } pool, err := ctrl.getPoolForNode(curNode) if err != nil { - glog.Errorf("error finding pools for node: %v", err) + glog.Errorf("error finding pool for node: %v", err) return } if pool == nil { @@ -202,17 +202,49 @@ func (ctrl *Controller) updateNode(old, cur interface{}) { } glog.V(4).Infof("Node %s updated", curNode.Name) - oldReady := isNodeReady(oldNode) - newReady := isNodeReady(curNode) + var changed bool + oldReadyErr := checkNodeReady(oldNode) + newReadyErr := checkNodeReady(curNode) + var oldReady, newReady string + if oldReadyErr != nil { + oldReady = oldReadyErr.Error() + } else { + oldReady = "" + } + if newReadyErr != nil { + newReady = newReadyErr.Error() + } else { + newReady = "" + } if oldReady != newReady { - glog.Infof("Pool %s: node %s is now reporting ready: %v", pool.Name, curNode.Name, newReady) + changed = true + if newReadyErr != nil { + glog.Infof("Pool %s: node %s is now reporting unready: %v", pool.Name, curNode.Name, newReadyErr) + } else { + glog.Infof("Pool %s: node %s is now reporting ready", pool.Name, curNode.Name) + } } // Specifically log when a node has completed an update so the MCC logs are a useful central aggregate of state changes if oldNode.Annotations[daemonconsts.CurrentMachineConfigAnnotationKey] != oldNode.Annotations[daemonconsts.DesiredMachineConfigAnnotationKey] && - 
curNode.Annotations[daemonconsts.CurrentMachineConfigAnnotationKey] == curNode.Annotations[daemonconsts.DesiredMachineConfigAnnotationKey] { + curNode.Annotations[daemonconsts.CurrentMachineConfigAnnotationKey] == curNode.Annotations[daemonconsts.DesiredMachineConfigAnnotationKey] && + curNode.Annotations[daemonconsts.MachineConfigDaemonStateAnnotationKey] == daemonconsts.MachineConfigDaemonStateDone { glog.Infof("Pool %s: node %s has completed update to %s", pool.Name, curNode.Name, curNode.Annotations[daemonconsts.DesiredMachineConfigAnnotationKey]) + changed = true + } else { + annos := []string{daemonconsts.CurrentMachineConfigAnnotationKey, daemonconsts.DesiredMachineConfigAnnotationKey, daemonconsts.MachineConfigDaemonStateAnnotationKey} + for _, anno := range annos { + if oldNode.Annotations[anno] != curNode.Annotations[anno] { + glog.Infof("Pool %s: node %s changed %s = %s", pool.Name, curNode.Name, anno, curNode.Annotations[anno]) + changed = true + } + } } + + if !changed { + return + } + ctrl.enqueueMachineConfigPool(pool) } @@ -244,28 +276,6 @@ func (ctrl *Controller) deleteNode(obj interface{}) { ctrl.enqueueMachineConfigPool(pool) } -func nodeChanged(old, cur *corev1.Node) bool { - if old.Annotations == nil && cur.Annotations != nil || - old.Annotations != nil && cur.Annotations == nil { - return true - } - - if old.Annotations == nil && cur.Annotations == nil { - return false - } - - if old.Annotations[daemonconsts.CurrentMachineConfigAnnotationKey] != cur.Annotations[daemonconsts.CurrentMachineConfigAnnotationKey] || - old.Annotations[daemonconsts.DesiredMachineConfigAnnotationKey] != cur.Annotations[daemonconsts.DesiredMachineConfigAnnotationKey] { - return true - } - - if old.Annotations[daemonconsts.MachineConfigDaemonStateAnnotationKey] != cur.Annotations[daemonconsts.MachineConfigDaemonStateAnnotationKey] { - return true - } - - return false -} - // getPoolForNode chooses the MachineConfigPool that should be used for a given node. 
// It disambiguates in the case where e.g. a node has both master/worker roles applied, // and where a custom role may be used. diff --git a/pkg/controller/node/status.go b/pkg/controller/node/status.go index e20812b939..bc820e7592 100644 --- a/pkg/controller/node/status.go +++ b/pkg/controller/node/status.go @@ -117,6 +117,18 @@ func calculateStatus(pool *mcfgv1.MachineConfigPool, nodes []*corev1.Node) mcfgv return status } +// isNodeManaged checks whether the MCD has ever run on a node +func isNodeManaged(node *corev1.Node) bool { + if node.Annotations == nil { + return false + } + cconfig, ok := node.Annotations[daemonconsts.CurrentMachineConfigAnnotationKey] + if !ok || cconfig == "" { + return false + } + return true +} + // getUpdatedMachines filters the provided nodes to ones whose current == desired // and the "done" flag is set. func getUpdatedMachines(currentConfig string, nodes []*corev1.Node) []*corev1.Node { @@ -159,7 +171,7 @@ func getReadyMachines(currentConfig string, nodes []*corev1.Node) []*corev1.Node return ready } -func isNodeReady(node *corev1.Node) bool { +func checkNodeReady(node *corev1.Node) error { for i := range node.Status.Conditions { cond := &node.Status.Conditions[i] // We consider the node for scheduling only when its: @@ -167,24 +179,24 @@ func isNodeReady(node *corev1.Node) bool { // - NodeOutOfDisk condition status is ConditionFalse, // - NodeNetworkUnavailable condition status is ConditionFalse. 
if cond.Type == corev1.NodeReady && cond.Status != corev1.ConditionTrue { - glog.Infof("Node %s is reporting NotReady", node.Name) - return false + return fmt.Errorf("Node %s is reporting NotReady", node.Name) } if cond.Type == corev1.NodeOutOfDisk && cond.Status != corev1.ConditionFalse { - glog.Infof("Node %s is reporting OutOfDisk", node.Name) - return false + return fmt.Errorf("Node %s is reporting OutOfDisk", node.Name) } if cond.Type == corev1.NodeNetworkUnavailable && cond.Status != corev1.ConditionFalse { - glog.Infof("Node %s is reporting NetworkUnavailable", node.Name) - return false + return fmt.Errorf("Node %s is reporting NetworkUnavailable", node.Name) } } // Ignore nodes that are marked unschedulable if node.Spec.Unschedulable { - glog.Infof("Node %s is reporting Unschedulable", node.Name) - return false + return fmt.Errorf("Node %s is reporting Unschedulable", node.Name) } - return true + return nil +} + +func isNodeReady(node *corev1.Node) bool { + return checkNodeReady(node) == nil } func getUnavailableMachines(currentConfig string, nodes []*corev1.Node) []*corev1.Node {