diff --git a/cmd/machine-api-operator/start.go b/cmd/machine-api-operator/start.go
index e53fd5870..b8bc7f853 100644
--- a/cmd/machine-api-operator/start.go
+++ b/cmd/machine-api-operator/start.go
@@ -80,10 +80,7 @@ func startControllers(ctx *ControllerContext) error {
 		startOpts.imagesFile,
 		config,
-		ctx.KubeNamespacedInformerFactory.Core().V1().ServiceAccounts(),
 		ctx.KubeNamespacedInformerFactory.Apps().V1().Deployments(),
-		ctx.KubeNamespacedInformerFactory.Rbac().V1().ClusterRoles(),
-		ctx.KubeNamespacedInformerFactory.Rbac().V1().ClusterRoleBindings(),
 		ctx.ClientBuilder.KubeClientOrDie(componentName),
 		ctx.ClientBuilder.OpenshiftClientOrDie(componentName),
diff --git a/install/0000_30_machine-api-operator_08_rbac.yaml b/install/0000_30_machine-api-operator_08_rbac.yaml
index 996565d18..9badc23c7 100644
--- a/install/0000_30_machine-api-operator_08_rbac.yaml
+++ b/install/0000_30_machine-api-operator_08_rbac.yaml
@@ -1,13 +1,130 @@
+apiVersion: rbac.authorization.k8s.io/v1
+kind: ClusterRole
+metadata:
+  creationTimestamp: null
+  name: machine-api-manager
+rules:
+  - apiGroups:
+      - cluster.k8s.io
+    resources:
+      - '*'
+    verbs:
+      - '*'
+
+  - apiGroups:
+      - machine.openshift.io
+    resources:
+      - '*'
+    verbs:
+      - '*'
+
+  - apiGroups:
+      - healthchecking.openshift.io
+    resources:
+      - '*'
+    verbs:
+      - '*'
+
+  - apiGroups:
+      - config.openshift.io
+    resources:
+      - clusteroperators
+      - clusteroperators/status
+    verbs:
+      - create
+      - get
+      - update
+
+  - apiGroups:
+      - apps
+    resources:
+      - deployments
+    verbs:
+      - get
+      - list
+      - watch
+      - create
+      - update
+      - patch
+      - delete
+
+  - apiGroups:
+      - ""
+    resources:
+      - nodes
+    verbs:
+      - get
+      - list
+      - watch
+      - create
+      - update
+      - patch
+      - delete
+
+  - apiGroups:
+      - ""
+    resources:
+      - events
+    verbs:
+      - create
+
+  - apiGroups:
+      - ""
+    resources:
+      - configmaps
+    verbs:
+      - get
+      - list
+      - watch
+      - create
+      - update
+      - patch
+      - delete
+
+  - apiGroups:
+      - ""
+    resources:
+      - secrets
+    verbs:
+      - get
+      - list
+      - watch
+
+  - apiGroups:
+      - ""
+    resources:
+      - pods
+    verbs:
+      - get
+      - list
+      - watch
+
+  - apiGroups:
+      - ""
+    resources:
+      - pods/eviction
+    verbs:
+      - create
+
+  - apiGroups:
+      - extensions
+    resources:
+      - daemonsets
+    verbs:
+      - get
+      - list
+      - watch
+
+---
+apiVersion: rbac.authorization.k8s.io/v1
 kind: ClusterRoleBinding
-apiVersion: rbac.authorization.k8s.io/v1beta1
 metadata:
-  name: default-account-openshift-machine-api
-subjects:
-- kind: ServiceAccount
-  name: default
-  namespace: openshift-machine-api
+  name: machine-api-manager-rolebinding
 roleRef:
-  kind: ClusterRole
-  name: cluster-admin
   apiGroup: rbac.authorization.k8s.io
+  kind: ClusterRole
+  name: machine-api-manager
+subjects:
+  - kind: ServiceAccount
+    name: default
+    namespace: openshift-machine-api
diff --git a/owned-manifests/clusterapi-manager-cluster-role-binding.yaml b/owned-manifests/clusterapi-manager-cluster-role-binding.yaml
deleted file mode 100644
index def4603e1..000000000
--- a/owned-manifests/clusterapi-manager-cluster-role-binding.yaml
+++ /dev/null
@@ -1,12 +0,0 @@
-apiVersion: rbac.authorization.k8s.io/v1
-kind: ClusterRoleBinding
-metadata:
-  name: cluster-api-manager-rolebinding
-roleRef:
-  apiGroup: rbac.authorization.k8s.io
-  kind: ClusterRole
-  name: cluster-api-manager-role
-subjects:
-- kind: ServiceAccount
-  name: default
-  namespace: {{ .TargetNamespace }}
diff --git a/owned-manifests/clusterapi-manager-cluster-role.yaml b/owned-manifests/clusterapi-manager-cluster-role.yaml
deleted file mode 100644
index 00da933fb..000000000
--- a/owned-manifests/clusterapi-manager-cluster-role.yaml
+++ /dev/null
@@ -1,65 +0,0 @@
-apiVersion: rbac.authorization.k8s.io/v1
-kind: ClusterRole
-metadata:
-  creationTimestamp: null
-  name: cluster-api-manager-role
-rules:
-- apiGroups:
-  - cluster.k8s.io
-  resources:
-  - clusters
-  - clusters/status
-  - machines
-  - machines/status
-  - machinesets
-  - machinesets/status
-  - machinedeployments
-  - machinedeployments/status
-  verbs:
-  - get
-  - list
-  - watch
-  - create
-  - update
-  - patch
-  - delete
-
-- apiGroups:
-  - machine.openshift.io
-  resources:
-  - clusters
-  - clusters/status
-  - machines
-  - machines/status
-  - machinesets
-  - machinesets/status
-  - machinedeployments
-  - machinedeployments/status
-  verbs:
-  - get
-  - list
-  - watch
-  - create
-  - update
-  - patch
-  - delete
-
-- apiGroups:
-  - ""
-  resources:
-  - nodes
-  verbs:
-  - get
-  - list
-  - watch
-  - create
-  - update
-  - patch
-  - delete
-
-- apiGroups:
-  - healthchecking.openshift.io
-  resources:
-  - '*'
-  verbs:
-  - '*'
diff --git a/pkg/operator/operator.go b/pkg/operator/operator.go
index ea58191b1..e87555f34 100644
--- a/pkg/operator/operator.go
+++ b/pkg/operator/operator.go
@@ -11,8 +11,6 @@ import (
 	utilruntime "k8s.io/apimachinery/pkg/util/runtime"
 	"k8s.io/apimachinery/pkg/util/wait"
 	appsinformersv1 "k8s.io/client-go/informers/apps/v1"
-	coreinformersv1 "k8s.io/client-go/informers/core/v1"
-	rbacinformersv1 "k8s.io/client-go/informers/rbac/v1"
 	"k8s.io/client-go/kubernetes"
 	coreclientsetv1 "k8s.io/client-go/kubernetes/typed/core/v1"
 	appslisterv1 "k8s.io/client-go/listers/apps/v1"
@@ -60,10 +58,7 @@ func New(
 	config string,
-	serviceAccountInfomer coreinformersv1.ServiceAccountInformer,
 	deployInformer appsinformersv1.DeploymentInformer,
-	clusterRoleInformer rbacinformersv1.ClusterRoleInformer,
-	clusterRoleBindingInformer rbacinformersv1.ClusterRoleBindingInformer,
 	kubeClient kubernetes.Interface,
 	osClient osclientset.Interface,
@@ -82,10 +77,7 @@ func New(
 		queue: workqueue.NewNamedRateLimitingQueue(workqueue.DefaultControllerRateLimiter(), "machineapioperator"),
 	}
-	serviceAccountInfomer.Informer().AddEventHandler(optr.eventHandler())
 	deployInformer.Informer().AddEventHandler(optr.eventHandler())
-	clusterRoleInformer.Informer().AddEventHandler(optr.eventHandler())
-	clusterRoleBindingInformer.Informer().AddEventHandler(optr.eventHandler())
 	optr.config = config
 	optr.syncHandler = optr.sync
diff --git a/pkg/operator/sync.go b/pkg/operator/sync.go
index 6075dda81..9962db1e2 100644
--- a/pkg/operator/sync.go
+++ b/pkg/operator/sync.go
@@ -51,24 +51,6 @@ func (optr *Operator) syncAll(config OperatorConfig) error {
 }
 
 func (optr *Operator) syncClusterAPIController(config OperatorConfig) error {
-	crBytes, err := PopulateTemplate(&config, filepath.Join(ownedManifestsDir, "clusterapi-manager-cluster-role.yaml"))
-	if err != nil {
-		return err
-	}
-	cr := resourceread.ReadClusterRoleV1OrDie(crBytes)
-	_, _, err = resourceapply.ApplyClusterRole(optr.kubeClient.RbacV1(), cr)
-	if err != nil {
-		return err
-	}
-	crbBytes, err := PopulateTemplate(&config, filepath.Join(ownedManifestsDir, "clusterapi-manager-cluster-role-binding.yaml"))
-	if err != nil {
-		return err
-	}
-	crb := resourceread.ReadClusterRoleBindingV1OrDie(crbBytes)
-	_, _, err = resourceapply.ApplyClusterRoleBinding(optr.kubeClient.RbacV1(), crb)
-	if err != nil {
-		return err
-	}
 	controllerBytes, err := PopulateTemplate(&config, filepath.Join(ownedManifestsDir, "clusterapi-manager-controllers.yaml"))
 	if err != nil {
 		return err
diff --git a/test/e2e/main.go b/test/e2e/main.go
index 03d5b9fca..fc536c90d 100644
--- a/test/e2e/main.go
+++ b/test/e2e/main.go
@@ -115,5 +115,13 @@ func runSuite() error {
 		return err
 	}
 	glog.Info("PASS: ExpectAutoscalerScalesOut")
+
+	glog.Info("RUN: ExpectNodeToBeDrainedBeforeMachineIsDeleted")
+	if err := testConfig.ExpectNodeToBeDrainedBeforeMachineIsDeleted(); err != nil {
+		glog.Errorf("FAIL: ExpectNodeToBeDrainedBeforeMachineIsDeleted: %v", err)
+		return err
+	}
+	glog.Info("PASS: ExpectNodeToBeDrainedBeforeMachineIsDeleted")
+
 	return nil
 }
diff --git a/test/e2e/operator_expectations.go b/test/e2e/operator_expectations.go
index ac3e624a7..e50d4b5ae 100644
--- a/test/e2e/operator_expectations.go
+++ b/test/e2e/operator_expectations.go
@@ -16,18 +16,23 @@ import (
 	kappsapi "k8s.io/api/apps/v1"
 	batchv1 "k8s.io/api/batch/v1"
 	corev1 "k8s.io/api/core/v1"
+	kpolicyapi "k8s.io/api/policy/v1beta1"
 	resource "k8s.io/apimachinery/pkg/api/resource"
 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+	"k8s.io/apimachinery/pkg/runtime"
 	"k8s.io/apimachinery/pkg/types"
+	"k8s.io/apimachinery/pkg/util/intstr"
 	"k8s.io/apimachinery/pkg/util/sets"
+	"k8s.io/apimachinery/pkg/util/uuid"
 	"k8s.io/apimachinery/pkg/util/wait"
 	"sigs.k8s.io/controller-runtime/pkg/client"
 )
 
 const (
-	waitShort  = 1 * time.Minute
-	waitMedium = 3 * time.Minute
-	waitLong   = 10 * time.Minute
+	waitShort       = 1 * time.Minute
+	waitMedium      = 3 * time.Minute
+	waitLong        = 15 * time.Minute
+	workerRoleLabel = "node-role.kubernetes.io/worker"
 )
 
 func (tc *testConfig) ExpectOperatorAvailable() error {
@@ -608,3 +613,481 @@ func (tc *testConfig) ExpectAutoscalerScalesOut() error {
 		return len(nodeList.Items) == clusterInitialTotalNodes, nil
 	})
 }
+
+func (tc *testConfig) ExpectNodeToBeDrainedBeforeDeletingMachine() error {
+	listOptions := client.ListOptions{
+		Namespace: namespace,
+	}
+
+	var machine mapiv1beta1.Machine
+	var nodeName string
+	var node *corev1.Node
+
+	glog.Info("Get machineList with at least one machine with NodeRef set")
+	if err := wait.PollImmediate(1*time.Second, waitShort, func() (bool, error) {
+		machineList := mapiv1beta1.MachineList{}
+		if err := tc.client.List(context.TODO(), &listOptions, &machineList); err != nil {
+			glog.Errorf("error querying api for machineList object: %v, retrying...", err)
+			return false, nil
+		}
+		for _, machineItem := range machineList.Items {
+			// skip machines with an empty or non-worker role
+			if machineItem.Labels["sigs.k8s.io/cluster-api-machine-role"] == "worker" {
+				if machineItem.Status.NodeRef != nil && machineItem.Status.NodeRef.Name != "" {
+					machine = machineItem
+					nodeName = machineItem.Status.NodeRef.Name
+					return true, nil
+				}
+			}
+		}
+		return false, fmt.Errorf("no worker machine with NodeRef set found")
+	}); err != nil {
+		return err
+	}
+
+	glog.Info("Get nodeList")
+	if err := wait.PollImmediate(1*time.Second, waitShort, func() (bool, error) {
+		nodeList := corev1.NodeList{}
+		if err := tc.client.List(context.TODO(), &listOptions, &nodeList); err != nil {
+			glog.Errorf("error querying api for nodeList object: %v, retrying...", err)
+			return false, nil
+		}
+		for _, nodeItem := range nodeList.Items {
+			if nodeItem.Name == nodeName {
+				node = &nodeItem
+				break
+			}
+		}
+		if node == nil {
+			return false, fmt.Errorf("node %q not found", nodeName)
+		}
+		return true, nil
+	}); err != nil {
+		return err
+	}
+
+	glog.Info("Delete machine and observe node draining")
+	if err := tc.client.Delete(context.TODO(), &machine); err != nil {
+		return fmt.Errorf("unable to delete machine %q", machine.Name)
+	}
+
+	return wait.PollImmediate(time.Second, waitShort, func() (bool, error) {
+		eventList := corev1.EventList{}
+		if err := tc.client.List(context.TODO(), &listOptions, &eventList); err != nil {
+			glog.Errorf("error querying api for eventList object: %v, retrying...", err)
+			return false, nil
+		}
+
+		glog.Infof("Fetching machine deleted and node drained events")
+		var nodeDrainedEvent *corev1.Event
+		var machineDeletedEvent *corev1.Event
+		for i, eventItem := range eventList.Items {
+			if eventItem.Reason == "Deleted" && eventItem.Message == fmt.Sprintf("Node %q drained", nodeName) {
+				nodeDrainedEvent = &eventList.Items[i]
+				continue
+			}
+			// always take the newest 'machine deleted' event
+			if eventItem.Reason == "Deleted" && eventItem.Message == fmt.Sprintf("Deleted machine %v", machine.Name) {
+				machineDeletedEvent = &eventList.Items[i]
+			}
+		}
+
+		if nodeDrainedEvent == nil {
+			glog.Infof("Unable to find %q node drained event", nodeName)
+			return false, nil
+		}
+
+		if machineDeletedEvent == nil {
+			glog.Infof("Unable to find %q machine deleted event", machine.Name)
+			return false, nil
+		}
+
+		glog.Infof("Node %q drained event recorded: %#v", nodeName, *nodeDrainedEvent)
+
+		if machineDeletedEvent.FirstTimestamp.Before(&nodeDrainedEvent.FirstTimestamp) {
+			err := fmt.Errorf("machine %q deleted before node %q got drained", machine.Name, nodeName)
+			glog.Error(err)
+			return true, err
+		}
+
+		return true, nil
+	})
+}
+
+var nodeDrainLabels = map[string]string{
+	workerRoleLabel:      "",
+	"node-draining-test": "",
+}
+
+func machineFromMachineset(machineset *mapiv1beta1.MachineSet) *mapiv1beta1.Machine {
+	randomUUID := string(uuid.NewUUID())
+
+	machine := &mapiv1beta1.Machine{
+		ObjectMeta: metav1.ObjectMeta{
+			Namespace: machineset.Namespace,
+			Name:      "machine-" + randomUUID[:6],
+			Labels:    machineset.Labels,
+		},
+		Spec: machineset.Spec.Template.Spec,
+	}
+	if machine.Spec.ObjectMeta.Labels == nil {
+		machine.Spec.ObjectMeta.Labels = map[string]string{}
+	}
+	for key := range nodeDrainLabels {
+		if _, exists := machine.Spec.ObjectMeta.Labels[key]; exists {
+			continue
+		}
+		machine.Spec.ObjectMeta.Labels[key] = nodeDrainLabels[key]
+	}
+	return machine
+}
+
+func replicationControllerWorkload(namespace string) *corev1.ReplicationController {
+	var replicas int32 = 20
+	return &corev1.ReplicationController{
+		ObjectMeta: metav1.ObjectMeta{
+			Name:      "pdb-workload",
+			Namespace: namespace,
+		},
+		Spec: corev1.ReplicationControllerSpec{
+			Replicas: &replicas,
+			Selector: map[string]string{
+				"app": "nginx",
+			},
+			Template: &corev1.PodTemplateSpec{
+				ObjectMeta: metav1.ObjectMeta{
+					Name: "nginx",
+					Labels: map[string]string{
+						"app": "nginx",
+					},
+				},
+				Spec: corev1.PodSpec{
+					Containers: []corev1.Container{
+						{
+							Name:    "work",
+							Image:   "busybox",
+							Command: []string{"sleep", "10h"},
+							Resources: corev1.ResourceRequirements{
+								Requests: corev1.ResourceList{
+									"cpu":    resource.MustParse("50m"),
+									"memory": resource.MustParse("50Mi"),
+								},
+							},
+						},
+					},
+					NodeSelector: nodeDrainLabels,
+					Tolerations: []corev1.Toleration{
+						{
+							Key:      "kubemark",
+							Operator: corev1.TolerationOpExists,
+						},
+					},
+				},
+			},
+		},
+	}
+}
+
+func podDisruptionBudget(namespace string) *kpolicyapi.PodDisruptionBudget {
+	maxUnavailable := intstr.FromInt(1)
+	return &kpolicyapi.PodDisruptionBudget{
+		ObjectMeta: metav1.ObjectMeta{
+			Name:      "nginx-pdb",
+			Namespace: namespace,
+		},
+		Spec: kpolicyapi.PodDisruptionBudgetSpec{
+			Selector: &metav1.LabelSelector{
+				MatchLabels: map[string]string{
+					"app": "nginx",
+				},
+			},
+			MaxUnavailable: &maxUnavailable,
+		},
+ } +} + +// 1. create two machines (without machineset) and wait until nodes are registered and ready +// 1. create rc +// 1. create pdb +// 1. pick a node that has at least half of the rc pods +// 1. drain node +// 1. observe the machine object is not deleted before a node is drained, +// i.e. as long as there is at least one pod running on the drained node, +// the machine object can not be deleted + +func isNodeReady(node *corev1.Node) bool { + for _, c := range node.Status.Conditions { + if c.Type == corev1.NodeReady { + return c.Status == corev1.ConditionTrue + } + } + return false +} + +func (tc *testConfig) ExpectNodeToBeDrainedBeforeMachineIsDeleted() error { + delObjects := make(map[string]runtime.Object) + + defer func() { + // Remove resources + for key := range delObjects { + glog.Infof("Deleting object %q", key) + if err := tc.client.Delete(context.TODO(), delObjects[key]); err != nil { + glog.Errorf("Unable to delete object %q: %v", key, err) + } + } + }() + + // Take the first worker machineset (assuming only worker machines are backed by machinesets) + machinesets := mapiv1beta1.MachineSetList{} + if err := wait.PollImmediate(1*time.Second, 1*time.Minute, func() (bool, error) { + if err := tc.client.List(context.TODO(), &client.ListOptions{}, &machinesets); err != nil { + glog.Errorf("Error querying api for machineset object: %v, retrying...", err) + return false, nil + } + if len(machinesets.Items) < 1 { + glog.Errorf("Expected at least one machineset, have none") + return false, nil + } + return true, nil + }); err != nil { + return err + } + + // Create two machines + machine1 := machineFromMachineset(&machinesets.Items[0]) + machine1.Name = "machine1" + + if err := tc.client.Create(context.TODO(), machine1); err != nil { + return fmt.Errorf("unable to create machine %q: %v", machine1.Name, err) + } + + delObjects["machine1"] = machine1 + + machine2 := machineFromMachineset(&machinesets.Items[0]) + machine2.Name = "machine2" + + if err := tc.client.Create(context.TODO(), machine2); err != nil { + return fmt.Errorf("unable to create machine %q: %v", machine2.Name, err) + } + + delObjects["machine2"] = machine2 + + // Wait until both new nodes are ready + if err := wait.PollImmediate(1*time.Second, 10*time.Minute, func() (bool, error) { + nodes := corev1.NodeList{} + listOpt := &client.ListOptions{} + listOpt.MatchingLabels(nodeDrainLabels) + if err := tc.client.List(context.TODO(), listOpt, &nodes); err != nil { + glog.Errorf("Error querying api for Node object: %v, retrying...", err) + return false, nil + } + // expecting nodeGroupSize nodes + nodeCounter := 0 + for _, node := range nodes.Items { + if _, exists := node.Labels[workerRoleLabel]; !exists { + continue + } + + if !isNodeReady(&node) { + continue + } + + nodeCounter++ + } + + if nodeCounter < 2 { + glog.Errorf("Expecting 2 nodes with %#v labels in Ready state, got %v", nodeDrainLabels, nodeCounter) + return false, nil + } + + glog.Infof("Expected number (2) of nodes with %v label in Ready state found", nodeDrainLabels) + return true, nil + }); err != nil { + return err + } + + rc := replicationControllerWorkload(namespace) + if err := tc.client.Create(context.TODO(), rc); err != nil { + return fmt.Errorf("unable to create RC %q: %v", rc.Name, err) + } + + delObjects["rc"] = rc + + pdb := podDisruptionBudget(namespace) + if err := tc.client.Create(context.TODO(), pdb); err != nil { + return fmt.Errorf("unable to create PDB %q: %v", pdb.Name, err) + } + + delObjects["pdb"] = pdb + + // Wait until all replicas 
+	if err := wait.PollImmediate(1*time.Second, 10*time.Minute, func() (bool, error) {
+		rcObj := corev1.ReplicationController{}
+		key := types.NamespacedName{
+			Namespace: rc.Namespace,
+			Name:      rc.Name,
+		}
+		if err := tc.client.Get(context.TODO(), key, &rcObj); err != nil {
+			glog.Errorf("Error querying api RC %q object: %v, retrying...", rc.Name, err)
+			return false, nil
+		}
+		if rcObj.Status.ReadyReplicas == 0 {
+			glog.Infof("Waiting for at least one RC ready replica (%v/%v)", rcObj.Status.ReadyReplicas, rcObj.Status.Replicas)
+			return false, nil
+		}
+		glog.Infof("Waiting for RC ready replicas (%v/%v)", rcObj.Status.ReadyReplicas, rcObj.Status.Replicas)
+		if rcObj.Status.Replicas != rcObj.Status.ReadyReplicas {
+			return false, nil
+		}
+		return true, nil
+	}); err != nil {
+		return err
+	}
+
+	// All pods are distributed evenly among all nodes so it's fine to drain
+	// a random node and observe reconciliation of pods on the other one.
+	if err := tc.client.Delete(context.TODO(), machine1); err != nil {
+		return fmt.Errorf("unable to delete machine %q: %v", machine1.Name, err)
+	}
+
+	delete(delObjects, "machine1")
+
+	// We should still be able to get the machine until the node is drained,
+	// i.e. until rc.replicas-1 pods are running on the other node.
+	var drainedNodeName string
+	if err := wait.PollImmediate(1*time.Second, 10*time.Minute, func() (bool, error) {
+		machine := mapiv1beta1.Machine{}
+
+		key := types.NamespacedName{
+			Namespace: machine1.Namespace,
+			Name:      machine1.Name,
+		}
+		if err := tc.client.Get(context.TODO(), key, &machine); err != nil {
+			glog.Errorf("Error querying api machine %q object: %v, retrying...", machine1.Name, err)
+			return false, nil
+		}
+		if machine.Status.NodeRef == nil || machine.Status.NodeRef.Kind != "Node" {
+			glog.Errorf("Machine %q not linked to a node, retrying...", machine.Name)
+			return false, nil
+		}
+
+		drainedNodeName = machine.Status.NodeRef.Name
+		node := corev1.Node{}
+
+		if err := tc.client.Get(context.TODO(), types.NamespacedName{Name: drainedNodeName}, &node); err != nil {
+			glog.Errorf("Error querying api node %q object: %v, retrying...", drainedNodeName, err)
+			return false, nil
+		}
+
+		if !node.Spec.Unschedulable {
+			glog.Errorf("Node %q is expected to be marked as unschedulable, it is not", node.Name)
+			return false, nil
+		}
+
+		glog.Infof("Node %q is marked unschedulable as expected", node.Name)
+
+		pods := corev1.PodList{}
+		listOpt := &client.ListOptions{}
+		listOpt.MatchingLabels(rc.Spec.Selector)
+		if err := tc.client.List(context.TODO(), listOpt, &pods); err != nil {
+			glog.Errorf("Error querying api for Pods object: %v, retrying...", err)
+			return false, nil
+		}
+
+		// count pods still scheduled to the drained node that are not yet terminating
+		podCounter := 0
+		for _, pod := range pods.Items {
+			if pod.Spec.NodeName != machine.Status.NodeRef.Name {
+				continue
+			}
+			if !pod.DeletionTimestamp.IsZero() {
+				continue
+			}
+			podCounter++
+		}
+
+		glog.Infof("Have %v pods scheduled to node %q", podCounter, machine.Status.NodeRef.Name)
+
+		// Verify we have enough pods running as well
+		rcObj := corev1.ReplicationController{}
+		key = types.NamespacedName{
+			Namespace: rc.Namespace,
+			Name:      rc.Name,
+		}
+		if err := tc.client.Get(context.TODO(), key, &rcObj); err != nil {
+			glog.Errorf("Error querying api RC %q object: %v, retrying...", rc.Name, err)
+			return false, nil
+		}
+
+		// The point of the test is to make sure the majority of the pods are rescheduled
+		// to other nodes. The pod disruption budget ensures at most one pod
+		// owned by the RC is not Ready, so there is no need to test that explicitly.
+		// Still, it is useful to have it printed.
+ glog.Infof("RC ReadyReplicas/Replicas: %v/%v", rcObj.Status.ReadyReplicas, rcObj.Status.Replicas) + + // This makes sure at most one replica is not ready + if rcObj.Status.Replicas-rcObj.Status.ReadyReplicas > 1 { + return false, fmt.Errorf("pod disruption budget not respecpted, node was not properly drained") + } + + // Depends on timing though a machine can be deleted even before there is only + // one pod left on the node (that is being evicted). + if podCounter > 2 { + glog.Infof("Expecting at most 2 pods to be scheduled to drained node %v, got %v", machine.Status.NodeRef.Name, podCounter) + return false, nil + } + + glog.Info("Expected result: all pods from the RC up to last one or two got scheduled to a different node while respecting PDB") + return true, nil + }); err != nil { + return err + } + + // Validate the machine is deleted + if err := wait.PollImmediate(5*time.Second, 1*time.Minute, func() (bool, error) { + machine := mapiv1beta1.Machine{} + + key := types.NamespacedName{ + Namespace: machine1.Namespace, + Name: machine1.Name, + } + err := tc.client.Get(context.TODO(), key, &machine) + if err == nil { + glog.Errorf("Machine %q not yet deleted", machine1.Name) + return false, nil + } + + if !strings.Contains(err.Error(), "not found") { + glog.Errorf("Error querying api machine %q object: %v, retrying...", machine1.Name, err) + return false, nil + } + + glog.Infof("Machine %q successfully deleted", machine1.Name) + return true, nil + }); err != nil { + return err + } + + // Validate underlying node is removed as well + if err := wait.PollImmediate(5*time.Second, waitLong, func() (bool, error) { + node := corev1.Node{} + + key := types.NamespacedName{ + Name: drainedNodeName, + } + err := tc.client.Get(context.TODO(), key, &node) + if err == nil { + glog.Errorf("Node %q not yet deleted", drainedNodeName) + return false, nil + } + + if !strings.Contains(err.Error(), "not found") { + glog.Errorf("Error querying api node %q object: %v, retrying...", drainedNodeName, err) + return false, nil + } + + glog.Infof("Node %q successfully deleted", drainedNodeName) + return true, nil + }); err != nil { + return err + } + + return nil +}