From 012010755fc136f0dc412b05af9b216dbe369081 Mon Sep 17 00:00:00 2001 From: Colin Walters Date: Thu, 20 Jan 2022 17:45:58 -0500 Subject: [PATCH] test/helpers: use `RetryOnConflict` for node writes I saw this race in a PR: ``` Error: Expected nil, but got: &errors.StatusError{ErrStatus:v1.Status{TypeMeta:v1.TypeMeta{Kind:"", APIVersion:""}, ListMeta:v1.ListMeta{SelfLink:"", ResourceVersion:"", Continue:"", RemainingItemCount:(*int64)(nil)}, Status:"Failure", Message:"Operation cannot be fulfilled on nodes \"ci-op-pjcj27z5-ff089-5jw6w-worker-b-dhsmk\": the object has been modified; please apply your changes to the latest version and try again", Reason:"Conflict", Details:(*v1.StatusDetails)(0xc000450cc0), Code:409}} Test: TestKernelType ``` A general problem with our tests is that they tend to be written in "one shot" mode but in cases like this we need to do retries and reconciliation. (Actually in this case it'd be better to format a strategic patch I think, but for now let's just do a standard `RetryOnConflict` as is done elsewhere) --- test/helpers/utils.go | 30 +++++++++++++++++++++--------- 1 file changed, 21 insertions(+), 9 deletions(-) diff --git a/test/helpers/utils.go b/test/helpers/utils.go index a8762de639..bb20e7b0c2 100644 --- a/test/helpers/utils.go +++ b/test/helpers/utils.go @@ -22,6 +22,7 @@ import ( metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/fields" "k8s.io/apimachinery/pkg/labels" + "k8s.io/client-go/util/retry" "k8s.io/apimachinery/pkg/util/wait" ) @@ -232,18 +233,29 @@ func LabelRandomNodeFromPool(t *testing.T, cs *framework.ClientSet, pool, label // G404: Use of weak random number generator (math/rand instead of crypto/rand) // #nosec infraNode := nodes[rand.Intn(len(nodes))] - infraNode.Labels[label] = "" - - _, err = cs.Nodes().Update(context.TODO(), &infraNode, metav1.UpdateOptions{}) + infraNodeName := infraNode.Name + err = retry.RetryOnConflict(retry.DefaultBackoff, func() error { + infraNode, err := cs.Nodes().Get(context.TODO(), infraNodeName, metav1.GetOptions{}) + if err != nil { + return err + } + infraNode.Labels[label] = "" + _, err = cs.Nodes().Update(context.TODO(), infraNode, metav1.UpdateOptions{}) + return err + }) require.Nil(t, err, "unable to label worker node %s with infra: %s", infraNode.Name, err) - return func() { - updatedNode, err := cs.Nodes().Get(context.TODO(), infraNode.Name, metav1.GetOptions{}) - require.Nil(t, err, "unable to get node to update: %s", err) - - delete(updatedNode.Labels, label) - _, err = cs.Nodes().Update(context.TODO(), updatedNode, metav1.UpdateOptions{}) + return func() { + err := retry.RetryOnConflict(retry.DefaultBackoff, func() error { + infraNode, err := cs.Nodes().Get(context.TODO(), infraNodeName, metav1.GetOptions{}) + if err != nil { + return err + } + delete(infraNode.Labels, label) + _, err = cs.Nodes().Update(context.TODO(), infraNode, metav1.UpdateOptions{}) + return err + }) require.Nil(t, err, "unable to remove label from node %s: %s", infraNode.Name, err) } }