Detect excessive container restarts relative to a per-run baseline.

hasExcessiveRestarts no longer compares a container's absolute RestartCount
against 5. The first time a (namespace, pod, container) triple is seen, its
RestartCount is stored in the map supplied by the caller and the container is
skipped. On later observations the pod is reported once the count has grown by
excessiveCount or more since that baseline; the only call site passes 2.

The test title changes from "two minutes" to "four minutes" and the generated
annotation entry is renamed to match. The wait.PollImmediate timeout is raised
from 2*time.Minute to 4*time.Minute so the polling window agrees with the new
title.

NOTE(review): the post-image blob id on the first "index" line was computed
before the timeout change and is kept as-is; leading whitespace inside the
hunks was reconstructed and should be confirmed against the target tree.

diff --git a/test/extended/operators/cluster.go b/test/extended/operators/cluster.go
index 4351f9fd2142..764b8d7da649 100644
--- a/test/extended/operators/cluster.go
+++ b/test/extended/operators/cluster.go
@@ -18,10 +18,11 @@ import (
 var _ = g.Describe("[Feature:Platform] Managed cluster should", func() {
 	defer g.GinkgoRecover()
 
-	g.It("have no crashlooping pods in core namespaces over two minutes", func() {
+	g.It("have no crashlooping pods in core namespaces over four minutes", func() {
 		c, err := e2e.LoadClientset()
 		o.Expect(err).NotTo(o.HaveOccurred())
 
+		restartingContainers := make(map[containerName]int)
 		podsWithProblems := make(map[string]*corev1.Pod)
 		var lastPending map[string]*corev1.Pod
-		wait.PollImmediate(5*time.Second, 2*time.Minute, func() (bool, error) {
+		wait.PollImmediate(5*time.Second, 4*time.Minute, func() (bool, error) {
@@ -63,7 +64,7 @@ var _ = g.Describe("[Feature:Platform] Managed cluster should", func() {
 				case hasCreateContainerError(pod):
 				case hasImagePullError(pod):
 				case isCrashLooping(pod):
-				case hasExcessiveRestarts(pod):
+				case hasExcessiveRestarts(pod, 2, restartingContainers):
 				case hasFailingContainer(pod):
 				default:
 					continue
@@ -172,10 +173,24 @@ func isCrashLooping(pod *corev1.Pod) bool {
 	return false
 }
 
-func hasExcessiveRestarts(pod *corev1.Pod) bool {
+type containerName struct {
+	namespace string
+	name      string
+	container string
+}
+
+func hasExcessiveRestarts(pod *corev1.Pod, excessiveCount int, counts map[containerName]int) bool {
 	for _, status := range append(append([]corev1.ContainerStatus{}, pod.Status.InitContainerStatuses...), pod.Status.ContainerStatuses...) {
-		if status.RestartCount > 5 {
-			pod.Status.Message = fmt.Sprintf("container %s has restarted more than 5 times", status.Name)
+		name := containerName{namespace: pod.Namespace, name: pod.Name, container: status.Name}
+		count, ok := counts[name]
+		if !ok {
+			counts[name] = int(status.RestartCount)
+			continue
+		}
+
+		current := int(status.RestartCount) - count
+		if current >= excessiveCount {
+			pod.Status.Message = fmt.Sprintf("container %s has restarted %d times (>= %d) within the allowed interval", status.Name, current, excessiveCount)
 			return true
 		}
 	}
diff --git a/test/extended/util/annotate/generated/zz_generated.annotations.go b/test/extended/util/annotate/generated/zz_generated.annotations.go
index 4192bef6edfc..eb2df04bc5e8 100644
--- a/test/extended/util/annotate/generated/zz_generated.annotations.go
+++ b/test/extended/util/annotate/generated/zz_generated.annotations.go
@@ -330,7 +330,7 @@ var annotations = map[string]string{
 	"[Top Level] [Feature:Platform] Managed cluster should ensure control plane operators do not make themselves unevictable": "[Top Level] [Feature:Platform] Managed cluster should ensure control plane operators do not make themselves unevictable [Skipped:ibmcloud] [Suite:openshift/conformance/parallel]",
 	"[Top Level] [Feature:Platform] Managed cluster should ensure control plane pods do not run in best-effort QoS": "[Top Level] [Feature:Platform] Managed cluster should ensure control plane pods do not run in best-effort QoS [Skipped:ibmcloud] [Suite:openshift/conformance/parallel]",
 	"[Top Level] [Feature:Platform] Managed cluster should ensure pods use downstream images from our release image with proper ImagePullPolicy": "[Top Level] [Feature:Platform] Managed cluster should ensure pods use downstream images from our release image with proper ImagePullPolicy [Suite:openshift/conformance/parallel]",
-	"[Top Level] [Feature:Platform] Managed cluster should have no crashlooping pods in core namespaces over two minutes": "[Top Level] [Feature:Platform] Managed cluster should have no crashlooping pods in core namespaces over two minutes [Suite:openshift/conformance/parallel]",
+	"[Top Level] [Feature:Platform] Managed cluster should have no crashlooping pods in core namespaces over four minutes": "[Top Level] [Feature:Platform] Managed cluster should have no crashlooping pods in core namespaces over four minutes [Suite:openshift/conformance/parallel]",
 	"[Top Level] [Feature:Platform] Managed cluster should have operators on the cluster version": "[Top Level] [Feature:Platform] Managed cluster should have operators on the cluster version [Suite:openshift/conformance/parallel]",
 	"[Top Level] [Feature:Platform] Managed cluster should recover when operator-owned objects are deleted [Disruptive]": "[Top Level] [Feature:Platform] Managed cluster should recover when operator-owned objects are deleted [Disruptive] [Serial] [Suite:openshift]",
 	"[Top Level] [Feature:Platform] Managed cluster should should expose cluster services outside the cluster": "[Top Level] [Feature:Platform] Managed cluster should should expose cluster services outside the cluster [Suite:openshift/conformance/parallel]",