From c1679b352d94492ee41a1c805544bd2231457842 Mon Sep 17 00:00:00 2001
From: Mike Dame
Date: Mon, 4 May 2020 16:07:50 -0400
Subject: [PATCH 1/2] UPSTREAM: 90740: Balance node usage before creating
 victim pods in preemption e2e

---
 .../test/e2e/scheduling/preemption.go         | 100 ++++++++++--------
 .../test/e2e/scheduling/priorities.go         |  32 ++++--
 2 files changed, 78 insertions(+), 54 deletions(-)

diff --git a/vendor/k8s.io/kubernetes/test/e2e/scheduling/preemption.go b/vendor/k8s.io/kubernetes/test/e2e/scheduling/preemption.go
index 50f3a26f1b38..821b0d21750f 100644
--- a/vendor/k8s.io/kubernetes/test/e2e/scheduling/preemption.go
+++ b/vendor/k8s.io/kubernetes/test/e2e/scheduling/preemption.go
@@ -55,6 +55,8 @@ type priorityPair struct {
 	value int32
 }
 
+var testExtendedResource = v1.ResourceName("scheduling.k8s.io/foo")
+
 var _ = SIGDescribe("SchedulerPreemption [Serial]", func() {
 	var cs clientset.Interface
 	var nodeList *v1.NodeList
@@ -75,6 +77,10 @@ var _ = SIGDescribe("SchedulerPreemption [Serial]", func() {
 		for _, pair := range priorityPairs {
 			cs.SchedulingV1().PriorityClasses().Delete(context.TODO(), pair.name, *metav1.NewDeleteOptions(0))
 		}
+		for _, node := range nodeList.Items {
+			delete(node.Status.Capacity, testExtendedResource)
+			cs.CoreV1().Nodes().UpdateStatus(context.TODO(), &node, metav1.UpdateOptions{})
+		}
 	})
 
 	ginkgo.BeforeEach(func() {
@@ -103,30 +109,20 @@ var _ = SIGDescribe("SchedulerPreemption [Serial]", func() {
 	// the high priority pod.
 	ginkgo.It("validates basic preemption works", func() {
 		var podRes v1.ResourceList
+
 		// Create one pod per node that uses a lot of the node's resources.
 		ginkgo.By("Create pods that use 60% of node resources.")
 		pods := make([]*v1.Pod, 0, len(nodeList.Items))
-		allPods, err := cs.CoreV1().Pods(metav1.NamespaceAll).List(context.TODO(), metav1.ListOptions{})
-		framework.ExpectNoError(err)
+		// Now create victim pods on each of the node with lower priority
 		for i, node := range nodeList.Items {
-			currentCPUUsage, currentMemUsage := getCurrentPodUsageOnTheNode(node.Name, allPods.Items, podRequestedResource)
-			framework.Logf("Current cpu and memory usage %v, %v", currentCPUUsage, currentMemUsage)
-			cpuAllocatable, found := node.Status.Allocatable["cpu"]
-			framework.ExpectEqual(found, true)
-			milliCPU := cpuAllocatable.MilliValue()
-			milliCPU = int64(float64(milliCPU-currentCPUUsage) * float64(0.6))
-			memAllocatable, found := node.Status.Allocatable["memory"]
-			framework.ExpectEqual(found, true)
-			memory := memAllocatable.Value()
-			memory = int64(float64(memory-currentMemUsage) * float64(0.6))
-			// If a node is already heavily utilized let not's create a pod there.
-			if milliCPU <= 0 || memory <= 0 {
-				framework.Logf("Node is heavily utilized, let's not create a pod here")
-				continue
-			}
+			// Update each node to advertise 3 available extended resources
+			node.Status.Capacity[testExtendedResource] = resource.MustParse("3")
+			node, err := cs.CoreV1().Nodes().UpdateStatus(context.TODO(), &node, metav1.UpdateOptions{})
+			framework.ExpectNoError(err)
+
+			// Request 2 of the available resources for the victim pods
 			podRes = v1.ResourceList{}
-			podRes[v1.ResourceCPU] = *resource.NewMilliQuantity(int64(milliCPU), resource.DecimalSI)
-			podRes[v1.ResourceMemory] = *resource.NewQuantity(int64(memory), resource.BinarySI)
+			podRes[testExtendedResource] = resource.MustParse("2")
 
 			// make the first pod low priority and the rest medium priority.
 			priorityName := mediumPriorityClassName
@@ -138,10 +134,23 @@ var _ = SIGDescribe("SchedulerPreemption [Serial]", func() {
 				PriorityClassName: priorityName,
 				Resources: &v1.ResourceRequirements{
 					Requests: podRes,
+					Limits:   podRes,
+				},
+				Affinity: &v1.Affinity{
+					NodeAffinity: &v1.NodeAffinity{
+						RequiredDuringSchedulingIgnoredDuringExecution: &v1.NodeSelector{
+							NodeSelectorTerms: []v1.NodeSelectorTerm{
+								{
+									MatchFields: []v1.NodeSelectorRequirement{
+										{Key: "metadata.name", Operator: v1.NodeSelectorOpIn, Values: []string{node.Name}},
+									},
+								},
+							},
+						},
+					},
 				},
-				NodeName: node.Name,
 			}))
-			framework.Logf("Created pod: %v", pods[i].Name)
+			framework.Logf("Created pod: %v with resources: %+v", pods[i].Name, pods[i].Spec.Containers[0].Resources)
 		}
 		if len(pods) < 2 {
 			framework.Failf("We need at least two pods to be created but" +
@@ -162,8 +171,8 @@ var _ = SIGDescribe("SchedulerPreemption [Serial]", func() {
 			PriorityClassName: highPriorityClassName,
 			Resources: &v1.ResourceRequirements{
 				Requests: podRes,
+				Limits:   podRes,
 			},
-			NodeName: pods[0].Spec.NodeName,
 		})
 
 		preemptedPod, err := cs.CoreV1().Pods(pods[0].Namespace).Get(context.TODO(), pods[0].Name, metav1.GetOptions{})
@@ -174,7 +183,6 @@ var _ = SIGDescribe("SchedulerPreemption [Serial]", func() {
 			framework.ExpectNoError(err)
 			gomega.Expect(livePod.DeletionTimestamp).To(gomega.BeNil())
 		}
-
 		framework.ExpectEqual(podPreempted, true)
 	})
 
@@ -183,30 +191,19 @@ var _ = SIGDescribe("SchedulerPreemption [Serial]", func() {
 	// this critical pod.
 	ginkgo.It("validates lower priority pod preemption by critical pod", func() {
 		var podRes v1.ResourceList
+
 		// Create one pod per node that uses a lot of the node's resources.
-		ginkgo.By("Create pods that use 60% of node resources.")
+		ginkgo.By("Create pods that use 2/3 of node resources.")
 		pods := make([]*v1.Pod, 0, len(nodeList.Items))
-		allPods, err := cs.CoreV1().Pods(metav1.NamespaceAll).List(context.TODO(), metav1.ListOptions{})
-		framework.ExpectNoError(err)
 		for i, node := range nodeList.Items {
-			currentCPUUsage, currentMemUsage := getCurrentPodUsageOnTheNode(node.Name, allPods.Items, podRequestedResource)
-			framework.Logf("Current cpu usage and memory usage is %v, %v", currentCPUUsage, currentMemUsage)
-			cpuAllocatable, found := node.Status.Allocatable["cpu"]
-			framework.ExpectEqual(found, true)
-			milliCPU := cpuAllocatable.MilliValue()
-			milliCPU = int64(float64(milliCPU-currentCPUUsage) * float64(0.6))
-			memAllocatable, found := node.Status.Allocatable["memory"]
-			framework.ExpectEqual(found, true)
-			memory := memAllocatable.Value()
-			memory = int64(float64(memory-currentMemUsage) * float64(0.6))
+			// Update each node to advertise 3 available extended resources
+			node.Status.Capacity[testExtendedResource] = resource.MustParse("3")
+			node, err := cs.CoreV1().Nodes().UpdateStatus(context.TODO(), &node, metav1.UpdateOptions{})
+			framework.ExpectNoError(err)
+
+			// Request 2 of the available resources for the victim pods
 			podRes = v1.ResourceList{}
-			// If a node is already heavily utilized let not's create a pod there.
-			if milliCPU <= 0 || memory <= 0 {
-				framework.Logf("Node is heavily utilized, let's not create a pod there")
-				continue
-			}
-			podRes[v1.ResourceCPU] = *resource.NewMilliQuantity(int64(milliCPU), resource.DecimalSI)
-			podRes[v1.ResourceMemory] = *resource.NewQuantity(int64(memory), resource.BinarySI)
+			podRes[testExtendedResource] = resource.MustParse("2")
 
 			// make the first pod low priority and the rest medium priority.
 			priorityName := mediumPriorityClassName
@@ -218,8 +215,21 @@ var _ = SIGDescribe("SchedulerPreemption [Serial]", func() {
 				PriorityClassName: priorityName,
 				Resources: &v1.ResourceRequirements{
 					Requests: podRes,
+					Limits:   podRes,
+				},
+				Affinity: &v1.Affinity{
+					NodeAffinity: &v1.NodeAffinity{
+						RequiredDuringSchedulingIgnoredDuringExecution: &v1.NodeSelector{
+							NodeSelectorTerms: []v1.NodeSelectorTerm{
+								{
+									MatchFields: []v1.NodeSelectorRequirement{
+										{Key: "metadata.name", Operator: v1.NodeSelectorOpIn, Values: []string{node.Name}},
+									},
+								},
+							},
+						},
+					},
 				},
-				NodeName: node.Name,
 			}))
 			framework.Logf("Created pod: %v", pods[i].Name)
 		}
@@ -250,8 +260,8 @@ var _ = SIGDescribe("SchedulerPreemption [Serial]", func() {
 			PriorityClassName: scheduling.SystemClusterCritical,
 			Resources: &v1.ResourceRequirements{
 				Requests: podRes,
+				Limits:   podRes,
 			},
-			NodeName: pods[0].Spec.NodeName,
 		})
 
 		defer func() {
diff --git a/vendor/k8s.io/kubernetes/test/e2e/scheduling/priorities.go b/vendor/k8s.io/kubernetes/test/e2e/scheduling/priorities.go
index e96ff7d0a14a..a661e6fcd878 100644
--- a/vendor/k8s.io/kubernetes/test/e2e/scheduling/priorities.go
+++ b/vendor/k8s.io/kubernetes/test/e2e/scheduling/priorities.go
@@ -465,16 +465,30 @@ func createBalancedPodForNodes(f *framework.Framework, cs clientset.Interface, n
 		needCreateResource[v1.ResourceMemory] = *resource.NewQuantity(int64((ratio-memFraction)*float64(memAllocatableVal)), resource.BinarySI)
 
-		err := testutils.StartPods(cs, 1, ns, string(uuid.NewUUID()),
-			*initPausePod(f, pausePodConfig{
-				Name:   "",
-				Labels: balancePodLabel,
-				Resources: &v1.ResourceRequirements{
-					Limits:   needCreateResource,
-					Requests: needCreateResource,
+		podConfig := &pausePodConfig{
+			Name:   "",
+			Labels: balancePodLabel,
+			Resources: &v1.ResourceRequirements{
+				Limits:   needCreateResource,
+				Requests: needCreateResource,
+			},
+			Affinity: &v1.Affinity{
+				NodeAffinity: &v1.NodeAffinity{
+					RequiredDuringSchedulingIgnoredDuringExecution: &v1.NodeSelector{
+						NodeSelectorTerms: []v1.NodeSelectorTerm{
+							{
+								MatchFields: []v1.NodeSelectorRequirement{
+									{Key: "metadata.name", Operator: v1.NodeSelectorOpIn, Values: []string{node.Name}},
+								},
+							},
+						},
+					},
 				},
-				NodeName: node.Name,
-			}), true, framework.Logf)
+			},
+		}
+
+		err := testutils.StartPods(cs, 1, ns, string(uuid.NewUUID()),
+			*initPausePod(f, *podConfig), true, framework.Logf)
 
 		if err != nil {
 			return err

From c584cec9b3eb92c4887de7f40f931f6591c0a683 Mon Sep 17 00:00:00 2001
From: Mike Dame
Date: Mon, 4 May 2020 16:11:57 -0400
Subject: [PATCH 2/2] Reenable preemption e2es

---
 .../util/annotate/generated/zz_generated.annotations.go | 8 ++++----
 test/extended/util/annotate/rules.go                     | 3 ---
 2 files changed, 4 insertions(+), 7 deletions(-)

diff --git a/test/extended/util/annotate/generated/zz_generated.annotations.go b/test/extended/util/annotate/generated/zz_generated.annotations.go
index a8ed3edc39f8..6198f9e0a223 100644
--- a/test/extended/util/annotate/generated/zz_generated.annotations.go
+++ b/test/extended/util/annotate/generated/zz_generated.annotations.go
@@ -1148,10 +1148,10 @@ var annotations = map[string]string{
 	"[Top Level] [sig-scheduling] SchedulerPredicates [Serial] validates that taints-tolerations is respected if not matching": "validates that taints-tolerations is respected if not matching [Suite:openshift/conformance/serial] [Suite:k8s]",
 	"[Top Level] [sig-scheduling] SchedulerPredicates [Serial] validates that there exists conflict between pods with same hostPort and protocol but one using 0.0.0.0 hostIP [Conformance]": "validates that there exists conflict between pods with same hostPort and protocol but one using 0.0.0.0 hostIP [Conformance] [Slow] [Suite:k8s]",
 	"[Top Level] [sig-scheduling] SchedulerPredicates [Serial] validates that there is no conflict between pods with same hostPort but different hostIP and protocol [Conformance]": "validates that there is no conflict between pods with same hostPort but different hostIP and protocol [Conformance] [Suite:openshift/conformance/serial/minimal] [Suite:k8s]",
-	"[Top Level] [sig-scheduling] SchedulerPreemption [Serial] PodTopologySpread Preemption validates proper pods are preempted": "validates proper pods are preempted [Disabled:Broken] [Suite:k8s]",
-	"[Top Level] [sig-scheduling] SchedulerPreemption [Serial] PreemptionExecutionPath runs ReplicaSets to verify preemption running path": "runs ReplicaSets to verify preemption running path [Disabled:Broken] [Suite:k8s]",
-	"[Top Level] [sig-scheduling] SchedulerPreemption [Serial] validates basic preemption works": "validates basic preemption works [Disabled:Broken] [Suite:k8s]",
-	"[Top Level] [sig-scheduling] SchedulerPreemption [Serial] validates lower priority pod preemption by critical pod": "validates lower priority pod preemption by critical pod [Disabled:Broken] [Suite:k8s]",
+	"[Top Level] [sig-scheduling] SchedulerPreemption [Serial] PodTopologySpread Preemption validates proper pods are preempted": "validates proper pods are preempted [Suite:openshift/conformance/serial] [Suite:k8s]",
+	"[Top Level] [sig-scheduling] SchedulerPreemption [Serial] PreemptionExecutionPath runs ReplicaSets to verify preemption running path": "runs ReplicaSets to verify preemption running path [Suite:openshift/conformance/serial] [Suite:k8s]",
+	"[Top Level] [sig-scheduling] SchedulerPreemption [Serial] validates basic preemption works": "validates basic preemption works [Suite:openshift/conformance/serial] [Suite:k8s]",
+	"[Top Level] [sig-scheduling] SchedulerPreemption [Serial] validates lower priority pod preemption by critical pod": "validates lower priority pod preemption by critical pod [Suite:openshift/conformance/serial] [Suite:k8s]",
 	"[Top Level] [sig-scheduling] SchedulerPriorities [Serial] Pod should avoid nodes that have avoidPod annotation": "Pod should avoid nodes that have avoidPod annotation [Suite:openshift/conformance/serial] [Suite:k8s]",
 	"[Top Level] [sig-scheduling] SchedulerPriorities [Serial] Pod should be preferably scheduled to nodes pod can tolerate": "Pod should be preferably scheduled to nodes pod can tolerate [Suite:openshift/conformance/serial] [Suite:k8s]",
 	"[Top Level] [sig-scheduling] SchedulerPriorities [Serial] Pod should be scheduled to node that don't match the PodAntiAffinity terms": "Pod should be scheduled to node that don't match the PodAntiAffinity terms [Suite:openshift/conformance/serial] [Suite:k8s]",
diff --git a/test/extended/util/annotate/rules.go b/test/extended/util/annotate/rules.go
index d50f3c1c492f..3489e508855a 100644
--- a/test/extended/util/annotate/rules.go
+++ b/test/extended/util/annotate/rules.go
@@ -92,9 +92,6 @@ var (
 
 		// A fix is in progress: https://github.com/openshift/origin/pull/24709
 		`Multi-AZ Clusters should spread the pods of a replication controller across zones`,
-
-		// Workloads: https://bugzilla.redhat.com/show_bug.cgi?id=1731263
-		`SchedulerPreemption`,
 	},
 	// tests that may work, but we don't support them
 	"[Disabled:Unsupported]": {