From 6ab23cef7c9ee0d04bdcc285afa157cc602803a3 Mon Sep 17 00:00:00 2001
From: ravisantoshgudimetla
Date: Thu, 6 Jun 2019 12:15:51 -0400
Subject: [PATCH 1/2] UPSTREAM: : Modify scheduler preemption tests

---
 .../test/e2e/scheduling/preemption.go | 184 ++++++++++++------
 1 file changed, 120 insertions(+), 64 deletions(-)

diff --git a/vendor/k8s.io/kubernetes/test/e2e/scheduling/preemption.go b/vendor/k8s.io/kubernetes/test/e2e/scheduling/preemption.go
index 8cd402596f39..e0327b195279 100644
--- a/vendor/k8s.io/kubernetes/test/e2e/scheduling/preemption.go
+++ b/vendor/k8s.io/kubernetes/test/e2e/scheduling/preemption.go
@@ -90,45 +90,67 @@ var _ = SIGDescribe("SchedulerPreemption [Serial]", func() {
 		var podRes v1.ResourceList
 		// Create one pod per node that uses a lot of the node's resources.
 		By("Create pods that use 60% of node resources.")
-		pods := make([]*v1.Pod, len(nodeList.Items))
+		pods := make([]*v1.Pod, 0)
 		allPods, err := cs.CoreV1().Pods(metav1.NamespaceAll).List(metav1.ListOptions{})
+		framework.ExpectNoError(err)
 		for i, node := range nodeList.Items {
 			currentCpuUsage, currentMemUsage := getCurrentPodUsageOnTheNode(node.Name, allPods.Items, podRequestedResource)
 			framework.Logf("Current cpu and memory usage %v, %v", currentCpuUsage, currentMemUsage)
-			currentNode, err := cs.CoreV1().Nodes().Get(node.Name, metav1.GetOptions{})
-			framework.ExpectNoError(err)
-			cpuAllocatable, found := currentNode.Status.Allocatable["cpu"]
+			cpuAllocatable, found := node.Status.Allocatable["cpu"]
 			Expect(found).To(Equal(true))
 			milliCPU := cpuAllocatable.MilliValue()
-			milliCPU = milliCPU * 40 / 100
-			memAllocatable, found := currentNode.Status.Allocatable["memory"]
+			// To be tolerant, use only 0.6 of the resources available on the node
+			milliCPU = int64(float64(milliCPU-currentCpuUsage) * float64(0.6))
+			memAllocatable, found := node.Status.Allocatable["memory"]
 			Expect(found).To(Equal(true))
 			memory := memAllocatable.Value()
-			memory = memory * 60 / 100
+			// To be tolerant, use only 0.6 of the resources available on the node
+			memory = int64(float64(memory-currentMemUsage) * float64(0.6))
 			podRes = v1.ResourceList{}
+			// If a node is already heavily utilized, let's not create a pod there.
+			if milliCPU <= 0 {
+				framework.Logf("Node is heavily utilized, let's not create a pod here")
+				continue
+			}
 			podRes[v1.ResourceCPU] = *resource.NewMilliQuantity(int64(milliCPU), resource.DecimalSI)
-			podRes[v1.ResourceMemory] = *resource.NewQuantity(int64(memory), resource.BinarySI)
-			// make the first pod low priority and the rest medium priority.
 			priorityName := mediumPriorityClassName
 			if i == 0 {
 				priorityName = lowPriorityClassName
 			}
-			pods[i] = createPausePod(f, pausePodConfig{
-				Name:              fmt.Sprintf("pod%d-%v", i, priorityName),
+			currentPod := fmt.Sprintf("pod%d-%v", i, priorityName)
+			pods = append(pods, createPausePod(f, pausePodConfig{
+				Name:              currentPod,
 				PriorityClassName: priorityName,
 				Resources: &v1.ResourceRequirements{
 					Requests: podRes,
 				},
-			})
-			framework.Logf("Created pod: %v", pods[i].Name)
+				NodeName: node.Name,
+			}))
+			framework.Logf("Created pod: %v", currentPod)
+		}
+		if len(pods) < 2 {
+			framework.Skipf("We need at least two pods to be created but " +
+				"all nodes are already heavily utilized, so preemption tests cannot be run")
 		}
 		By("Wait for pods to be scheduled.")
+		//podRes = v1.ResourceList{}
+		lowerPriorityPodExists := false
+		if pods[0].Spec.PriorityClassName == lowPriorityClassName {
+			lowerPriorityPodExists = true
+		}
 		for _, pod := range pods {
 			framework.ExpectNoError(framework.WaitForPodRunningInNamespace(cs, pod))
 		}
+		if lowerPriorityPodExists {
+			// We want this pod to be preempted
+			podRes = pods[0].Spec.Containers[0].Resources.Requests
+		} else {
+			// All the pods are medium priority pods, so it doesn't matter which one gets preempted.
+			podRes = pods[1].Spec.Containers[0].Resources.Requests
+		}
 
-		By("Run a high priority pod that use 60% of a node resources.")
+		By("Run a high priority pod that has the same requirements as the lower priority pod")
 		// Create a high priority pod and make sure it is scheduled.
 		runPausePod(f, pausePodConfig{
 			Name:              "preemptor-pod",
@@ -137,17 +159,25 @@ var _ = SIGDescribe("SchedulerPreemption [Serial]", func() {
 				Requests: podRes,
 			},
 		})
-		// Make sure that the lowest priority pod is deleted.
-		preemptedPod, err := cs.CoreV1().Pods(pods[0].Namespace).Get(pods[0].Name, metav1.GetOptions{})
-		podDeleted := (err != nil && errors.IsNotFound(err)) ||
-			(err == nil && preemptedPod.DeletionTimestamp != nil)
-		Expect(podDeleted).To(BeTrue())
-		// Other pods (mid priority ones) should be present.
-		for i := 1; i < len(pods); i++ {
-			livePod, err := cs.CoreV1().Pods(pods[i].Namespace).Get(pods[i].Name, metav1.GetOptions{})
-			framework.ExpectNoError(err)
-			Expect(livePod.DeletionTimestamp).To(BeNil())
+		podPreempted := false
+		if lowerPriorityPodExists {
+			// Make sure that the lowest priority pod is deleted.
+			preemptedPod, err := cs.CoreV1().Pods(pods[0].Namespace).Get(pods[0].Name, metav1.GetOptions{})
+			podPreempted = (err != nil && errors.IsNotFound(err)) ||
+				(err == nil && preemptedPod.DeletionTimestamp != nil)
+		} else {
+			// This means one of the medium priority pods got preempted
+			for i := 0; i < len(pods); i++ {
+				midPriority, err := cs.CoreV1().Pods(pods[i].Namespace).Get(pods[i].Name, metav1.GetOptions{})
+				podPreempted = (err != nil && errors.IsNotFound(err)) ||
+					(err == nil && midPriority.DeletionTimestamp != nil)
+				if podPreempted {
+					// We have at least one pod that got preempted because of our pod
+					break
+				}
+			}
 		}
+		Expect(podPreempted).To(BeTrue())
 	})
 
 	// This test verifies that when a critical pod is created and no node with
@@ -156,46 +186,68 @@ var _ = SIGDescribe("SchedulerPreemption [Serial]", func() {
 	It("validates lower priority pod preemption by critical pod", func() {
 		var podRes v1.ResourceList
 		// Create one pod per node that uses a lot of the node's resources.
- By("Create pods that use 60% of node resources.") - pods := make([]*v1.Pod, len(nodeList.Items)) + By("Create pods that use most of node resources.") + pods := make([]*v1.Pod, 0) allPods, err := cs.CoreV1().Pods(metav1.NamespaceAll).List(metav1.ListOptions{}) + framework.ExpectNoError(err) for i, node := range nodeList.Items { currentCpuUsage, currentMemUsage := getCurrentPodUsageOnTheNode(node.Name, allPods.Items, podRequestedResource) framework.Logf("Current cpu usage and memory usage is %v, %v", currentCpuUsage, currentMemUsage) - currentNode, err := cs.CoreV1().Nodes().Get(node.Name, metav1.GetOptions{}) - framework.ExpectNoError(err) - cpuAllocatable, found := currentNode.Status.Allocatable["cpu"] + cpuAllocatable, found := node.Status.Allocatable["cpu"] Expect(found).To(Equal(true)) milliCPU := cpuAllocatable.MilliValue() - milliCPU = milliCPU * 40 / 100 - memAllocatable, found := currentNode.Status.Allocatable["memory"] + /// Just to be tolerant use 0.6 of resources available on the node + milliCPU = int64(float64(milliCPU-currentCpuUsage) * float64(0.6)) + memAllocatable, found := node.Status.Allocatable["memory"] Expect(found).To(Equal(true)) memory := memAllocatable.Value() - memory = memory * 60 / 100 + // Just to be tolerant use 0.6 of resources available on the node + memory = int64(float64(memory-currentMemUsage) * float64(0.6)) podRes = v1.ResourceList{} + // If a node is already heavily utilized let not's create a pod there. + if milliCPU <= 0 { + framework.Logf("Node is heavily utilized, let's not create a pod there") + continue + } podRes[v1.ResourceCPU] = *resource.NewMilliQuantity(int64(milliCPU), resource.DecimalSI) - podRes[v1.ResourceMemory] = *resource.NewQuantity(int64(memory), resource.BinarySI) // make the first pod low priority and the rest medium priority. priorityName := mediumPriorityClassName if i == 0 { priorityName = lowPriorityClassName } - pods[i] = createPausePod(f, pausePodConfig{ - Name: fmt.Sprintf("pod%d-%v", i, priorityName), + currentPod := fmt.Sprintf("pod%d-%v", i, priorityName) + pods = append(pods, createPausePod(f, pausePodConfig{ + Name: currentPod, PriorityClassName: priorityName, Resources: &v1.ResourceRequirements{ Requests: podRes, }, - }) - framework.Logf("Created pod: %v", pods[i].Name) + NodeName: node.Name, + })) + framework.Logf("Created pod: %v", currentPod) + } + if len(pods) < 2 { + framework.Skipf("We need atleast two pods to be created but" + + "all nodes are already heavily utilized, so preemption tests cannot be run") } By("Wait for pods to be scheduled.") + //podRes = v1.ResourceList{} + lowerPriorityPodExists := false + if pods[0].Spec.PriorityClassName == lowPriorityClassName { + lowerPriorityPodExists = true + } for _, pod := range pods { framework.ExpectNoError(framework.WaitForPodRunningInNamespace(cs, pod)) } - - By("Run a critical pod that use 60% of a node resources.") + if lowerPriorityPodExists { + // We want this pod to be preempted + podRes = pods[0].Spec.Containers[0].Resources.Requests + } else { + // All the pods are medium priority pods, so it doesn't matter which one gets preempted. + podRes = pods[1].Spec.Containers[0].Resources.Requests + } + By("Run a critical pod that use same resources as that of a lower priority pod") // Create a critical pod and make sure it is scheduled. runPausePod(f, pausePodConfig{ Name: "critical-pod", @@ -205,22 +257,31 @@ var _ = SIGDescribe("SchedulerPreemption [Serial]", func() { Requests: podRes, }, }) - // Make sure that the lowest priority pod is deleted. 
-		preemptedPod, err := cs.CoreV1().Pods(pods[0].Namespace).Get(pods[0].Name, metav1.GetOptions{})
+
 		defer func() {
 			// Clean-up the critical pod
 			err := f.ClientSet.CoreV1().Pods(metav1.NamespaceSystem).Delete("critical-pod", metav1.NewDeleteOptions(0))
 			framework.ExpectNoError(err)
 		}()
-		podDeleted := (err != nil && errors.IsNotFound(err)) ||
-			(err == nil && preemptedPod.DeletionTimestamp != nil)
-		Expect(podDeleted).To(BeTrue())
-		// Other pods (mid priority ones) should be present.
-		for i := 1; i < len(pods); i++ {
-			livePod, err := cs.CoreV1().Pods(pods[i].Namespace).Get(pods[i].Name, metav1.GetOptions{})
-			framework.ExpectNoError(err)
-			Expect(livePod.DeletionTimestamp).To(BeNil())
+		podPreempted := false
+		if lowerPriorityPodExists {
+			// Make sure that the lowest priority pod is deleted.
+			preemptedPod, err := cs.CoreV1().Pods(pods[0].Namespace).Get(pods[0].Name, metav1.GetOptions{})
+			podPreempted = (err != nil && errors.IsNotFound(err)) ||
+				(err == nil && preemptedPod.DeletionTimestamp != nil)
+		} else {
+			// This means one of the medium priority pods got preempted
+			for i := 0; i < len(pods); i++ {
+				midPriority, err := cs.CoreV1().Pods(pods[i].Namespace).Get(pods[i].Name, metav1.GetOptions{})
+				podPreempted = (err != nil && errors.IsNotFound(err)) ||
+					(err == nil && midPriority.DeletionTimestamp != nil)
+				if podPreempted {
+					// We have at least one pod that got preempted because of our pod
+					break
+				}
+			}
 		}
+		Expect(podPreempted).To(BeTrue())
 	})
 
 	// This test verifies that when a high priority pod is pending and its
@@ -242,17 +303,18 @@ var _ = SIGDescribe("SchedulerPreemption [Serial]", func() {
 			node := nodeList.Items[i]
 			currentCpuUsage, currentMemUsage := getCurrentPodUsageOnTheNode(node.Name, allPods.Items, podRequestedResource)
 			framework.Logf("Current cpu usage and memory usage is %v, %v", currentCpuUsage, currentMemUsage)
-			currentNode, err := cs.CoreV1().Nodes().Get(node.Name, metav1.GetOptions{})
-			framework.ExpectNoError(err)
-			cpuAllocatable, found := currentNode.Status.Allocatable["cpu"]
+			cpuAllocatable, found := node.Status.Allocatable["cpu"]
 			Expect(found).To(Equal(true))
 			milliCPU := cpuAllocatable.MilliValue()
-			milliCPU = milliCPU * 10 / 100
-			memAllocatable, found := currentNode.Status.Allocatable["memory"]
+			// To be tolerant, use only 0.6 of the resources available on the node
+			milliCPU = int64(float64(milliCPU-currentCpuUsage) * float64(0.6))
+			memAllocatable, found := node.Status.Allocatable["memory"]
 			Expect(found).To(BeTrue())
 			memory := memAllocatable.Value()
-			memory = memory * 10 / 100
+			// To be tolerant, use only 0.6 of the resources available on the node
+			memory = int64(float64(memory-currentMemUsage) * float64(0.6))
 			podRes = v1.ResourceList{}
+
 			podRes[v1.ResourceCPU] = *resource.NewMilliQuantity(int64(milliCPU), resource.DecimalSI)
 			podRes[v1.ResourceMemory] = *resource.NewQuantity(int64(memory), resource.BinarySI)
@@ -271,6 +333,7 @@ var _ = SIGDescribe("SchedulerPreemption [Serial]", func() {
 			Resources: &v1.ResourceRequirements{
 				Requests: podRes,
 			},
+			NodeName: node.Name,
 			Affinity: &v1.Affinity{
 				PodAntiAffinity: &v1.PodAntiAffinity{
 					RequiredDuringSchedulingIgnoredDuringExecution: []v1.PodAffinityTerm{
@@ -313,11 +376,6 @@ var _ = SIGDescribe("SchedulerPreemption [Serial]", func() {
 			}
 		}()
 
-		By("Wait for pods to be scheduled.")
-		for _, pod := range pods {
-			framework.ExpectNoError(framework.WaitForPodRunningInNamespace(cs, pod))
-		}
-
 		By("Run a high priority pod with node affinity to the first node.")
 		// Create a high priority pod and make sure it is scheduled.
 		runPausePod(f, pausePodConfig{
@@ -644,10 +702,8 @@ func getCurrentPodUsageOnTheNode(nodeName string, pods []v1.Pod, resource *v1.Re
 	totalRequestedCpuResource := resource.Requests.Cpu().MilliValue()
 	totalRequestedMemResource := resource.Requests.Memory().Value()
 	for _, pod := range pods {
-		if pod.Spec.NodeName == nodeName {
-			if v1qos.GetPodQOS(&pod) == v1.PodQOSBestEffort {
-				continue
-			}
+		if pod.Spec.NodeName != nodeName || v1qos.GetPodQOS(&pod) == v1.PodQOSBestEffort {
+			continue
+		}
 		result := getNonZeroRequests(&pod)
 		totalRequestedCpuResource += result.MilliCPU

From 7db1d5e9749dc1694d0d5398d0d81770dc1f2288 Mon Sep 17 00:00:00 2001
From: ravisantoshgudimetla
Date: Thu, 6 Jun 2019 12:16:19 -0400
Subject: [PATCH 2/2] Re-enable scheduler preemption tests

---
 test/extended/util/test.go | 1 -
 1 file changed, 1 deletion(-)

diff --git a/test/extended/util/test.go b/test/extended/util/test.go
index 2d4070ac9503..23eb8b413630 100644
--- a/test/extended/util/test.go
+++ b/test/extended/util/test.go
@@ -389,7 +389,6 @@ var (
 		`should be rejected when no endpoints exist`, // https://bugzilla.redhat.com/show_bug.cgi?id=1711605
 		`PreemptionExecutionPath runs ReplicaSets to verify preemption running path`, // https://bugzilla.redhat.com/show_bug.cgi?id=1711606
 		`TaintBasedEvictions`, // https://bugzilla.redhat.com/show_bug.cgi?id=1711608
-		`SchedulerPreemption`, // https://bugzilla.redhat.com/show_bug.cgi?id=1717198
 		`\[Driver: iscsi\]`, // https://bugzilla.redhat.com/show_bug.cgi?id=1711627
 		`\[Driver: ceph\]\[Feature:Volumes\] \[Testpattern: Pre-provisioned PV \(default fs\)\] subPath should verify container cannot write to subpath`,