knative · knative-prow-robot · Aug 24, 2021 · Apr 14, 2021 · Aug 19, 2021 · Aug 19, 2021
diff --git a/pkg/queue/health/handler.go b/pkg/queue/health/handler.go
@@ -65,6 +65,6 @@ func ProbeHandler(healthState *State, prober func() bool, isAggressive bool, tra
 				return false
 			}
 			return true
-		}, isAggressive, w)
+		}, w)
 	}
 }
diff --git a/pkg/queue/health/health_state.go b/pkg/queue/health/health_state.go
@@ -90,10 +90,8 @@ func (h *State) drainFinished() {
 }
 
 // HandleHealthProbe handles the probe according to the current state of the
-// health server. If isAggressive is false and prober has succeeded previously,
-// the function returns success without probing user-container again (until
-// shutdown).
-func (h *State) HandleHealthProbe(prober func() bool, isAggressive bool, w http.ResponseWriter) {
+// health server.
+func (h *State) HandleHealthProbe(prober func() bool, w http.ResponseWriter) {
 	sendAlive := func() {
 		io.WriteString(w, queue.Name)
 	}
@@ -107,8 +105,6 @@ func (h *State) HandleHealthProbe(prober func() bool, isAggressive bool, w http.
 	}
 
 	switch {
-	case !isAggressive && h.isAlive():
-		sendAlive()
 	case h.isShuttingDown():
 		sendShuttingDown()
 	case prober != nil && !prober():

diff --git a/pkg/queue/health/health_state_test.go b/pkg/queue/health/health_state_test.go
@@ -67,79 +67,58 @@ func TestHealthStateHealthHandler(t *testing.T) {
 		alive        bool
 		shuttingDown bool
 		prober       func() bool
-		isAggressive bool
 		wantStatus   int
 		wantBody     string
 	}{{
-		name:         "alive: true, K-Probe",
-		alive:        true,
-		isAggressive: false,
-		wantStatus:   http.StatusOK,
-		wantBody:     queue.Name,
+		name:       "alive: true",
+		alive:      true,
+		wantStatus: http.StatusOK,
+		wantBody:   queue.Name,
 	}, {
-		name:         "alive: false, prober: true, K-Probe",
-		prober:       func() bool { return true },
-		isAggressive: false,
-		wantStatus:   http.StatusOK,
-		wantBody:     queue.Name,
+		name:       "alive: false, prober: true",
+		prober:     func() bool { return true },
+		wantStatus: http.StatusOK,
+		wantBody:   queue.Name,
 	}, {
-		name:         "alive: false, prober: false, K-Probe",
-		prober:       func() bool { return false },
-		isAggressive: false,
-		wantStatus:   http.StatusServiceUnavailable,
+		name:       "alive: false, prober: false",
+		prober:     func() bool { return false },
+		wantStatus: http.StatusServiceUnavailable,
 	}, {
-		name:         "alive: false, no prober, K-Probe",
-		isAggressive: false,
-		wantStatus:   http.StatusOK,
-		wantBody:     queue.Name,
+		name:       "alive: false, no prober",
+		wantStatus: http.StatusOK,
+		wantBody:   queue.Name,
 	}, {
-		name:         "shuttingDown: true, K-Probe",
+		name:         "shuttingDown: true",
 		shuttingDown: true,
-		isAggressive: false,
 		wantStatus:   http.StatusGone,
 	}, {
-		name:         "no prober, shuttingDown: false",
-		isAggressive: true,
-		wantStatus:   http.StatusOK,
-		wantBody:     queue.Name,
+		name:       "no prober, shuttingDown: false",
+		wantStatus: http.StatusOK,
+		wantBody:   queue.Name,
 	}, {
 		name:         "prober: true, shuttingDown: true",
 		shuttingDown: true,
 		prober:       func() bool { return true },
-		isAggressive: true,
 		wantStatus:   http.StatusGone,
 	}, {
-		name:         "prober: true, shuttingDown: false",
-		prober:       func() bool { return true },
-		isAggressive: true,
-		wantStatus:   http.StatusOK,
-		wantBody:     queue.Name,
+		name:       "prober: true, shuttingDown: false",
+		prober:     func() bool { return true },
+		wantStatus: http.StatusOK,
+		wantBody:   queue.Name,
 	}, {
-		name:         "prober: false, shuttingDown: false",
-		prober:       func() bool { return false },
-		isAggressive: true,
-		wantStatus:   http.StatusServiceUnavailable,
+		name:       "prober: false, shuttingDown: false",
+		prober:     func() bool { return false },
+		wantStatus: http.StatusServiceUnavailable,
 	}, {
 		name:         "prober: false, shuttingDown: true",
 		shuttingDown: true,
 		prober:       func() bool { return false },
-		isAggressive: true,
 		wantStatus:   http.StatusGone,
 	}, {
-		name:         "alive: true, prober: false, shuttingDown: false",
-		alive:        true,
-		prober:       func() bool { return false },
-		isAggressive: true,
-		wantStatus:   http.StatusServiceUnavailable,
-	}, {
-		name:         "aggressive: false, alive: true, prober: false, shuttingDown: false",
-		alive:        true,
-		prober:       func() bool { return false },
-		isAggressive: false,
-		// The current behaviour is that if aggressive is false (ie probePeriod > 0),
-		// we actually don't probe once the first probe succeeds.
-		wantStatus: http.StatusOK,
-		wantBody:   queue.Name,
+		name:       "alive: true, prober: false, shuttingDown: false",
+		alive:      true,
+		prober:     func() bool { return false },
+		wantStatus: http.StatusServiceUnavailable,
 	}}
 
 	for _, test := range tests {
@@ -149,7 +128,7 @@ func TestHealthStateHealthHandler(t *testing.T) {
 			state.shuttingDown = test.shuttingDown
 
 			rr := httptest.NewRecorder()
-			state.HandleHealthProbe(test.prober, test.isAggressive, rr)
+			state.HandleHealthProbe(test.prober, rr)
 
 			if got, want := rr.Code, test.wantStatus; got != want {
 				t.Errorf("handler returned wrong status code: got %v want %v", got, want)

diff --git a/test/conformance/runtime/readiness_probe_test.go b/test/conformance/runtime/readiness_probe_test.go
@@ -148,96 +148,58 @@ func TestProbeRuntime(t *testing.T) {
 }
 
 // This test validates the behaviour of readiness probes *after* initial
-// startup. Note: the current behaviour is not ideal: when periodSeconds > 0 we
-// ignore readiness probes after startup. Also, when a pod goes unready after
-// startup and there are no other pods in the revision we hang, potentially
-// forever, which may not be what a user wants. The goal of this test is
-// largely to describe the current behaviour, so that we can confidently change it.
-// See:
-//  - https://github.com/knative/serving/issues/10764
-//  - https://github.com/knative/serving/issues/10765
+// startup. When a pod goes unready after startup and there are no other pods
+// in the revision we hang, potentially forever, which may or may not be what a
+// user wants.
+// The goal of this test is largely to describe the current behaviour, so that
+// we can confidently change it.
+// See https://github.com/knative/serving/issues/10765.
 func TestProbeRuntimeAfterStartup(t *testing.T) {
 	t.Parallel()
 
-	t.Run("with periodSeconds = 0 (QP optimisation enabled)", func(t *testing.T) {
-		t.Parallel()
-		clients := test.Setup(t)
-		names := test.ResourceNames{Service: test.ObjectNameForTest(t), Image: test.Readiness}
-		test.EnsureTearDown(t, clients, &names)
+	for _, period := range []int32{0, 1} {
+		period := period
+		t.Run(fmt.Sprintf("periodSeconds=%d", period), func(t *testing.T) {
+			t.Parallel()
+			clients := test.Setup(t)
+			names := test.ResourceNames{Service: test.ObjectNameForTest(t), Image: test.Readiness}
+			test.EnsureTearDown(t, clients, &names)
+
+			url, client := waitReadyThenStartFailing(t, clients, names, period)
+			if err := wait.PollImmediate(1*time.Second, readinessPropagationTime, func() (bool, error) {
+				startFailing, err := http.NewRequest(http.MethodGet, url.String(), nil)
+				if err != nil {
+					return false, err
+				}
 
-		// When periodSeconds = 0 we expect that once readiness propagates
-		// there will be no ready pods, and therefore the request will time
-		// out.
-		// (see https://github.com/knative/serving/issues/10765).
-		url, client := waitReadyThenStartFailing(t, clients, names, 0)
-		if err := wait.PollImmediate(1*time.Second, readinessPropagationTime, func() (bool, error) {
-			startFailing, err := http.NewRequest(http.MethodGet, url.String(), nil)
-			if err != nil {
-				return false, err
-			}
+				// 5 Seconds is enough to be confident the request is timing out.
+				client.Client.Timeout = 5 * time.Second
 
-			// 5 Seconds is enough to be confident the request is timing out.
-			client.Client.Timeout = 5 * time.Second
+				resp, err := client.Do(startFailing, func(err error) (bool, error) {
+					if isTimeout(err) {
+						// We're actually expecting a timeout here, so don't retry on timeouts.
+						return false, nil
+					}
 
-			resp, err := client.Do(startFailing, func(err error) (bool, error) {
+					return spoof.DefaultErrorRetryChecker(err)
+				})
 				if isTimeout(err) {
-					// We're actually expecting a timeout here, so don't retry on timeouts.
+					// We expect to eventually time out, so this is the success case.
+					return true, nil
+				} else if err != nil {
+					// Other errors are not expected.
+					return false, err
+				} else if resp.StatusCode == http.StatusOK {
+					// We'll continue to get 200s for a while until readiness propagates.
 					return false, nil
 				}
 
-				return spoof.DefaultErrorRetryChecker(err)
-			})
-			if isTimeout(err) {
-				// We expect to eventually time out, so this is the success case.
-				return true, nil
-			} else if err != nil {
-				// Other errors are not expected.
-				return false, err
-			} else if resp.StatusCode == http.StatusOK {
-				// We'll continue to get 200s for a while until readiness propagates.
-				return false, nil
-			}
-
-			return false, errors.New("Received non-200 status code (expected to eventually time out)")
-		}); err != nil {
-			t.Fatal("Expected to eventually see request timeout due to all pods becoming unready, but got:", err)
-		}
-	})
-
-	t.Run("with periodSeconds > 0 (no QP optimisation)", func(t *testing.T) {
-		t.Parallel()
-		clients := test.Setup(t)
-		names := test.ResourceNames{Service: test.ObjectNameForTest(t), Image: test.Readiness}
-		test.EnsureTearDown(t, clients, &names)
-
-		// When periodSeconds > 0 we ignore readinessProbes after startup, so
-		// this request will continue to work, even though all pods are failing
-		// readiness.
-		// (see https://github.com/knative/serving/issues/10764).
-		url, _ := waitReadyThenStartFailing(t, clients, names, 1)
-		if err := wait.PollImmediate(1*time.Second, readinessPropagationTime, func() (bool, error) {
-			if _, err := pkgtest.CheckEndpointState(
-				context.Background(),
-				clients.KubeClient,
-				t.Logf,
-				url,
-				v1test.RetryingRouteInconsistency(spoof.MatchesAllOf(spoof.IsStatusOK, spoof.MatchesBody(test.HelloWorldText))),
-				"readinessIsReady",
-				test.ServingFlags.ResolvableDomain,
-				test.AddRootCAtoTransport(context.Background(), t.Logf, clients, test.ServingFlags.HTTPS),
-			); err != nil {
-				return false, err
+				return false, errors.New("Received non-200 status code (expected to eventually time out)")
+			}); err != nil {
+				t.Fatal("Expected to eventually see request timeout due to all pods becoming unready, but got:", err)
 			}
-
-			// Keep retrying for readinessPropagationTime to allow
-			// readiness to propagate, otherwise we could get a false
-			// positive since ingresses and activator may not have noticed
-			// the readiness change yet.
-			return false, nil
-		}); !errors.Is(err, wait.ErrWaitTimeout) {
-			t.Fatalf("Expected endpoint for Route %s at %s to consistently return success (even though readinessProbe is failing), but got: %v", names.Route, url, err)
-		}
-	})
+		})
+	}
 }
 
 // waitReadyThenStartFailing creates a service, waits for it to pass readiness,