Send logs for pods in bad status #4572
@@ -20,8 +20,6 @@ import (
"context"
"fmt"

"knative.dev/pkg/logging"
"knative.dev/pkg/logging/logkey"
kpav1alpha1 "github.com/knative/serving/pkg/apis/autoscaling/v1alpha1"
"github.com/knative/serving/pkg/apis/serving/v1alpha1"
"github.com/knative/serving/pkg/reconciler/revision/resources"
@@ -32,12 +30,20 @@ import (
"k8s.io/apimachinery/pkg/api/equality"
apierrs "k8s.io/apimachinery/pkg/api/errors"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"knative.dev/pkg/logging"
"knative.dev/pkg/logging/logkey"
)

// userFacing is the key used to represent whether the logs should be surfaced
// to user
// TODO: Move this to knative/pkg/logging/logkey
const userFacing = "userfacing"

func (c *Reconciler) reconcileDeployment(ctx context.Context, rev *v1alpha1.Revision) error {
ns := rev.Namespace
deploymentName := resourcenames.Deployment(rev)
logger := logging.FromContext(ctx).With(zap.String(logkey.Deployment, deploymentName))
userFacingLogger := logger.With(zap.Bool(userFacing, true))

deployment, err := c.deploymentLister.Deployments(ns).Get(deploymentName)
if apierrs.IsNotFound(err) {
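An aside on the pattern above: userFacingLogger is ordinary zap structured logging with a boolean field attached, so a downstream log pipeline can pick out the entries meant for the app developer. A minimal standalone sketch of the same idea (not code from this PR; the deployment key string below is illustrative):

```go
package main

import "go.uber.org/zap"

// Same key the PR introduces for flagging user-facing log entries.
const userFacing = "userfacing"

func main() {
	base, _ := zap.NewProduction()
	logger := base.Sugar().With(zap.String("knative.dev/deployment", "my-deployment"))
	userFacingLogger := logger.With(zap.Bool(userFacing, true))

	// Produces a JSON entry containing "userfacing": true alongside the
	// message, which a log collector could route to the revision's owner.
	userFacingLogger.Warnf("Container %s is crash-looping", "user-container")
}
```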
@@ -66,32 +72,52 @@ func (c *Reconciler) reconcileDeployment(ctx context.Context, rev *v1alpha1.Revi
}

// If a container keeps crashing (no active pods in the deployment although we want some)
if *deployment.Spec.Replicas > 0 && deployment.Status.AvailableReplicas == 0 {
if *deployment.Spec.Replicas > deployment.Status.AvailableReplicas {
Contributor
This will always be true if we're scaling up, right?
pods, err := c.KubeClientSet.CoreV1().Pods(ns).List(metav1.ListOptions{LabelSelector: metav1.FormatLabelSelector(deployment.Spec.Selector)})
if err != nil {
logger.Errorf("Error getting pods: %v", err)
} else if len(pods.Items) > 0 {
// Arbitrarily grab the very first pod, as they all should be crashing
pod := pods.Items[0]

// Update the revision status if pod cannot be scheduled(possibly resource constraints)
// If pod cannot be scheduled then we expect the container status to be empty.
for _, cond := range pod.Status.Conditions {
if cond.Type == corev1.PodScheduled && cond.Status == corev1.ConditionFalse {
rev.Status.MarkResourcesUnavailable(cond.Reason, cond.Message)
break
// Should change revision status if all pods are crashing
shouldMarkRev := deployment.Status.AvailableReplicas == 0
for _, pod := range pods.Items {
// Update the revision status if pod cannot be scheduled(possibly resource constraints)
Contributor
Suggested change
// If pod cannot be scheduled then we expect the container status to be empty.
for _, cond := range pod.Status.Conditions {
if cond.Type == corev1.PodScheduled && cond.Status == corev1.ConditionFalse {
Contributor
#4136 tries to grab this status from the deployment rather than from individual pods. Is that possible here as well?
| userFacingLogger.Warnf("Pod %s of revision %s cannot be scheduled: %s/%s", pod.Name, rev.Name, cond.Reason, cond.Message) | ||||||
| if shouldMarkRev { | ||||||
| logger.Infof("%s marking resources unavailable with: %s: %s", rev.Name, cond.Reason, cond.Message) | ||||||
| rev.Status.MarkResourcesUnavailable(cond.Reason, cond.Message) | ||||||
| } | ||||||
| break | ||||||
| } | ||||||
| } | ||||||
| } | ||||||
|
|
||||||
| for _, status := range pod.Status.ContainerStatuses { | ||||||
| if status.Name == rev.Spec.GetContainer().Name { | ||||||
| if t := status.LastTerminationState.Terminated; t != nil { | ||||||
| logger.Infof("%s marking exiting with: %d/%s", rev.Name, t.ExitCode, t.Message) | ||||||
| rev.Status.MarkContainerExiting(t.ExitCode, t.Message) | ||||||
| } else if w := status.State.Waiting; w != nil && hasDeploymentTimedOut(deployment) { | ||||||
| logger.Infof("%s marking resources unavailable with: %s: %s", rev.Name, w.Reason, w.Message) | ||||||
| rev.Status.MarkResourcesUnavailable(w.Reason, w.Message) | ||||||
| for _, status := range pod.Status.ContainerStatuses { | ||||||
| // This is based on the fact that rev.Spec.GetContainer() returns | ||||||
| // the user container. | ||||||
| if status.Name == rev.Spec.GetContainer().Name { | ||||||
| if t := status.LastTerminationState.Terminated; t != nil { | ||||||
| userFacingLogger.Warnf("Container %s in pod %s of revision %s is in terminated status: %s/%s", | ||||||
|
Contributor
Should we produce events instead of logs? They'd be actually userfacing. On that note: Are these events already produced for the individual pods?
status.Name, pod.Name, rev.Name, t.Reason, t.Message)
if shouldMarkRev {
logger.Infof("%s marking exiting with: %d/%s: %s", rev.Name, t.ExitCode, t.Reason, t.Message)
rev.Status.MarkContainerExiting(t.ExitCode, t.Reason, t.Message)
Contributor
So if I have 3 pods one is ok, 2 are crashing but for different reasons, you'll set the status twice to different things?
}
} else if w := status.State.Waiting; w != nil && hasDeploymentTimedOut(deployment) {
userFacingLogger.Warnf("Container %s in pod %s of revision %s is timeout in waiting status: %s/%s",
status.Name, pod.Name, rev.Name, w.Reason, w.Message)
if shouldMarkRev {
logger.Infof("%s marking resources unavailable with: %s: %s", rev.Name, w.Reason, w.Message)
rev.Status.MarkResourcesUnavailable(w.Reason, w.Message)
}
}
break
}
}

if shouldMarkRev {
// Arbitrarily check the very first pod, as they all should be crashing
Contributor
This comment seems wrong now.
break
}
}
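Two of the review questions above (whether the new replicas check also fires during a normal scale-up, and whether the failure could be read from the Deployment rather than from individual pods, as #4136 does) both point at the Deployment's own status conditions. A standalone sketch of that alternative, not code from this PR; whether the pod-level reason and message actually propagate into these conditions is exactly the open question:

```go
package main

import (
	"fmt"

	appsv1 "k8s.io/api/apps/v1"
	corev1 "k8s.io/api/core/v1"
)

// deploymentStuck reports whether the deployment has stopped progressing
// (ProgressDeadlineExceeded), which distinguishes a crash-loop or quota
// problem from a routine scale-up that simply has not finished yet.
func deploymentStuck(d *appsv1.Deployment) bool {
	for _, cond := range d.Status.Conditions {
		if cond.Type == appsv1.DeploymentProgressing &&
			cond.Status == corev1.ConditionFalse &&
			cond.Reason == "ProgressDeadlineExceeded" {
			return true
		}
	}
	return false
}

// replicaFailure pulls a reason/message from the ReplicaFailure condition,
// which the deployment controller sets when pods cannot be created
// (for example, quota exhaustion).
func replicaFailure(d *appsv1.Deployment) (reason, message string, ok bool) {
	for _, cond := range d.Status.Conditions {
		if cond.Type == appsv1.DeploymentReplicaFailure && cond.Status == corev1.ConditionTrue {
			return cond.Reason, cond.Message, true
		}
	}
	return "", "", false
}

func main() {
	d := &appsv1.Deployment{}
	d.Status.Conditions = []appsv1.DeploymentCondition{
		{Type: appsv1.DeploymentProgressing, Status: corev1.ConditionFalse, Reason: "ProgressDeadlineExceeded"},
		{Type: appsv1.DeploymentReplicaFailure, Status: corev1.ConditionTrue, Reason: "FailedCreate", Message: "exceeded quota"},
	}
	if deploymentStuck(d) {
		if reason, message, ok := replicaFailure(d); ok {
			fmt.Printf("deployment stuck: %s: %s\n", reason, message)
		}
	}
}
```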
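On the review question about producing events instead of logs: an event attaches to an object and shows up in kubectl describe and kubectl get events in the user's namespace, which is arguably more user-facing than controller logs. A standalone sketch, assuming an EventRecorder is available to the reconciler (knative reconcilers typically carry one; the names below are illustrative, not the PR's code):

```go
package main

import (
	"fmt"

	corev1 "k8s.io/api/core/v1"
	"k8s.io/client-go/tools/record"
)

// warnPod records a warning event against the pod, which then shows up in
// `kubectl describe pod` and `kubectl get events` for the pod's namespace.
func warnPod(recorder record.EventRecorder, pod *corev1.Pod, reason, message string) {
	recorder.Eventf(pod, corev1.EventTypeWarning, reason, "%s", message)
}

func main() {
	// FakeRecorder lets the sketch run without a cluster; a real reconciler
	// would use the recorder wired up by its controller framework.
	recorder := record.NewFakeRecorder(10)
	pod := &corev1.Pod{}
	pod.Name = "mysvc-00001-deployment-abc"
	warnPod(recorder, pod, "UserContainerTerminated", "user-container exited: Error/container failed")
	fmt.Println(<-recorder.Events)
}
```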
Golint comments: exported method RevisionStatus.MarkContainerExiting should have comment or be unexported. More info.
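Judging from the new call site, MarkContainerExiting(t.ExitCode, t.Reason, t.Message), a doc comment along these lines would satisfy golint; the wording and receiver name are only a sketch, not the PR's actual code:

```go
// MarkContainerExiting marks the revision's user container as having exited,
// surfacing the terminated container's exit code, reason and message in the
// revision's status.
func (rs *RevisionStatus) MarkContainerExiting(exitCode int32, reason, message string) {
	// existing implementation unchanged
}
```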