diff --git a/pkg/apis/serving/v1alpha1/revision_lifecycle.go b/pkg/apis/serving/v1alpha1/revision_lifecycle.go index 4e8102262510..784aa37df8ca 100644 --- a/pkg/apis/serving/v1alpha1/revision_lifecycle.go +++ b/pkg/apis/serving/v1alpha1/revision_lifecycle.go @@ -21,12 +21,12 @@ import ( "strconv" "time" - "knative.dev/pkg/apis" - duckv1beta1 "knative.dev/pkg/apis/duck/v1beta1" net "github.com/knative/serving/pkg/apis/networking" "github.com/knative/serving/pkg/apis/serving" corev1 "k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/runtime/schema" + "knative.dev/pkg/apis" + duckv1beta1 "knative.dev/pkg/apis/duck/v1beta1" ) const ( @@ -164,8 +164,11 @@ func (rs *RevisionStatus) MarkContainerHealthy() { revCondSet.Manage(rs).MarkTrue(RevisionConditionContainerHealthy) } -func (rs *RevisionStatus) MarkContainerExiting(exitCode int32, message string) { +func (rs *RevisionStatus) MarkContainerExiting(exitCode int32, reason, message string) { exitCodeString := fmt.Sprintf("ExitCode%d", exitCode) + if reason != "" { + exitCodeString = fmt.Sprintf("%s/%s", exitCodeString, reason) + } revCondSet.Manage(rs).MarkFalse(RevisionConditionContainerHealthy, exitCodeString, RevisionContainerExitingMessage(message)) } diff --git a/pkg/apis/serving/v1alpha1/revision_lifecycle_test.go b/pkg/apis/serving/v1alpha1/revision_lifecycle_test.go index 46a1fdaaebe1..920019befd4e 100644 --- a/pkg/apis/serving/v1alpha1/revision_lifecycle_test.go +++ b/pkg/apis/serving/v1alpha1/revision_lifecycle_test.go @@ -21,15 +21,15 @@ import ( "github.com/google/go-cmp/cmp" "github.com/google/go-cmp/cmp/cmpopts" - "knative.dev/pkg/apis" - "knative.dev/pkg/apis/duck" - duckv1beta1 "knative.dev/pkg/apis/duck/v1beta1" - apitest "knative.dev/pkg/apis/testing" net "github.com/knative/serving/pkg/apis/networking" "github.com/knative/serving/pkg/apis/serving" corev1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/runtime/schema" + "knative.dev/pkg/apis" + "knative.dev/pkg/apis/duck" + duckv1beta1 "knative.dev/pkg/apis/duck/v1beta1" + apitest "knative.dev/pkg/apis/testing" ) func TestRevisionDuckTypes(t *testing.T) { @@ -361,6 +361,29 @@ func TestRevisionNotOwnedStuff(t *testing.T) { } } +func TestRevisionContainerExiting(t *testing.T) { + r := &RevisionStatus{} + r.InitializeConditions() + apitest.CheckConditionOngoing(r.duck(), RevisionConditionResourcesAvailable, t) + apitest.CheckConditionOngoing(r.duck(), RevisionConditionContainerHealthy, t) + apitest.CheckConditionOngoing(r.duck(), RevisionConditionReady, t) + + const ( + exitCode = 137 + reason, message = "OOMKilled", "Out of memory" + wantReason, wantMessage = "ExitCode137/OOMKilled", "Container failed with: Out of memory" + ) + r.MarkContainerExiting(exitCode, reason, message) + apitest.CheckConditionFailed(r.duck(), RevisionConditionContainerHealthy, t) + apitest.CheckConditionFailed(r.duck(), RevisionConditionReady, t) + if got := r.GetCondition(RevisionConditionContainerHealthy); got == nil || got.Reason != wantReason { + t.Errorf("RevisionConditionResourcesAvailable = %v, want %v", got.Reason, wantReason) + } + if got := r.GetCondition(RevisionConditionContainerHealthy); got == nil || got.Message != wantMessage { + t.Errorf("RevisionConditionResourcesAvailable = %v, want %v", got.Message, wantMessage) + } +} + func TestRevisionResourcesUnavailable(t *testing.T) { r := &RevisionStatus{} r.InitializeConditions() diff --git a/pkg/reconciler/revision/reconcile_resources.go b/pkg/reconciler/revision/reconcile_resources.go index 88cf96881556..5710945044d5 100644 --- a/pkg/reconciler/revision/reconcile_resources.go +++ b/pkg/reconciler/revision/reconcile_resources.go @@ -20,8 +20,6 @@ import ( "context" "fmt" - "knative.dev/pkg/logging" - "knative.dev/pkg/logging/logkey" kpav1alpha1 "github.com/knative/serving/pkg/apis/autoscaling/v1alpha1" "github.com/knative/serving/pkg/apis/serving/v1alpha1" "github.com/knative/serving/pkg/reconciler/revision/resources" @@ -32,12 +30,20 @@ import ( "k8s.io/apimachinery/pkg/api/equality" apierrs "k8s.io/apimachinery/pkg/api/errors" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "knative.dev/pkg/logging" + "knative.dev/pkg/logging/logkey" ) +// userFacing is the key used to represent whether the logs should be surfaced +// to user +// TODO: Move this to knative/pkg/logging/logkey +const userFacing = "userfacing" + func (c *Reconciler) reconcileDeployment(ctx context.Context, rev *v1alpha1.Revision) error { ns := rev.Namespace deploymentName := resourcenames.Deployment(rev) logger := logging.FromContext(ctx).With(zap.String(logkey.Deployment, deploymentName)) + userFacingLogger := logger.With(zap.Bool(userFacing, true)) deployment, err := c.deploymentLister.Deployments(ns).Get(deploymentName) if apierrs.IsNotFound(err) { @@ -66,32 +72,52 @@ func (c *Reconciler) reconcileDeployment(ctx context.Context, rev *v1alpha1.Revi } // If a container keeps crashing (no active pods in the deployment although we want some) - if *deployment.Spec.Replicas > 0 && deployment.Status.AvailableReplicas == 0 { + if *deployment.Spec.Replicas > deployment.Status.AvailableReplicas { pods, err := c.KubeClientSet.CoreV1().Pods(ns).List(metav1.ListOptions{LabelSelector: metav1.FormatLabelSelector(deployment.Spec.Selector)}) if err != nil { logger.Errorf("Error getting pods: %v", err) } else if len(pods.Items) > 0 { - // Arbitrarily grab the very first pod, as they all should be crashing - pod := pods.Items[0] - - // Update the revision status if pod cannot be scheduled(possibly resource constraints) - // If pod cannot be scheduled then we expect the container status to be empty. - for _, cond := range pod.Status.Conditions { - if cond.Type == corev1.PodScheduled && cond.Status == corev1.ConditionFalse { - rev.Status.MarkResourcesUnavailable(cond.Reason, cond.Message) - break + // Should change revision status if all pods are crashing + shouldMarkRev := deployment.Status.AvailableReplicas == 0 + for _, pod := range pods.Items { + // Update the revision status if pod cannot be scheduled(possibly resource constraints) + // If pod cannot be scheduled then we expect the container status to be empty. + for _, cond := range pod.Status.Conditions { + if cond.Type == corev1.PodScheduled && cond.Status == corev1.ConditionFalse { + userFacingLogger.Warnf("Pod %s of revision %s cannot be scheduled: %s/%s", pod.Name, rev.Name, cond.Reason, cond.Message) + if shouldMarkRev { + logger.Infof("%s marking resources unavailable with: %s: %s", rev.Name, cond.Reason, cond.Message) + rev.Status.MarkResourcesUnavailable(cond.Reason, cond.Message) + } + break + } } - } - for _, status := range pod.Status.ContainerStatuses { - if status.Name == rev.Spec.GetContainer().Name { - if t := status.LastTerminationState.Terminated; t != nil { - logger.Infof("%s marking exiting with: %d/%s", rev.Name, t.ExitCode, t.Message) - rev.Status.MarkContainerExiting(t.ExitCode, t.Message) - } else if w := status.State.Waiting; w != nil && hasDeploymentTimedOut(deployment) { - logger.Infof("%s marking resources unavailable with: %s: %s", rev.Name, w.Reason, w.Message) - rev.Status.MarkResourcesUnavailable(w.Reason, w.Message) + for _, status := range pod.Status.ContainerStatuses { + // This is based on the fact that rev.Spec.GetContainer() returns + // the user container. + if status.Name == rev.Spec.GetContainer().Name { + if t := status.LastTerminationState.Terminated; t != nil { + userFacingLogger.Warnf("Container %s in pod %s of revision %s is in terminated status: %s/%s", + status.Name, pod.Name, rev.Name, t.Reason, t.Message) + if shouldMarkRev { + logger.Infof("%s marking exiting with: %d/%s: %s", rev.Name, t.ExitCode, t.Reason, t.Message) + rev.Status.MarkContainerExiting(t.ExitCode, t.Reason, t.Message) + } + } else if w := status.State.Waiting; w != nil && hasDeploymentTimedOut(deployment) { + userFacingLogger.Warnf("Container %s in pod %s of revision %s is timeout in waiting status: %s/%s", + status.Name, pod.Name, rev.Name, w.Reason, w.Message) + if shouldMarkRev { + logger.Infof("%s marking resources unavailable with: %s: %s", rev.Name, w.Reason, w.Message) + rev.Status.MarkResourcesUnavailable(w.Reason, w.Message) + } + } + break } + } + + if shouldMarkRev { + // Arbitrarily check the very first pod, as they all should be crashing break } } diff --git a/pkg/reconciler/revision/table_test.go b/pkg/reconciler/revision/table_test.go index ba64ecd90a4a..83c2ee57ac3b 100644 --- a/pkg/reconciler/revision/table_test.go +++ b/pkg/reconciler/revision/table_test.go @@ -21,10 +21,6 @@ import ( "testing" caching "github.com/knative/caching/pkg/apis/caching/v1alpha1" - "knative.dev/pkg/configmap" - "knative.dev/pkg/controller" - "knative.dev/pkg/logging" - logtesting "knative.dev/pkg/logging/testing" autoscalingv1alpha1 "github.com/knative/serving/pkg/apis/autoscaling/v1alpha1" "github.com/knative/serving/pkg/apis/networking" "github.com/knative/serving/pkg/apis/serving/v1alpha1" @@ -40,11 +36,15 @@ import ( metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/runtime" clientgotesting "k8s.io/client-go/testing" + "knative.dev/pkg/configmap" + "knative.dev/pkg/controller" + "knative.dev/pkg/logging" + logtesting "knative.dev/pkg/logging/testing" - . "knative.dev/pkg/reconciler/testing" . "github.com/knative/serving/pkg/reconciler/testing/v1alpha1" . "github.com/knative/serving/pkg/testing" . "github.com/knative/serving/pkg/testing/v1alpha1" + . "knative.dev/pkg/reconciler/testing" ) // This is heavily based on the way the OpenShift Ingress controller tests its reconciliation method. @@ -437,13 +437,13 @@ func TestReconcile(t *testing.T) { rev("foo", "pod-error", withK8sServiceName("a-pod-error"), WithLogURL, AllUnknownConditions, MarkActive), kpa("foo", "pod-error"), // PA can't be ready, since no traffic. - pod("foo", "pod-error", WithFailingContainer("user-container", 5, "I failed man!")), + pod("foo", "pod-error", WithFailingContainer("user-container", 5, "Failed", "I failed man!")), deploy("foo", "pod-error"), image("foo", "pod-error"), }, WantStatusUpdates: []clientgotesting.UpdateActionImpl{{ Object: rev("foo", "pod-error", - WithLogURL, AllUnknownConditions, MarkContainerExiting(5, "I failed man!")), + WithLogURL, AllUnknownConditions, MarkContainerExiting(5, "Failed", "I failed man!")), }}, Key: "foo/pod-error", }, { diff --git a/pkg/testing/functional.go b/pkg/testing/functional.go index 2cb78d295812..e717f1451ba2 100644 --- a/pkg/testing/functional.go +++ b/pkg/testing/functional.go @@ -246,13 +246,14 @@ type PodOption func(*corev1.Pod) // WithFailingContainer sets the .Status.ContainerStatuses on the pod to // include a container named accordingly to fail with the given state. -func WithFailingContainer(name string, exitCode int, message string) PodOption { +func WithFailingContainer(name string, exitCode int, reason, message string) PodOption { return func(pod *corev1.Pod) { pod.Status.ContainerStatuses = []corev1.ContainerStatus{{ Name: name, LastTerminationState: corev1.ContainerState{ Terminated: &corev1.ContainerStateTerminated{ ExitCode: int32(exitCode), + Reason: reason, Message: message, }, }, diff --git a/pkg/testing/v1alpha1/revision.go b/pkg/testing/v1alpha1/revision.go index b0580f221c7a..902b31415c02 100644 --- a/pkg/testing/v1alpha1/revision.go +++ b/pkg/testing/v1alpha1/revision.go @@ -133,9 +133,9 @@ func MarkContainerMissing(rev *v1alpha1.Revision) { } // MarkContainerExiting calls .Status.MarkContainerExiting on the Revision. -func MarkContainerExiting(exitCode int32, message string) RevisionOption { +func MarkContainerExiting(exitCode int32, reason, message string) RevisionOption { return func(r *v1alpha1.Revision) { - r.Status.MarkContainerExiting(exitCode, message) + r.Status.MarkContainerExiting(exitCode, reason, message) } }