diff --git a/config/core/configmaps/deployment.yaml b/config/core/configmaps/deployment.yaml index 5813cb87fd8b..026760ef1104 100644 --- a/config/core/configmaps/deployment.yaml +++ b/config/core/configmaps/deployment.yaml @@ -22,7 +22,7 @@ metadata: app.kubernetes.io/component: controller app.kubernetes.io/version: devel annotations: - knative.dev/example-checksum: "720ddb97" + knative.dev/example-checksum: "b99000ec" data: # This is the Go import path for the binary that is containerized # and substituted here. @@ -123,3 +123,12 @@ data: # selector: # use-gvisor: "please" runtime-class-name: "" + + # pod-is-always-schedulable can be used to define that Pods in the system will always be + # scheduled, and a Revision should not be marked unschedulable. + # Setting this to `true` makes sense if you have cluster-autoscaling set up for your cluster + # where unschedulable Pods trigger the addition of a new Node and are therefore a short and + # transient state. + # + # See https://github.com/knative/serving/issues/14862 + pod-is-always-schedulable: "false" diff --git a/pkg/deployment/config.go b/pkg/deployment/config.go index f8f16586547b..990fcdeff00c 100644 --- a/pkg/deployment/config.go +++ b/pkg/deployment/config.go @@ -78,6 +78,9 @@ const ( defaultAffinityTypeValue = PreferSpreadRevisionOverNodes RuntimeClassNameKey = "runtime-class-name" + + // podIsAlwaysSchedulableKey is the config-map key that controls whether pods are treated as always schedulable + podIsAlwaysSchedulableKey = "pod-is-always-schedulable" ) var ( @@ -200,6 +203,8 @@ func NewConfigFromMap(configMap map[string]string) (*Config, error) { cm.AsString(queueSidecarRooCAKey, &nc.QueueSidecarRootCA), cm.AsString(RuntimeClassNameKey, &runtimeClassNames), + + cm.AsBool(podIsAlwaysSchedulableKey, &nc.PodIsAlwaysSchedulable), ); err != nil { return nil, err } @@ -309,4 +314,7 @@ type Config struct { // RuntimeClassNames specifies which runtime the Pod will use RuntimeClassNames map[string]RuntimeClassNameLabelSelector + + // PodIsAlwaysSchedulable specifies whether pods are considered 
to be always schedulable + PodIsAlwaysSchedulable bool } diff --git a/pkg/deployment/config_test.go b/pkg/deployment/config_test.go index 16f3d2ac6f55..c44a8782290e 100644 --- a/pkg/deployment/config_test.go +++ b/pkg/deployment/config_test.go @@ -456,6 +456,22 @@ kata: return string(b) }(), }, + }, { + name: "controller configuration with always schedulable pods", + wantConfig: &Config{ + PodIsAlwaysSchedulable: true, + RegistriesSkippingTagResolving: sets.New("kind.local", "ko.local", "dev.local"), + DigestResolutionTimeout: digestResolutionTimeoutDefault, + QueueSidecarImage: defaultSidecarImage, + QueueSidecarCPURequest: &QueueSidecarCPURequestDefault, + QueueSidecarTokenAudiences: sets.New(""), + ProgressDeadline: ProgressDeadlineDefault, + DefaultAffinityType: defaultAffinityTypeValue, + }, + data: map[string]string{ + podIsAlwaysSchedulableKey: "true", + QueueSidecarImageKey: defaultSidecarImage, + }, }} for _, tt := range configTests { diff --git a/pkg/reconciler/revision/config/store.go b/pkg/reconciler/revision/config/store.go index 2f5efab67430..6f9827769a53 100644 --- a/pkg/reconciler/revision/config/store.go +++ b/pkg/reconciler/revision/config/store.go @@ -41,7 +41,11 @@ type Config struct { // FromContext loads the configuration from the context. func FromContext(ctx context.Context) *Config { - return ctx.Value(cfgKey{}).(*Config) + x, ok := ctx.Value(cfgKey{}).(*Config) + if ok { + return x + } + return nil } // ToContext persists the configuration to the context. 
diff --git a/pkg/reconciler/revision/reconcile_resources.go b/pkg/reconciler/revision/reconcile_resources.go index 9bfcde661d5e..91962a3d253e 100644 --- a/pkg/reconciler/revision/reconcile_resources.go +++ b/pkg/reconciler/revision/reconcile_resources.go @@ -102,10 +102,12 @@ func (c *Reconciler) reconcileDeployment(ctx context.Context, rev *v1.Revision) // Update the revision status if pod cannot be scheduled (possibly resource constraints) // If pod cannot be scheduled then we expect the container status to be empty. - for _, cond := range pod.Status.Conditions { - if cond.Type == corev1.PodScheduled && cond.Status == corev1.ConditionFalse { - rev.Status.MarkResourcesAvailableFalse(cond.Reason, cond.Message) - break + if !config.FromContext(ctx).Deployment.PodIsAlwaysSchedulable { + for _, cond := range pod.Status.Conditions { + if cond.Type == corev1.PodScheduled && cond.Status == corev1.ConditionFalse { + rev.Status.MarkResourcesAvailableFalse(cond.Reason, cond.Message) + break + } } } diff --git a/pkg/reconciler/revision/table_test.go b/pkg/reconciler/revision/table_test.go index 0cf3205a17ae..6f1b5b011d5e 100644 --- a/pkg/reconciler/revision/table_test.go +++ b/pkg/reconciler/revision/table_test.go @@ -631,6 +631,31 @@ func TestReconcile(t *testing.T) { Object: pa("foo", "pod-schedule-error", WithReachabilityUnreachable), }}, Key: "foo/pod-schedule-error", + }, { + Name: "surface no pod schedule errors if pod-is-always-schedulable is true", + // Test that the scheduling errors of the Pod are not propagated into the + // revision when pod-is-always-schedulable + // is enabled. 
+ Objects: []runtime.Object{ + Revision("foo", "pod-no-schedule-error", + WithLogURL, + MarkActivating("Deploying", ""), + WithRoutingState(v1.RoutingStateActive, fc), + withDefaultContainerStatuses(), + MarkDeploying("Deploying"), + WithRevisionObservedGeneration(1), + MarkContainerHealthyUnknown("Deploying"), + ), + pa("foo", "pod-no-schedule-error", WithReachabilityReachable), // PA can't be ready, since no traffic. + pod(t, "foo", "pod-no-schedule-error", WithUnschedulableContainer("Insufficient energy", "Unschedulable")), + deploy(t, "foo", "pod-no-schedule-error"), + image("foo", "pod-no-schedule-error"), + }, + + Ctx: config.ToContext(context.Background(), reconcilerTestConfig(func(c *config.Config) { + c.Deployment.PodIsAlwaysSchedulable = true + })), + Key: "foo/pod-no-schedule-error", }, { Name: "ready steady state", // Test the transition that Reconcile makes when Endpoints become ready on the @@ -893,11 +918,16 @@ func TestReconcile(t *testing.T) { resolver: &nopResolver{}, } + cfg := config.FromContext(ctx) + if cfg == nil { + cfg = reconcilerTestConfig() + } + return revisionreconciler.NewReconciler(ctx, logging.FromContext(ctx), servingclient.Get(ctx), listers.GetRevisionLister(), controller.GetEventRecorder(ctx), r, controller.Options{ ConfigStore: &testConfigStore{ - config: reconcilerTestConfig(), + config: cfg, }, }) })) @@ -1113,8 +1143,8 @@ func (t *testConfigStore) ToContext(ctx context.Context) context.Context { var _ pkgreconciler.ConfigStore = (*testConfigStore)(nil) -func reconcilerTestConfig() *config.Config { - return &config.Config{ +func reconcilerTestConfig(mutateFuncs ...func(*config.Config)) *config.Config { + cfg := &config.Config{ Config: &defaultconfig.Config{ Defaults: &defaultconfig.Defaults{}, Autoscaler: &autoscalerconfig.Config{ @@ -1129,4 +1159,9 @@ func reconcilerTestConfig() *config.Config { Logging: &logging.Config{}, Network: &netcfg.Config{}, } + + for _, f := range mutateFuncs { + f(cfg) + } + return cfg }