From 595a552c0a9ba818efcedac99432a6f3e5fa5952 Mon Sep 17 00:00:00 2001 From: lvjing2 Date: Tue, 4 Jun 2019 09:30:27 +0800 Subject: [PATCH 1/3] check activating timeout to scale zeor --- pkg/apis/autoscaling/v1alpha1/pa_lifecycle.go | 11 ++++++++ pkg/autoscaler/config.go | 5 ++++ pkg/reconciler/autoscaling/kpa/kpa.go | 14 ++++++++-- pkg/reconciler/autoscaling/kpa/scaler.go | 19 ++++++++----- .../knative/pkg/apis/condition_set.go | 27 +++++++++++++++++++ 5 files changed, 67 insertions(+), 9 deletions(-) diff --git a/pkg/apis/autoscaling/v1alpha1/pa_lifecycle.go b/pkg/apis/autoscaling/v1alpha1/pa_lifecycle.go index 6606ac54058d..48e2e606ca96 100644 --- a/pkg/apis/autoscaling/v1alpha1/pa_lifecycle.go +++ b/pkg/apis/autoscaling/v1alpha1/pa_lifecycle.go @@ -165,6 +165,11 @@ func (pas *PodAutoscalerStatus) MarkInactive(reason, message string) { podCondSet.Manage(pas.duck()).MarkFalse(PodAutoscalerConditionActive, reason, message) } +// MarkActivatingTime marks the PA as activating timeout. +func (pas *PodAutoscalerStatus) MarkActivatingTimeout(reason, message string) { + podCondSet.Manage(pas.duck()).MarkTimeout(PodAutoscalerConditionActive, reason, message) +} + // MarkResourceNotOwned changes the "Active" condition to false to reflect that the // resource of the given kind and name has already been created, and we do not own it. func (pas *PodAutoscalerStatus) MarkResourceNotOwned(kind, name string) { @@ -191,6 +196,12 @@ func (pas *PodAutoscalerStatus) CanMarkInactive(idlePeriod time.Duration) bool { return pas.inStatusFor(corev1.ConditionTrue, idlePeriod) } +// IsActivatingTimeout checks whether the pod autoscaler has been in activating +// for at least the specified timeout period. 
+func (pas *PodAutoscalerStatus) IsActivatingTimeout(activatingTimeout time.Duration) bool { + return pas.inStatusFor(corev1.ConditionUnknown, activatingTimeout) +} + // inStatusFor returns true if the PodAutoscalerStatus's Active condition has stayed in // the specified status for at least the specified duration. Otherwise it returns false, // including when the status is undetermined (Active condition is not found.) diff --git a/pkg/autoscaler/config.go b/pkg/autoscaler/config.go index 88e68a2725f7..6e23c291b910 100644 --- a/pkg/autoscaler/config.go +++ b/pkg/autoscaler/config.go @@ -51,6 +51,7 @@ type Config struct { TickInterval time.Duration ScaleToZeroGracePeriod time.Duration + ActivatingTimeout time.Duration } // TargetConcurrency calculates the target concurrency for a given container-concurrency @@ -142,6 +143,10 @@ func NewConfigFromMap(data map[string]string) (*Config, error) { key: "tick-interval", field: &lc.TickInterval, defaultValue: 2 * time.Second, + }, { + key: "activating-timeout", + field: &lc.ActivatingTimeout, + defaultValue: 2 * time.Minute, }} { if raw, ok := data[dur.key]; !ok { *dur.field = dur.defaultValue diff --git a/pkg/reconciler/autoscaling/kpa/kpa.go b/pkg/reconciler/autoscaling/kpa/kpa.go index 2a04e5675670..eb3b2914ed1a 100644 --- a/pkg/reconciler/autoscaling/kpa/kpa.go +++ b/pkg/reconciler/autoscaling/kpa/kpa.go @@ -21,6 +21,7 @@ import ( "fmt" "reflect" "strconv" + "time" "github.com/knative/pkg/controller" "github.com/knative/pkg/logging" @@ -214,9 +215,15 @@ func (c *Reconciler) reconcile(ctx context.Context, pa *pav1alpha1.PodAutoscaler return perrors.Wrap(err, "error reporting metrics") } + timeout := config.FromContext(ctx).Autoscaler.ActivatingTimeout + if pa.Status.IsActivating() { + logger.Infof("enqueue delay for check activating timeout after %v.", timeout) + c.scaler.enqueueCB(pa, timeout) + } + // computeActiveCondition decides if we need to change the SKS mode, // and returns true if the status has changed. 
- if changed := computeActiveCondition(pa, want, got); changed { + if changed := computeActiveCondition(pa, want, got, timeout); changed { _, err := c.reconcileSKS(ctx, pa) if err != nil { return perrors.Wrap(err, "error re-reconciling SKS") @@ -374,7 +381,7 @@ func reportMetrics(pa *pav1alpha1.PodAutoscaler, want int32, got int) error { // computeActiveCondition updates the status of PA, depending on scales desired and present. // computeActiveCondition returns true if it thinks SKS needs an update. -func computeActiveCondition(pa *pav1alpha1.PodAutoscaler, want int32, got int) (ret bool) { +func computeActiveCondition(pa *pav1alpha1.PodAutoscaler, want int32, got int, timeout time.Duration) (ret bool) { minReady := activeThreshold(pa) switch { @@ -386,6 +393,9 @@ func computeActiveCondition(pa *pav1alpha1.PodAutoscaler, want int32, got int) ( ret = pa.Status.IsInactive() // If we were inactive and became activating. pa.Status.MarkActivating( "Queued", "Requests to the target are being buffered as resources are provisioned.") + if pa.Status.IsActivatingTimeout(timeout) { + pa.Status.MarkActivatingTimeout("Timeout", "The target is timeout for activating.") + } case got >= minReady: // SKS should already be active. 
diff --git a/pkg/reconciler/autoscaling/kpa/scaler.go b/pkg/reconciler/autoscaling/kpa/scaler.go index 6c35ba2e0f41..2ac6b2add59a 100644 --- a/pkg/reconciler/autoscaling/kpa/scaler.go +++ b/pkg/reconciler/autoscaling/kpa/scaler.go @@ -163,10 +163,15 @@ func scaleResourceArgs(pa *pav1alpha1.PodAutoscaler) (*schema.GroupVersionResour } func (ks *scaler) handleScaleToZero(pa *pav1alpha1.PodAutoscaler, desiredScale int32, config *autoscaler.Config) (int32, bool) { - if desiredScale != 0 { + if desiredScale > 0 { return desiredScale, true } + if desiredScale < 0 && !pa.Status.IsActivating() { + ks.logger.Info("Metrics are not yet being collected.") + return desiredScale, false + } + // We should only scale to zero when three of the following conditions are true: // a) enable-scale-to-zero from configmap is true // b) The PA has been active for at least the stable window, after which it gets marked inactive @@ -179,6 +184,11 @@ func (ks *scaler) handleScaleToZero(pa *pav1alpha1.PodAutoscaler, desiredScale i if pa.Status.IsActivating() { // Active=Unknown // Don't scale-to-zero during activation if min, _ := pa.ScaleBounds(); min == 0 { + if pa.Status.IsActivatingTimeout(config.ActivatingTimeout) { + ks.logger.Infof("activating timeout after %v", config.ActivatingTimeout) + return 0, true + } + ks.logger.Info("activating, don't scale to zero") return scaleUnknown, false } } else if pa.Status.IsReady() { // Active=True @@ -188,7 +198,7 @@ func (ks *scaler) handleScaleToZero(pa *pav1alpha1.PodAutoscaler, desiredScale i if pa.Status.CanMarkInactive(config.StableWindow) { // We do not need to enqueue PA here, since this will // make SKS reconcile and when it's done, PA will be reconciled again. - return desiredScale, false + return 0, false } // Otherwise, scale down to 1 until the idle period elapses and re-enqueue // the PA for reconciliation at that time. 
@@ -254,11 +264,6 @@ func (ks *scaler) applyScale(ctx context.Context, pa *pav1alpha1.PodAutoscaler, func (ks *scaler) Scale(ctx context.Context, pa *pav1alpha1.PodAutoscaler, desiredScale int32) (int32, error) { logger := logging.FromContext(ctx) - if desiredScale < 0 { - logger.Debug("Metrics are not yet being collected.") - return desiredScale, nil - } - desiredScale, shouldApplyScale := ks.handleScaleToZero(pa, desiredScale, config.FromContext(ctx).Autoscaler) if !shouldApplyScale { return desiredScale, nil diff --git a/vendor/github.com/knative/pkg/apis/condition_set.go b/vendor/github.com/knative/pkg/apis/condition_set.go index d4c70098e1fe..d09cc37e0ece 100644 --- a/vendor/github.com/knative/pkg/apis/condition_set.go +++ b/vendor/github.com/knative/pkg/apis/condition_set.go @@ -27,6 +27,10 @@ import ( metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" ) +const ( + ConditionTimeout corev1.ConditionStatus = "Timeout" +) + // Conditions is the interface for a Resource that implements the getter and // setter for accessing a Condition collection. // +k8s:deepcopy-gen=true @@ -71,6 +75,9 @@ type ConditionManager interface { // MarkFalse sets the status of t and the happy condition to False. MarkFalse(t ConditionType, reason, messageFormat string, messageA ...interface{}) + // MarkTimeout sets the status of t and the happy condition to Timeout. + MarkTimeout(t ConditionType, reason, messageFormat string, messageA ...interface{}) + // InitializeConditions updates all Conditions in the ConditionSet to Unknown // if not set. InitializeConditions() @@ -296,6 +303,26 @@ func (r conditionsImpl) MarkFalse(t ConditionType, reason, messageFormat string, } } +// MarkTimeout sets the status of t and the happy condition to timeout. 
+func (r conditionsImpl) MarkTimeout(t ConditionType, reason, messageFormat string, messageA ...interface{}) { + types := []ConditionType{t} + for _, cond := range r.dependents { + if cond == t { + types = append(types, r.happy) + } + } + + for _, t := range types { + r.SetCondition(Condition{ + Type: t, + Status: ConditionTimeout, + Reason: reason, + Message: fmt.Sprintf(messageFormat, messageA...), + Severity: r.severity(t), + }) + } +} + // InitializeConditions updates all Conditions in the ConditionSet to Unknown // if not set. func (r conditionsImpl) InitializeConditions() { From a7f2118770bf5eadb85cacecf67da502a6f0c2dd Mon Sep 17 00:00:00 2001 From: lvjing2 Date: Tue, 4 Jun 2019 09:32:00 +0800 Subject: [PATCH 2/3] not mark activating timeout --- pkg/apis/autoscaling/v1alpha1/pa_lifecycle.go | 5 ---- pkg/reconciler/autoscaling/kpa/kpa.go | 3 --- .../knative/pkg/apis/condition_set.go | 27 ------------------- 3 files changed, 35 deletions(-) diff --git a/pkg/apis/autoscaling/v1alpha1/pa_lifecycle.go b/pkg/apis/autoscaling/v1alpha1/pa_lifecycle.go index 48e2e606ca96..3ce6d98ad75e 100644 --- a/pkg/apis/autoscaling/v1alpha1/pa_lifecycle.go +++ b/pkg/apis/autoscaling/v1alpha1/pa_lifecycle.go @@ -165,11 +165,6 @@ func (pas *PodAutoscalerStatus) MarkInactive(reason, message string) { podCondSet.Manage(pas.duck()).MarkFalse(PodAutoscalerConditionActive, reason, message) } -// MarkActivatingTime marks the PA as activating timeout. -func (pas *PodAutoscalerStatus) MarkActivatingTimeout(reason, message string) { - podCondSet.Manage(pas.duck()).MarkTimeout(PodAutoscalerConditionActive, reason, message) -} - // MarkResourceNotOwned changes the "Active" condition to false to reflect that the // resource of the given kind and name has already been created, and we do not own it. 
func (pas *PodAutoscalerStatus) MarkResourceNotOwned(kind, name string) { diff --git a/pkg/reconciler/autoscaling/kpa/kpa.go b/pkg/reconciler/autoscaling/kpa/kpa.go index eb3b2914ed1a..bcb30a32abdc 100644 --- a/pkg/reconciler/autoscaling/kpa/kpa.go +++ b/pkg/reconciler/autoscaling/kpa/kpa.go @@ -393,9 +393,6 @@ func computeActiveCondition(pa *pav1alpha1.PodAutoscaler, want int32, got int, t ret = pa.Status.IsInactive() // If we were inactive and became activating. pa.Status.MarkActivating( "Queued", "Requests to the target are being buffered as resources are provisioned.") - if pa.Status.IsActivatingTimeout(timeout) { - pa.Status.MarkActivatingTimeout("Timeout", "The target is timeout for activating.") - } case got >= minReady: // SKS should already be active. diff --git a/vendor/github.com/knative/pkg/apis/condition_set.go b/vendor/github.com/knative/pkg/apis/condition_set.go index d09cc37e0ece..d4c70098e1fe 100644 --- a/vendor/github.com/knative/pkg/apis/condition_set.go +++ b/vendor/github.com/knative/pkg/apis/condition_set.go @@ -27,10 +27,6 @@ import ( metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" ) -const ( - ConditionTimeout corev1.ConditionStatus = "Timeout" -) - // Conditions is the interface for a Resource that implements the getter and // setter for accessing a Condition collection. // +k8s:deepcopy-gen=true @@ -75,9 +71,6 @@ type ConditionManager interface { // MarkFalse sets the status of t and the happy condition to False. MarkFalse(t ConditionType, reason, messageFormat string, messageA ...interface{}) - // MarkTimeout sets the status of t and the happy condition to Timeout. - MarkTimeout(t ConditionType, reason, messageFormat string, messageA ...interface{}) - // InitializeConditions updates all Conditions in the ConditionSet to Unknown // if not set. InitializeConditions() @@ -303,26 +296,6 @@ func (r conditionsImpl) MarkFalse(t ConditionType, reason, messageFormat string, } } -// MarkTimeout sets the status of t and the happy condition to timeout. 
-func (r conditionsImpl) MarkTimeout(t ConditionType, reason, messageFormat string, messageA ...interface{}) { - types := []ConditionType{t} - for _, cond := range r.dependents { - if cond == t { - types = append(types, r.happy) - } - } - - for _, t := range types { - r.SetCondition(Condition{ - Type: t, - Status: ConditionTimeout, - Reason: reason, - Message: fmt.Sprintf(messageFormat, messageA...), - Severity: r.severity(t), - }) - } -} - // InitializeConditions updates all Conditions in the ConditionSet to Unknown // if not set. func (r conditionsImpl) InitializeConditions() { From faa3dd583c02a25589afbbd448f0c407432b2e86 Mon Sep 17 00:00:00 2001 From: lvjing2 Date: Tue, 4 Jun 2019 12:56:44 +0800 Subject: [PATCH 3/3] check activating timeout for all --- pkg/autoscaler/config_test.go | 12 +++++++++++ pkg/reconciler/autoscaling/kpa/kpa.go | 7 +++--- pkg/reconciler/autoscaling/kpa/scaler.go | 27 ++++++++++++------------ 3 files changed, 29 insertions(+), 17 deletions(-) diff --git a/pkg/autoscaler/config_test.go b/pkg/autoscaler/config_test.go index b6765df2892c..98df9b2300da 100644 --- a/pkg/autoscaler/config_test.go +++ b/pkg/autoscaler/config_test.go @@ -79,6 +79,7 @@ func TestNewConfig(t *testing.T) { "tick-interval": "2s", "panic-window-percentage": "10", "panic-threshold-percentage": "200", + "activating-timeout": "2m", }, want: &Config{ EnableScaleToZero: true, @@ -91,6 +92,7 @@ func TestNewConfig(t *testing.T) { TickInterval: 2 * time.Second, PanicWindowPercentage: 10.0, PanicThresholdPercentage: 200.0, + ActivatingTimeout: 2 * time.Minute, }, }, { name: "with toggles on", @@ -104,6 +106,7 @@ func TestNewConfig(t *testing.T) { "tick-interval": "2s", "panic-window-percentage": "10", "panic-threshold-percentage": "200", + "activating-timeout": "2m", }, want: &Config{ EnableScaleToZero: true, @@ -116,6 +119,7 @@ func TestNewConfig(t *testing.T) { TickInterval: 2 * time.Second, PanicWindowPercentage: 10.0, PanicThresholdPercentage: 200.0, + ActivatingTimeout: 2 
* time.Minute, }, }, { name: "with toggles on strange casing", @@ -129,6 +133,7 @@ func TestNewConfig(t *testing.T) { "tick-interval": "2s", "panic-window-percentage": "10", "panic-threshold-percentage": "200", + "activating-timeout": "2m", }, want: &Config{ EnableScaleToZero: true, @@ -141,6 +146,7 @@ func TestNewConfig(t *testing.T) { TickInterval: 2 * time.Second, PanicWindowPercentage: 10.0, PanicThresholdPercentage: 200.0, + ActivatingTimeout: 2 * time.Minute, }, }, { name: "with toggles explicitly off", @@ -154,6 +160,7 @@ func TestNewConfig(t *testing.T) { "tick-interval": "2s", "panic-window-percentage": "10", "panic-threshold-percentage": "200", + "activating-timeout": "2m", }, want: &Config{ ContainerConcurrencyTargetPercentage: 0.5, @@ -165,6 +172,7 @@ func TestNewConfig(t *testing.T) { TickInterval: 2 * time.Second, PanicWindowPercentage: 10.0, PanicThresholdPercentage: 200.0, + ActivatingTimeout: 2 * time.Minute, }, }, { name: "with explicit grace period", @@ -179,6 +187,7 @@ func TestNewConfig(t *testing.T) { "tick-interval": "2s", "panic-window-percentage": "10", "panic-threshold-percentage": "200", + "activating-timeout": "2m", }, want: &Config{ ContainerConcurrencyTargetPercentage: 0.5, @@ -190,6 +199,7 @@ func TestNewConfig(t *testing.T) { TickInterval: 2 * time.Second, PanicWindowPercentage: 10.0, PanicThresholdPercentage: 200.0, + ActivatingTimeout: 2 * time.Minute, }, }, { name: "malformed float", @@ -202,6 +212,7 @@ func TestNewConfig(t *testing.T) { "tick-interval": "2s", "panic-window-percentage": "10", "panic-threshold-percentage": "200", + "activating-timeout": "2m", }, wantErr: true, }, { @@ -215,6 +226,7 @@ func TestNewConfig(t *testing.T) { "tick-interval": "2s", "panic-window-percentage": "10", "panic-threshold-percentage": "200", + "activating-timeout": "2m", }, wantErr: true, }} diff --git a/pkg/reconciler/autoscaling/kpa/kpa.go b/pkg/reconciler/autoscaling/kpa/kpa.go index bcb30a32abdc..47a97127682b 100644 --- 
a/pkg/reconciler/autoscaling/kpa/kpa.go +++ b/pkg/reconciler/autoscaling/kpa/kpa.go @@ -21,7 +21,6 @@ import ( "fmt" "reflect" "strconv" - "time" "github.com/knative/pkg/controller" "github.com/knative/pkg/logging" @@ -215,15 +214,15 @@ func (c *Reconciler) reconcile(ctx context.Context, pa *pav1alpha1.PodAutoscaler return perrors.Wrap(err, "error reporting metrics") } - timeout := config.FromContext(ctx).Autoscaler.ActivatingTimeout if pa.Status.IsActivating() { + timeout := config.FromContext(ctx).Autoscaler.ActivatingTimeout logger.Infof("enqueue delay for check activating timeout after %v.", timeout) c.scaler.enqueueCB(pa, timeout) } // computeActiveCondition decides if we need to change the SKS mode, // and returns true if the status has changed. - if changed := computeActiveCondition(pa, want, got, timeout); changed { + if changed := computeActiveCondition(pa, want, got); changed { _, err := c.reconcileSKS(ctx, pa) if err != nil { return perrors.Wrap(err, "error re-reconciling SKS") @@ -381,7 +380,7 @@ func reportMetrics(pa *pav1alpha1.PodAutoscaler, want int32, got int) error { // computeActiveCondition updates the status of PA, depending on scales desired and present. // computeActiveCondition returns true if it thinks SKS needs an update. 
-func computeActiveCondition(pa *pav1alpha1.PodAutoscaler, want int32, got int, timeout time.Duration) (ret bool) { +func computeActiveCondition(pa *pav1alpha1.PodAutoscaler, want int32, got int) (ret bool) { minReady := activeThreshold(pa) switch { diff --git a/pkg/reconciler/autoscaling/kpa/scaler.go b/pkg/reconciler/autoscaling/kpa/scaler.go index 2ac6b2add59a..c00c3848ddf9 100644 --- a/pkg/reconciler/autoscaling/kpa/scaler.go +++ b/pkg/reconciler/autoscaling/kpa/scaler.go @@ -163,15 +163,10 @@ func scaleResourceArgs(pa *pav1alpha1.PodAutoscaler) (*schema.GroupVersionResour } func (ks *scaler) handleScaleToZero(pa *pav1alpha1.PodAutoscaler, desiredScale int32, config *autoscaler.Config) (int32, bool) { - if desiredScale > 0 { + if desiredScale != 0 { return desiredScale, true } - if desiredScale < 0 && !pa.Status.IsActivating() { - ks.logger.Info("Metrics are not yet being collected.") - return desiredScale, false - } - // We should only scale to zero when three of the following conditions are true: // a) enable-scale-to-zero from configmap is true // b) The PA has been active for at least the stable window, after which it gets marked inactive @@ -184,11 +179,6 @@ func (ks *scaler) handleScaleToZero(pa *pav1alpha1.PodAutoscaler, desiredScale i if pa.Status.IsActivating() { // Active=Unknown // Don't scale-to-zero during activation if min, _ := pa.ScaleBounds(); min == 0 { - if pa.Status.IsActivatingTimeout(config.ActivatingTimeout) { - ks.logger.Infof("activating timeout after %v", config.ActivatingTimeout) - return 0, true - } - ks.logger.Info("activating, don't scale to zero") return scaleUnknown, false } } else if pa.Status.IsReady() { // Active=True @@ -198,7 +188,7 @@ func (ks *scaler) handleScaleToZero(pa *pav1alpha1.PodAutoscaler, desiredScale i if pa.Status.CanMarkInactive(config.StableWindow) { // We do not need to enqueue PA here, since this will // make SKS reconcile and when it's done, PA will be reconciled again. 
- return 0, false + return desiredScale, false } // Otherwise, scale down to 1 until the idle period elapses and re-enqueue // the PA for reconciliation at that time. @@ -263,8 +253,19 @@ func (ks *scaler) applyScale(ctx context.Context, pa *pav1alpha1.PodAutoscaler, // Scale attempts to scale the given PA's target reference to the desired scale. func (ks *scaler) Scale(ctx context.Context, pa *pav1alpha1.PodAutoscaler, desiredScale int32) (int32, error) { logger := logging.FromContext(ctx) + autoscalerConfig := config.FromContext(ctx).Autoscaler + + if desiredScale < 0 { + // Check whether the PA has been activating longer than the configured timeout. + if pa.Status.IsActivatingTimeout(autoscalerConfig.ActivatingTimeout) { + logger.Infof("Activating pa timeout after %v", autoscalerConfig.ActivatingTimeout) + } else { + logger.Debug("Metrics are not yet being collected.") + return desiredScale, nil + } + } - desiredScale, shouldApplyScale := ks.handleScaleToZero(pa, desiredScale, config.FromContext(ctx).Autoscaler) + desiredScale, shouldApplyScale := ks.handleScaleToZero(pa, desiredScale, autoscalerConfig) if !shouldApplyScale { return desiredScale, nil }