Do not modify config-map in autoscale test. #2144
Changes from all commits
```diff
@@ -40,8 +40,7 @@ const (
 )
 
 var (
-	initialScaleToZeroThreshold   string
-	initialScaleToZeroGracePeriod string
+	scaleToZeroThreshold time.Duration
 )
 
 func isDeploymentScaledUp() func(d *v1beta1.Deployment) (bool, error) {
@@ -118,45 +117,22 @@ func generateTraffic(clients *test.Clients, logger *logging.BaseLogger, concurre
 	return nil
 }
 
-func getAutoscalerConfigMap(clients *test.Clients) (*v1.ConfigMap, error) {
-	return test.GetConfigMap(clients.KubeClient).Get("config-autoscaler", metav1.GetOptions{})
-}
-
-func setScaleToZeroThreshold(clients *test.Clients, threshold string, gracePeriod string) error {
-	configMap, err := getAutoscalerConfigMap(clients)
-	if err != nil {
-		return err
-	}
-	configMap.Data["scale-to-zero-threshold"] = threshold
-	configMap.Data["scale-to-zero-grace-period"] = gracePeriod
-	_, err = test.GetConfigMap(clients.KubeClient).Update(configMap)
-	return err
-}
-
 func setup(t *testing.T, logger *logging.BaseLogger) *test.Clients {
 	clients := Setup(t)
 
-	configMap, err := getAutoscalerConfigMap(clients)
+	configMap, err := test.GetConfigMap(clients.KubeClient).Get("config-autoscaler", metav1.GetOptions{})
 	if err != nil {
-		logger.Infof("Unable to retrieve the autoscale configMap. Assuming a ScaleToZero value of '5m'. %v", err)
-		initialScaleToZeroThreshold = "5m"
-		initialScaleToZeroGracePeriod = "2m"
-	} else {
-		initialScaleToZeroThreshold = configMap.Data["scale-to-zero-threshold"]
-		initialScaleToZeroGracePeriod = configMap.Data["scale-to-zero-grace-period"]
+		t.Fatalf("Unable to get autoscaler config map: %v", err)
 	}
 
-	err = setScaleToZeroThreshold(clients, "1m", "30s")
+	scaleToZeroThreshold, err = time.ParseDuration(configMap.Data["scale-to-zero-threshold"])
 	if err != nil {
-		t.Fatalf(`Unable to set ScaleToZeroThreshold to '1m'. This will
-cause the test to time out. Failing fast instead. %v`, err)
+		t.Fatalf("Unable to parse scale-to-zero-threshold as duration: %v", err)
 	}
 
 	return clients
 }
 
-func tearDown(clients *test.Clients, names test.ResourceNames, logger *logging.BaseLogger) {
-	setScaleToZeroThreshold(clients, initialScaleToZeroThreshold, initialScaleToZeroGracePeriod)
-	TearDown(clients, names, logger)
-}
-
```
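The heart of the change is visible in setup() above: instead of writing test-friendly values into config-autoscaler and restoring them in tearDown, the test now only reads the live value and parses it into a duration. Below is a minimal standalone sketch of that read-only pattern; the knative-serving namespace and the kubeconfig loading are assumptions for illustration, and the context-free Get signature matches the client-go version this diff itself uses.

```go
package main

import (
	"fmt"
	"time"

	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/client-go/kubernetes"
	"k8s.io/client-go/tools/clientcmd"
)

// scaleToZeroThreshold reads config-autoscaler without ever updating it,
// so concurrently running tests can never observe a mutated threshold.
func scaleToZeroThreshold(kube kubernetes.Interface) (time.Duration, error) {
	cm, err := kube.CoreV1().ConfigMaps("knative-serving").Get("config-autoscaler", metav1.GetOptions{})
	if err != nil {
		return 0, err
	}
	return time.ParseDuration(cm.Data["scale-to-zero-threshold"])
}

func main() {
	cfg, err := clientcmd.BuildConfigFromFlags("", clientcmd.RecommendedHomeFile)
	if err != nil {
		panic(err)
	}
	threshold, err := scaleToZeroThreshold(kubernetes.NewForConfigOrDie(cfg))
	if err != nil {
		panic(err)
	}
	// The wait budget is derived from the cluster's own setting plus headroom,
	// mirroring the scaleToZeroThreshold+2*time.Minute used in the test below.
	fmt.Println("scale-down wait budget:", threshold+2*time.Minute)
}
```

Because nothing is written back, the tearDown step that previously restored the original threshold and grace period becomes unnecessary, which is why the diff deletes it outright.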
```diff
@@ -232,7 +208,8 @@ func TestAutoscaleUpDownUp(t *testing.T) {
 		clients.KubeClient,
 		deploymentName,
 		isDeploymentScaledUp(),
-		"DeploymentIsScaledUp")
+		"DeploymentIsScaledUp",
+		2*time.Minute)
 	if err != nil {
 		logger.Fatalf(`Unable to observe the Deployment named %s scaling
 up. %s`, deploymentName, err)
@@ -241,15 +218,13 @@ func TestAutoscaleUpDownUp(t *testing.T) {
 	logger.Infof(`The autoscaler successfully scales down when devoid of
 traffic.`)
 
-	logger.Infof(`Manually setting ScaleToZeroThreshold to '1m' to facilitate
-faster testing.`)
-
 	logger.Infof("Waiting for scale to zero")
 	err = test.WaitForDeploymentState(
 		clients.KubeClient,
 		deploymentName,
 		isDeploymentScaledToZero(),
-		"DeploymentScaledToZero")
+		"DeploymentScaledToZero",
+		scaleToZeroThreshold+2*time.Minute)
 	if err != nil {
 		logger.Fatalf(`Unable to observe the Deployment named %s scaling
 down. %s`, deploymentName, err)
```

Contributor: The log message about "Manually setting ScaleToZeroThreshold" a few lines above this is no longer relevant.

Contributor (author): Done.
```diff
@@ -283,7 +258,8 @@ func TestAutoscaleUpDownUp(t *testing.T) {
 		clients.KubeClient,
 		deploymentName,
 		isDeploymentScaledUp(),
-		"DeploymentScaledUp")
+		"DeploymentScaledUp",
+		2*time.Minute)
 	if err != nil {
 		logger.Fatalf(`Unable to observe the Deployment named %s scaling
 up. %s`, deploymentName, err)
```

Contributor: Same comment with regard to the 2-minute timeout as the previous scale-up block. Lower is better, as long as the tests don't flake with the lower value.

Contributor (author): Yeah, same deal. All the requests have succeeded; now we're just giving the autoscaler time to respond. The metrics pipeline has a 60-second window, so 2 minutes is definitely enough to scale up.
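For context on what that timeout parameter buys: a WaitForDeploymentState-style helper is essentially a bounded poll, so each call site can pick the smallest budget that will not flake. A rough sketch of the idea, assuming nothing about the real helper's internals (which this diff does not show), using the standard apimachinery wait package:

```go
package main

import (
	"fmt"
	"time"

	"k8s.io/apimachinery/pkg/util/wait"
)

func main() {
	// Stand-in for isDeploymentScaledUp(); the real predicate inspects
	// the Deployment's replica counts.
	scaledUp := func() (bool, error) {
		return true, nil // hypothetical: report scaled-up immediately
	}

	// Poll once a second until the predicate holds or the 2-minute
	// budget from the diff elapses, whichever comes first.
	if err := wait.PollImmediate(time.Second, 2*time.Minute, scaledUp); err != nil {
		fmt.Println("deployment did not scale up in time:", err)
		return
	}
	fmt.Println("deployment scaled up")
}
```

Passing the budget explicitly is what lets the scale-down wait use scaleToZeroThreshold+2*time.Minute while the scale-up waits use a flat 2 minutes.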
Contributor: This is reducing the time we wait for initial scale-up from the old timeout of 6 minutes down to 2 minutes. Do we know from previous test runs that this always happens within 2 minutes, so that we're not introducing another potential flake?

Contributor (author): I'm pretty certain that if scale-up doesn't happen within 2 minutes, it never will. I saw this while developing these changes.

Contributor: I poked through several previous test runs and the scaling up tends to happen in less than one second. I didn't see any cases where it even approached 2 minutes, so this seems fine.

Contributor (author): Yeah, what makes it okay is that we aren't starting the timer until we have 200 responses from all the requests we sent, so processing time can't affect this. It's purely metrics-pipeline and autoscaler latency.
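To make that sequencing argument concrete: the scale-up clock only starts after the traffic generator has seen a 200 for every request, so the budget measures metrics-pipeline and autoscaler latency alone. A sketch with hypothetical stand-ins (generateTraffic and checkScaledUp here are not the real test helpers):

```go
package main

import (
	"fmt"
	"time"
)

// generateTraffic stands in for the test's traffic generator: it returns
// only once every concurrent request has received a 200 response.
func generateTraffic() error {
	time.Sleep(50 * time.Millisecond) // placeholder for real requests
	return nil
}

// checkScaledUp stands in for inspecting the Deployment's replica count.
func checkScaledUp() bool { return true }

func main() {
	if err := generateTraffic(); err != nil {
		panic(err) // request processing time is fully spent before this point
	}

	// The 2-minute clock starts only now; with a 60-second metrics window,
	// scale-up latency fits comfortably inside this budget.
	start := time.Now()
	for time.Since(start) < 2*time.Minute {
		if checkScaledUp() {
			fmt.Println("scaled up after", time.Since(start))
			return
		}
		time.Sleep(time.Second)
	}
	fmt.Println("timed out: only pipeline/autoscaler latency could cause this")
}
```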