test/e2e/autoscale_test.go (11 additions, 35 deletions)
@@ -40,8 +40,7 @@ const (
 )
 
 var (
-	initialScaleToZeroThreshold   string
-	initialScaleToZeroGracePeriod string
+	scaleToZeroThreshold time.Duration
 )
 
 func isDeploymentScaledUp() func(d *v1beta1.Deployment) (bool, error) {
@@ -118,45 +117,22 @@ func generateTraffic(clients *test.Clients, logger *logging.BaseLogger, concurre
 	return nil
 }
 
-func getAutoscalerConfigMap(clients *test.Clients) (*v1.ConfigMap, error) {
-	return test.GetConfigMap(clients.KubeClient).Get("config-autoscaler", metav1.GetOptions{})
-}
-
-func setScaleToZeroThreshold(clients *test.Clients, threshold string, gracePeriod string) error {
-	configMap, err := getAutoscalerConfigMap(clients)
-	if err != nil {
-		return err
-	}
-	configMap.Data["scale-to-zero-threshold"] = threshold
-	configMap.Data["scale-to-zero-grace-period"] = gracePeriod
-	_, err = test.GetConfigMap(clients.KubeClient).Update(configMap)
-	return err
-}
-
 func setup(t *testing.T, logger *logging.BaseLogger) *test.Clients {
 	clients := Setup(t)
 
-	configMap, err := getAutoscalerConfigMap(clients)
+	configMap, err := test.GetConfigMap(clients.KubeClient).Get("config-autoscaler", metav1.GetOptions{})
 	if err != nil {
-		logger.Infof("Unable to retrieve the autoscale configMap. Assuming a ScaleToZero value of '5m'. %v", err)
-		initialScaleToZeroThreshold = "5m"
-		initialScaleToZeroGracePeriod = "2m"
-	} else {
-		initialScaleToZeroThreshold = configMap.Data["scale-to-zero-threshold"]
-		initialScaleToZeroGracePeriod = configMap.Data["scale-to-zero-grace-period"]
+		t.Fatalf("Unable to get autoscaler config map: %v", err)
 	}
 
-	err = setScaleToZeroThreshold(clients, "1m", "30s")
+	scaleToZeroThreshold, err = time.ParseDuration(configMap.Data["scale-to-zero-threshold"])
 	if err != nil {
-		t.Fatalf(`Unable to set ScaleToZeroThreshold to '1m'. This will
-		cause the test to time out. Failing fast instead. %v`, err)
+		t.Fatalf("Unable to parse scale-to-zero-threshold as duration: %v", err)
 	}
 
 	return clients
 }
 
 func tearDown(clients *test.Clients, names test.ResourceNames, logger *logging.BaseLogger) {
-	setScaleToZeroThreshold(clients, initialScaleToZeroThreshold, initialScaleToZeroGracePeriod)
 	TearDown(clients, names, logger)
 }

@@ -232,7 +208,8 @@ func TestAutoscaleUpDownUp(t *testing.T) {
 		clients.KubeClient,
 		deploymentName,
 		isDeploymentScaledUp(),
-		"DeploymentIsScaledUp")
+		"DeploymentIsScaledUp",
+		2*time.Minute)
Contributor:
This is reducing the time we wait for initial scaleup from the old timeout of 6 minutes down to 2 minutes. Do we know from previous test runs that this always happens within 2 minutes, so that we're not introducing another potential flake?

Contributor Author:
I'm pretty certain that if scale up doesn't happen within 2 minutes, it never will. I saw this while developing these changes.

Contributor:
I poked through several previous test runs and the scale-up tends to happen in less than one second. I didn't see any cases where it even approached 2 minutes, so this seems fine.

Contributor Author:
Yeah, what makes it okay is that we don't start the clock until we have 200 responses for all the requests we sent. So processing time can't affect this; it's purely metrics pipeline and autoscaler latency.

 	if err != nil {
 		logger.Fatalf(`Unable to observe the Deployment named %s scaling
 		up. %s`, deploymentName, err)
@@ -241,15 +218,13 @@ func TestAutoscaleUpDownUp(t *testing.T) {
 	logger.Infof(`The autoscaler successfully scales down when devoid of
 	traffic.`)
 
-	logger.Infof(`Manually setting ScaleToZeroThreshold to '1m' to facilitate
-	faster testing.`)
-
 	logger.Infof("Waiting for scale to zero")
 	err = test.WaitForDeploymentState(
 		clients.KubeClient,
Contributor:
The log message about "Manually setting ScaleToZeroThreshold" a few lines above this is no longer relevant.

Contributor Author:
Done.

 		deploymentName,
 		isDeploymentScaledToZero(),
-		"DeploymentScaledToZero")
+		"DeploymentScaledToZero",
+		scaleToZeroThreshold+2*time.Minute)
 	if err != nil {
 		logger.Fatalf(`Unable to observe the Deployment named %s scaling
 		down. %s`, deploymentName, err)
@@ -283,7 +258,8 @@ func TestAutoscaleUpDownUp(t *testing.T) {
 		clients.KubeClient,
 		deploymentName,
 		isDeploymentScaledUp(),
-		"DeploymentScaledUp")
+		"DeploymentScaledUp",
+		2*time.Minute)
Contributor:
Same comment as on the previous scale-up block with regard to the 2-minute timeout. Lower is better, as long as the tests don't flake with the lower value.

Contributor Author:
Yeah, same deal. All the requests have succeeded; now we're just giving the autoscaler time to respond. The metrics pipeline has a 60-second window, so 2 minutes is definitely enough to scale up.

 	if err != nil {
 		logger.Fatalf(`Unable to observe the Deployment named %s scaling
 		up. %s`, deploymentName, err)
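Taken together, these hunks replace the old approach of rewriting config-autoscaler (and restoring it in tearDown) with explicit per-wait timeouts derived from the cluster's actual scale-to-zero-threshold. Below is a minimal sketch of that pattern, with the timeout reasoning from the review threads above spelled out as comments. The helper names waitForScaleUp and waitForScaleDown are hypothetical groupings added here for illustration; in the PR these calls appear inline in TestAutoscaleUpDownUp, and the other identifiers are taken from the diff.

// Hypothetical helpers grouping the wait calls shown in the diff above.
// They assume the test package's identifiers (test.WaitForDeploymentState,
// isDeploymentScaledUp, isDeploymentScaledToZero, scaleToZeroThreshold)
// exactly as they appear in the hunks.

func waitForScaleUp(clients *test.Clients, deploymentName string) error {
	// generateTraffic has already returned, i.e. every request received a 200,
	// so this 2-minute budget only has to cover the metrics pipeline
	// (a 60-second window) plus the autoscaler's reaction time.
	return test.WaitForDeploymentState(
		clients.KubeClient,
		deploymentName,
		isDeploymentScaledUp(),
		"DeploymentIsScaledUp",
		2*time.Minute)
}

func waitForScaleDown(clients *test.Clients, deploymentName string) error {
	// The deployment cannot scale to zero before the configured
	// scale-to-zero-threshold (read once in setup) has elapsed, so the
	// budget is that threshold plus the same 2-minute margin.
	return test.WaitForDeploymentState(
		clients.KubeClient,
		deploymentName,
		isDeploymentScaledToZero(),
		"DeploymentScaledToZero",
		scaleToZeroThreshold+2*time.Minute)
}

One apparent benefit of reading the threshold instead of mutating it is that the test no longer changes cluster-wide configuration that other tests, or an interrupted tearDown, could be affected by.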
test/kube_checks.go (2 additions, 1 deletion)
@@ -22,6 +22,7 @@ package test
 import (
 	"context"
 	"fmt"
+	"time"
 
 	pkgTest "github.com/knative/pkg/test"
 	"go.opencensus.io/trace"
@@ -35,7 +36,7 @@ import (
 // from client every interval until inState returns `true` indicating it
 // is done, returns an error or timeout. desc will be used to name the metric
 // that is emitted to track how long it took for name to get into the state checked by inState.
-func WaitForDeploymentState(client *pkgTest.KubeClient, name string, inState func(d *apiv1beta1.Deployment) (bool, error), desc string) error {
+func WaitForDeploymentState(client *pkgTest.KubeClient, name string, inState func(d *apiv1beta1.Deployment) (bool, error), desc string, timeout time.Duration) error {
 	d := client.Kube.ExtensionsV1beta1().Deployments(ServingNamespace)
 	metricName := fmt.Sprintf("WaitForDeploymentState/%s/%s", name, desc)
 	_, span := trace.StartSpan(context.Background(), metricName)
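The hunk above shows only the new signature and the first few lines of the body, so here is a minimal sketch of how the added timeout parameter could drive the polling loop. It assumes k8s.io/apimachinery's wait.PollImmediate and a package-level polling interval constant, neither of which is visible in this diff, so treat the interval name and the exact error handling as assumptions rather than the PR's actual code.

// Sketch of the remainder of WaitForDeploymentState (assumption, not shown in the diff):
// poll every `interval` until inState reports done, an error occurs, or `timeout` expires.
defer span.End()
return wait.PollImmediate(interval, timeout, func() (bool, error) {
	dep, err := d.Get(name, metav1.GetOptions{})
	if err != nil {
		// Stop polling and surface the API error immediately.
		return true, err
	}
	return inState(dep)
})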