From 93c59d407ec63a53159b0cfdef41772d7e314c1f Mon Sep 17 00:00:00 2001 From: Matt Moore Date: Mon, 13 Jul 2020 15:21:52 -0700 Subject: [PATCH 1/2] Assorted fixes to enable chaos duck. This lands a handful of fixes that I uncovered preparing to run controlplane chaos testing during our e2e tests. --- test/e2e-common.sh | 6 ++--- test/e2e-tests.sh | 41 +++++++++++++++-------------------- test/ha/activator_test.go | 4 +++- test/ha/autoscalerhpa_test.go | 3 ++- test/ha/controller_test.go | 3 ++- 5 files changed, 28 insertions(+), 29 deletions(-) diff --git a/test/e2e-common.sh b/test/e2e-common.sh index ab5a6e30cb1a..c7ca252a92c7 100644 --- a/test/e2e-common.sh +++ b/test/e2e-common.sh @@ -512,6 +512,9 @@ function test_setup() { # Clean up kail so it doesn't interfere with job shutting down add_trap "kill $kail_pid || true" EXIT + echo ">> Waiting for Serving components to be running..." + wait_until_pods_running ${SYSTEM_NAMESPACE} || return 1 + local TEST_CONFIG_DIR=${TEST_DIR}/config echo ">> Creating test resources (${TEST_CONFIG_DIR}/)" ko apply ${KO_FLAGS} -f ${TEST_CONFIG_DIR}/ || return 1 @@ -525,9 +528,6 @@ function test_setup() { echo ">> Uploading test images..." ${REPO_ROOT_DIR}/test/upload-test-images.sh || return 1 - echo ">> Waiting for Serving components to be running..." - wait_until_pods_running ${SYSTEM_NAMESPACE} || return 1 - echo ">> Waiting for Cert Manager components to be running..." wait_until_pods_running cert-manager || return 1 diff --git a/test/e2e-tests.sh b/test/e2e-tests.sh index 6fa2b9c1c408..a0fe31398163 100755 --- a/test/e2e-tests.sh +++ b/test/e2e-tests.sh @@ -49,24 +49,19 @@ function wait_for_leader_controller() { return 1 } -function enable_tag_header_based_routing() { - echo -n "Enabling Tag Header Based Routing" - kubectl patch cm config-network -n "${SYSTEM_NAMESPACE}" -p '{"data":{"tagHeaderBasedRouting":"Enabled"}}' +function toggle_feature() { + local FEATURE="$1" + local STATE="$2" + local CONFIG="${3:-config-features}" + echo -n "Setting feature ${FEATURE} to ${STATE}" + kubectl patch cm "${CONFIG}" -n "${SYSTEM_NAMESPACE}" -p '{"data":{"'${FEATURE}'":"'${STATE}'"}}' + # We don't have a good mechanism for positive handoff so sleep :( + echo "Waiting 30s for change to get picked up." + sleep 30 } -function disable_tag_header_based_routing() { - echo -n "Disabling Tag Header Based Routing" - kubectl patch cm config-network -n "${SYSTEM_NAMESPACE}" -p '{"data":{"tagHeaderBasedRouting":"Disabled"}}' -} - -function enable_multi_container_feature() { - echo -n "Enabling Multi Container Feature Flag" - kubectl patch cm config-features -n "${SYSTEM_NAMESPACE}" -p '{"data":{"multi-container":"Enabled"}}' -} - -function disable_multi_container_feature() { - echo -n "Disabling Multi Container Feature Flag" - kubectl patch cm config-features -n "${SYSTEM_NAMESPACE}" -p '{"data":{"multi-container":"Disabled"}}' +function toggle_network_feature() { + toggle_feature "$1" "$2" config-network } # Script entry point. @@ -104,7 +99,7 @@ fi # Keep this in sync with test/ha/ha.go -readonly REPLICAS=2 +readonly REPLICAS=3 readonly BUCKETS=10 @@ -172,15 +167,15 @@ if (( HTTPS )); then turn_off_auto_tls fi -enable_tag_header_based_routing -add_trap "disable_tag_header_based_routing" SIGKILL SIGTERM SIGQUIT +toggle_network_feature tagHeaderBasedRouting Enabled +add_trap "toggle_network_feature tagHeaderBasedRouting Disabled" SIGKILL SIGTERM SIGQUIT go_test_e2e -timeout=2m ./test/e2e/tagheader || failed=1 -disable_tag_header_based_routing +toggle_network_feature tagHeaderBasedRouting Disabled -enable_multi_container_feature -add_trap "disable_multi_container_feature" SIGKILL SIGTERM SIGQUIT +toggle_feature multi-container Enabled +add_trap "toggle_feature multi-container Disabled" SIGKILL SIGTERM SIGQUIT go_test_e2e -timeout=2m ./test/e2e/multicontainer || failed=1 -disable_multi_container_feature +toggle_feature multi-container Disabled # Certificate conformance tests must be run separately # because they need cert-manager specific configurations. diff --git a/test/ha/activator_test.go b/test/ha/activator_test.go index 90c9b477fbe5..4ecc17297e7f 100644 --- a/test/ha/activator_test.go +++ b/test/ha/activator_test.go @@ -23,6 +23,7 @@ import ( "testing" "time" + "knative.dev/networking/pkg/apis/networking" "knative.dev/pkg/ptr" "knative.dev/pkg/system" "knative.dev/pkg/test/logstream" @@ -125,7 +126,8 @@ func testActivatorHA(t *testing.T, gracePeriod *int64, slo float64) { if err := pkgTest.WaitForPodDeleted(clients.KubeClient, activator.Name, system.Namespace()); err != nil { t.Fatalf("Did not observe %s to actually be deleted: %v", activator.Name, err) } - if err := pkgTest.WaitForServiceEndpoints(clients.KubeClient, resourcesScaleToZero.Revision.Name, test.ServingNamespace, test.ServingFlags.Replicas); err != nil { + // Check for the endpoint to appear in the activator's endpoints, since this revision may pick a subset of those endpoints. + if err := pkgTest.WaitForServiceEndpoints(clients.KubeClient, networking.ActivatorServiceName, system.Namespace(), test.ServingFlags.Replicas); err != nil { t.Fatalf("Deployment %s failed to scale up: %v", activatorDeploymentName, err) } if gracePeriod != nil && *gracePeriod == 0 { diff --git a/test/ha/autoscalerhpa_test.go b/test/ha/autoscalerhpa_test.go index 346093ec1b0c..8e912d0701c6 100644 --- a/test/ha/autoscalerhpa_test.go +++ b/test/ha/autoscalerhpa_test.go @@ -21,6 +21,7 @@ package ha import ( "testing" + apierrs "k8s.io/apimachinery/pkg/api/errors" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/util/sets" @@ -64,7 +65,7 @@ func TestAutoscalerHPAHANewRevision(t *testing.T) { for _, leader := range leaders.List() { if err := clients.KubeClient.Kube.CoreV1().Pods(system.Namespace()).Delete(leader, - &metav1.DeleteOptions{}); err != nil { + &metav1.DeleteOptions{}); err != nil && !apierrs.IsNotFound(err) { t.Fatalf("Failed to delete pod %s: %v", leader, err) } diff --git a/test/ha/controller_test.go b/test/ha/controller_test.go index b54dab1ef6b7..2d52ad2dbc76 100644 --- a/test/ha/controller_test.go +++ b/test/ha/controller_test.go @@ -21,6 +21,7 @@ package ha import ( "testing" + apierrs "k8s.io/apimachinery/pkg/api/errors" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/util/sets" @@ -57,7 +58,7 @@ func TestControllerHA(t *testing.T) { for _, leader := range leaders.List() { if err := clients.KubeClient.Kube.CoreV1().Pods(system.Namespace()).Delete(leader, - &metav1.DeleteOptions{}); err != nil { + &metav1.DeleteOptions{}); err != nil && !apierrs.IsNotFound(err) { t.Fatalf("Failed to delete pod %s: %v", leader, err) } From 3823b0f5ee8cb6f9d32a2c60048ca454be329bfb Mon Sep 17 00:00:00 2001 From: Matt Moore Date: Mon, 13 Jul 2020 15:41:38 -0700 Subject: [PATCH 2/2] Drop sleep to 10s --- test/e2e-tests.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/test/e2e-tests.sh b/test/e2e-tests.sh index a0fe31398163..8d1835f227d3 100755 --- a/test/e2e-tests.sh +++ b/test/e2e-tests.sh @@ -56,8 +56,8 @@ function toggle_feature() { echo -n "Setting feature ${FEATURE} to ${STATE}" kubectl patch cm "${CONFIG}" -n "${SYSTEM_NAMESPACE}" -p '{"data":{"'${FEATURE}'":"'${STATE}'"}}' # We don't have a good mechanism for positive handoff so sleep :( - echo "Waiting 30s for change to get picked up." - sleep 30 + echo "Waiting 10s for change to get picked up." + sleep 10 } function toggle_network_feature() {