diff --git a/leaderelection/chaosduck/main.go b/leaderelection/chaosduck/main.go new file mode 100644 index 0000000000..6a41937fa8 --- /dev/null +++ b/leaderelection/chaosduck/main.go @@ -0,0 +1,125 @@ +/* +Copyright 2020 The Knative Authors + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +// The chaosduck binary is an e2e testing tool for leader election, which loads +// the leader election configuration within the system namespace and +// periodically kills one of the leader pods for each HA component. +package main + +import ( + "context" + "errors" + "log" + "strings" + "time" + + "golang.org/x/sync/errgroup" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/util/sets" + "k8s.io/client-go/kubernetes" + kubeclient "knative.dev/pkg/client/injection/kube/client" + "knative.dev/pkg/controller" + "knative.dev/pkg/injection" + "knative.dev/pkg/injection/sharedmain" + "knative.dev/pkg/signals" + "knative.dev/pkg/system" + "knative.dev/pkg/test/ha" +) + +// components is a mapping from component name to the collection of leader pod names. +type components map[string]sets.String + +// buildComponents crawls the list of leases and builds a mapping from component names +// to the set pod names that hold one or more leases. 
+func buildComponents(kc kubernetes.Interface) (components, error) {
+	leases, err := kc.CoordinationV1().Leases(system.Namespace()).List(metav1.ListOptions{})
+	if err != nil {
+		return nil, err
+	}
+
+	cs := components{}
+	for _, lease := range leases.Items {
+		if lease.Spec.HolderIdentity == nil {
+			log.Printf("Found lease %q held by nobody!", lease.Name)
+			continue
+		}
+		// Holder identity is "<pod>_<suffix>"; the pod name is the first segment.
+		pod := strings.SplitN(*lease.Spec.HolderIdentity, "_", 2)[0]
+		deploymentName := ha.ExtractDeployment(pod)
+		if deploymentName == "" {
+			continue
+		}
+
+		set, ok := cs[deploymentName]
+		if !ok {
+			set = sets.NewString()
+			cs[deploymentName] = set
+		}
+		set.Insert(pod)
+	}
+	return cs, nil
+}
+
+// quack will kill one of the components leader pods.
+func quack(ctx context.Context, kc kubernetes.Interface, component string, leaders sets.String) error {
+	tribute, ok := leaders.PopAny()
+	if !ok {
+		return errors.New("this should not be possible, since components are only created when they have leaders")
+	}
+	log.Printf("Quacking at %q leader %q", component, tribute)
+
+	return kc.CoreV1().Pods(system.Namespace()).Delete(tribute, &metav1.DeleteOptions{})
+}
+
+// frequency is the frequency with which we kill off leaders.
+const frequency = 30 * time.Second
+
+func main() {
+	ctx := signals.NewContext()
+
+	// We don't expect informers to be set up, but we do expect the client to get attached to ctx.
+	ctx, informers := injection.Default.SetupInformers(ctx, sharedmain.ParseAndGetConfigOrDie())
+	if err := controller.StartInformers(ctx.Done(), informers...); err != nil {
+		log.Fatalf("Failed to start informers %v", err)
+	}
+	kc := kubeclient.Get(ctx)
+
+	// Until we are shutdown, build up an index of components and kill
+	// off a leader at the specified frequency.
+	for {
+		select {
+		case <-time.After(frequency):
+			components, err := buildComponents(kc)
+			if err != nil {
+				log.Printf("Error building components: %v", err)
+			}
+			log.Printf("Got components: %#v", components)
+
+			eg, ctx := errgroup.WithContext(ctx)
+			for name, leaders := range components {
+				name, leaders := name, leaders
+				eg.Go(func() error {
+					return quack(ctx, kc, name, leaders)
+				})
+			}
+			if err := eg.Wait(); err != nil {
+				log.Printf("Ended iteration with err: %v", err)
+			}
+
+		case <-ctx.Done():
+			return
+		}
+	}
+}
diff --git a/test/ha/ha.go b/test/ha/ha.go
index 2859cc8728..a88e8f1734 100644
--- a/test/ha/ha.go
+++ b/test/ha/ha.go
@@ -41,7 +41,8 @@ func countingRFind(wr rune, wc int) func(rune) bool {
 	}
 }
 
-func extractDeployment(pod string) string {
+// ExtractDeployment deconstructs a pod name and returns its deployment name, or "" if the pod name has too few parts.
+func ExtractDeployment(pod string) string {
 	if x := strings.LastIndexFunc(pod, countingRFind('-', 2)); x != -1 {
 		return pod[:x]
 	}
@@ -63,7 +63,7 @@ func GetLeaders(t *testing.T, client *test.KubeClient, deploymentName, namespace
 		pod := strings.SplitN(*lease.Spec.HolderIdentity, "_", 2)[0]
 
 		// Deconstruct the pod name and look for the deployment. This won't work for very long deployment names.
-		if extractDeployment(pod) != deploymentName {
+		if ExtractDeployment(pod) != deploymentName {
 			continue
 		}
 		ret = append(ret, pod)
diff --git a/test/ha/ha_test.go b/test/ha/ha_test.go
index 41b6ca9da3..4b460ccde2 100644
--- a/test/ha/ha_test.go
+++ b/test/ha/ha_test.go
@@ -20,10 +20,10 @@ import "testing"
 
 func TestExtractDeployment(t *testing.T) {
 	const want = "gke-cluster-michigan-pool-2"
-	if got := extractDeployment("gke-cluster-michigan-pool-2-03f384a0-2zu1"); got != want {
+	if got := ExtractDeployment("gke-cluster-michigan-pool-2-03f384a0-2zu1"); got != want {
 		t.Errorf("Deployment = %q, want: %q", got, want)
 	}
-	if got := extractDeployment("a-b"); got != "" {
+	if got := ExtractDeployment("a-b"); got != "" {
 		t.Errorf("Deployment = %q, want empty string", got)
 	}
 }