From 86d243aaec4e6d94e26cdc0b3e093386db413a61 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Denis=20Krienb=C3=BChl?= Date: Fri, 12 Sep 2025 16:55:23 +0200 Subject: [PATCH 1/3] Avoid unnecesary LB creation if UUID is missing When the k8s.cloudscale.ch/loadbalancer-uuid annotation was present on a service, but the UUID could not be found in via API, the CCM would start creating LBs in a loop until it was killed. The cause of this was the LBs were looked up. The CCM aassumed that if a UUID could not be found, that the LB must not exist, and tried to create one. This would happen in a loop without any way for the CCM to detect that it should stop. This assumption has now been changed. We now check for the LB both by name and UUID, and expect to find at most one. If the service name and the UUID both match different LBs (as might happen if one manipulates the service annotations manually), the CCM will return an error. Ultimately, the CCM has no state it can call its own and infers state from the APIs it talks to (i.e., Kubernetes and the cloudscale.ch API). So if someone were to change both the UUID and the name of the LB using annotations, the CCM would have no choice but to create a new LB. Manipulating the UUID was at one point thought to be a possible feature: One could bring an existing LB into the managament of the CCM. Though there is no code that would prevent this, the docs no longer mention this option, as this has the potential to cause havoc. Changing the name is already discouraged. It is probably savest to not touch the name, nor the UUID, during the lifetime of a service. --- pkg/cloudscale_ccm/lb_mapper.go | 13 ++++++- pkg/cloudscale_ccm/lb_mapper_test.go | 27 +++++++++++++- pkg/cloudscale_ccm/loadbalancer.go | 9 ++--- pkg/internal/limiter/limiter.go | 55 ++++++++++++++++++++++++++++ 4 files changed, 96 insertions(+), 8 deletions(-) diff --git a/pkg/cloudscale_ccm/lb_mapper.go b/pkg/cloudscale_ccm/lb_mapper.go index 3d1c1c6..b2a0977 100644 --- a/pkg/cloudscale_ccm/lb_mapper.go +++ b/pkg/cloudscale_ccm/lb_mapper.go @@ -20,8 +20,19 @@ func (l *lbMapper) findByServiceInfo( serviceInfo *serviceInfo, ) *limiter.Limiter[cloudscale.LoadBalancer] { + // If we have a UUID, look for both the service and the UUID. Usually + // we expect to only see one, but it is possible for the UUID to point + // to another LB than the service name, in which case we return both + // so the caller can decide if that is sane or not. if uuid := serviceInfo.annotation(LoadBalancerUUID); uuid != "" { - return l.getByUUID(ctx, uuid) + return limiter.Join( + l.getByUUID(ctx, uuid), + l.findByName(ctx, serviceInfo.annotation(LoadBalancerName)), + ).Unique( + func(a *cloudscale.LoadBalancer, b *cloudscale.LoadBalancer) bool { + return a.UUID == b.UUID + }, + ) } return l.findByName(ctx, serviceInfo.annotation(LoadBalancerName)) diff --git a/pkg/cloudscale_ccm/lb_mapper_test.go b/pkg/cloudscale_ccm/lb_mapper_test.go index 0529493..d839781 100644 --- a/pkg/cloudscale_ccm/lb_mapper_test.go +++ b/pkg/cloudscale_ccm/lb_mapper_test.go @@ -37,6 +37,7 @@ func TestFindLoadBalancer(t *testing.T) { lb, err := lbs.One() assert.NoError(t, err) assert.Equal(t, "foo", lb.Name) + assert.Equal(t, 1, lbs.Count()) // Using an ambiguous name s.Annotations[LoadBalancerName] = "clone" @@ -44,12 +45,34 @@ func TestFindLoadBalancer(t *testing.T) { lbs = mapper.findByServiceInfo(t.Context(), i) _, err = lbs.One() assert.Error(t, err) + assert.Equal(t, 2, lbs.Count()) - // Using a uuid + // Using a unique name and a mismatched uuid + s.Annotations[LoadBalancerName] = "foo" s.Annotations[LoadBalancerUUID] = "85dffa20-8097-4d75-afa6-9e4372047ce6" + lbs = mapper.findByServiceInfo(t.Context(), i) + _, err = lbs.One() + assert.Error(t, err) + assert.Equal(t, 2, lbs.Count()) + + // Using a unique name and a missing uuid + s.Annotations[LoadBalancerName] = "foo" + s.Annotations[LoadBalancerUUID] = "00000000-0000-0000-0000-000000000000" + lbs = mapper.findByServiceInfo(t.Context(), i) lb, err = lbs.One() assert.NoError(t, err) - assert.Equal(t, "clone", lb.Name) + assert.Equal(t, "foo", lb.Name) + assert.Equal(t, 1, lbs.Count()) + + // Using a unique name and a matching uuid + s.Annotations[LoadBalancerName] = "foo" + s.Annotations[LoadBalancerUUID] = "c2e4aabd-8c91-46da-b069-71e01f439806" + + lbs = mapper.findByServiceInfo(t.Context(), i) + lb, err = lbs.One() + assert.NoError(t, err) + assert.Equal(t, "foo", lb.Name) + assert.Equal(t, 1, lbs.Count()) } diff --git a/pkg/cloudscale_ccm/loadbalancer.go b/pkg/cloudscale_ccm/loadbalancer.go index b770e8d..f535f44 100644 --- a/pkg/cloudscale_ccm/loadbalancer.go +++ b/pkg/cloudscale_ccm/loadbalancer.go @@ -21,12 +21,11 @@ import ( // them, unless you know what you are doing. const ( // LoadBalancerUUID uniquely identifes the loadbalancer. This annotation - // should not be provided by the customer, unless the adoption of an - // existing load balancer is desired. + // should not be provided by the customer. // - // In all other cases, this value is set by the CCM after creating the - // load balancer, to ensure that we track it with a proper ID and not - // a name that might change without our knowledge. + // Instead, this value is set by the CCM after creating the load balancer, + // to ensure that we track it with a proper ID and not a name that might + // change without our knowledge. LoadBalancerUUID = "k8s.cloudscale.ch/loadbalancer-uuid" // LoadBalancerConfigVersion is set by the CCM when it first handles a diff --git a/pkg/internal/limiter/limiter.go b/pkg/internal/limiter/limiter.go index 542efe0..90ee2f3 100644 --- a/pkg/internal/limiter/limiter.go +++ b/pkg/internal/limiter/limiter.go @@ -18,6 +18,52 @@ func New[T any](err error, elements ...T) *Limiter[T] { } } +func Join[T any](limiters ...*Limiter[T]) *Limiter[T] { + joined := &Limiter[T]{ + Error: nil, + elements: []T{}, + } + + for _, limiter := range limiters { + + // Keep the first error + if joined.Error != nil && limiter.Error != nil { + joined.Error = limiter.Error + } + + // Append the elements + joined.elements = append(joined.elements, limiter.elements...) + } + + return joined +} + +// Unique excludes duplicate elements, according to a comparison function. +// This is meant for small lists as the complexity is O(n^2). +func (t *Limiter[T]) Unique(eq func(a *T, b *T) bool) *Limiter[T] { + result := []T{} + + for _, a := range t.elements { + add := true + + for _, b := range result { + if eq(&a, &b) { + add = false + + break + } + } + + if add { + result = append(result, a) + } + } + + t.elements = result + + return t +} + // All returns the full set of answers. func (t *Limiter[T]) All() ([]T, error) { if t.Error != nil { @@ -68,3 +114,12 @@ func (t *Limiter[T]) AtMostOne() (*T, error) { return &t.elements[0], nil } + +// Count returns the number of elements, if there is no error. +func (t *Limiter[T]) Count() int { + if t.Error != nil { + return 0 + } + + return len(t.elements) +} From 291ad0139da9f6a51e31e5e041fe2ae807f2ca85 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Denis=20Krienb=C3=BChl?= Date: Wed, 17 Sep 2025 13:20:24 +0200 Subject: [PATCH 2/3] Add missing semicolon to helpers/run-in-test-cluster --- helpers/run-in-test-cluster | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/helpers/run-in-test-cluster b/helpers/run-in-test-cluster index 2331c5c..428e9e7 100755 --- a/helpers/run-in-test-cluster +++ b/helpers/run-in-test-cluster @@ -19,16 +19,16 @@ function ensure-k8test() { # on the runners and cloning via SSH does not work. # # Therefore we run a separate install block that is only meant for GitHub. - if [[ "${GITHUB_ACTIONS:-}" == "true" ]] then + if [[ "${GITHUB_ACTIONS:-}" == "true" ]]; then if test -d k8test; then - source k8test/venv/bin/activate + source k8test/venv/bin/activate return fi mkdir -p ~/.ssh/ && touch ~/.ssh/known_hosts git clone https://github.com/cloudscale-ch/k8test python3 -m venv k8test/venv - source k8test/venv/bin/activate + source k8test/venv/bin/activate pip install poetry poetry install --directory k8test return @@ -74,7 +74,7 @@ function ensure-cluster() { if ! test -f k8test/cluster/ssh.pub; then ssh-keygen -t ed25519 -N '' -f k8test/cluster/ssh fi - + if ! test -f k8test/cluster/admin.conf; then zone="$(random-zone)" @@ -107,7 +107,7 @@ function ensure-cluster() { -e kubelet_extra_args='--cloud-provider=external' \ -e kubernetes="${KUBERNETES}" \ -e cluster_prefix="${CLUSTER_PREFIX}" \ - -e subnet="${SUBNET}" + -e subnet="${SUBNET}" fi } From 42f6a05c57020470b315a11241806a76e366d0ed Mon Sep 17 00:00:00 2001 From: Alain Kaeslin Date: Wed, 17 Sep 2025 13:24:32 +0200 Subject: [PATCH 3/3] Add more cases to TestActualState --- pkg/cloudscale_ccm/reconcile_test.go | 160 +++++++++++++++++---------- 1 file changed, 99 insertions(+), 61 deletions(-) diff --git a/pkg/cloudscale_ccm/reconcile_test.go b/pkg/cloudscale_ccm/reconcile_test.go index b8bd6f2..cf2e703 100644 --- a/pkg/cloudscale_ccm/reconcile_test.go +++ b/pkg/cloudscale_ccm/reconcile_test.go @@ -202,79 +202,117 @@ func TestDesiredService(t *testing.T) { func TestActualState(t *testing.T) { t.Parallel() - server := testkit.NewMockAPIServer() - server.WithLoadBalancers([]cloudscale.LoadBalancer{ + testCases := []struct { + name string + annotations map[string]string + }{ { - UUID: "00000000-0000-0000-0000-000000000000", - Name: "k8test-service-test", - }, - }) - server.On("/v1/load-balancers/pools", 200, []cloudscale.LoadBalancerPool{ - { - Name: "tcp/80", - UUID: "00000000-0000-0000-0000-000000000001", - LoadBalancer: cloudscale.LoadBalancerStub{ - UUID: "00000000-0000-0000-0000-000000000000", + name: "by UUID only", + annotations: map[string]string{ + LoadBalancerUUID: "00000000-0000-0000-0000-000000000000", }, }, - }) - server.On("/v1/load-balancers/pools/00000000-0000-0000-0000-000000000001"+ - "/members", 200, []cloudscale.LoadBalancerPoolMember{ { - Name: "10.0.0.1:8080", - Pool: cloudscale.LoadBalancerPoolStub{ - UUID: "00000000-0000-0000-0000-000000000001", + name: "by Name only", + annotations: map[string]string{ + LoadBalancerName: "k8test-service-test", }, }, - }) - server.On("/v1/load-balancers/listeners", 200, - []cloudscale.LoadBalancerListener{ - { - Name: "tcp/80", - Pool: &cloudscale.LoadBalancerPoolStub{ - UUID: "00000000-0000-0000-0000-000000000001", - }, + { + name: "by matching UUID and Name", + annotations: map[string]string{ + LoadBalancerUUID: "00000000-0000-0000-0000-000000000000", + LoadBalancerName: "k8test-service-test", }, }, - ) - server.On("/v1/load-balancers/health-monitors", 200, - []cloudscale.LoadBalancerHealthMonitor{ - { - Type: "tcp", - Pool: cloudscale.LoadBalancerPoolStub{ - UUID: "00000000-0000-0000-0000-000000000001", - }, + { + name: "by none-matching UUID and Name, name has precedence", + annotations: map[string]string{ + LoadBalancerUUID: "00000000-0000-ffff-0000-000000000000", + LoadBalancerName: "k8test-service-test", }, }, - ) - server.On("/v1/floating-ips", 200, - []cloudscale.FloatingIP{}, - ) - server.Start() - defer server.Close() - - mapper := lbMapper{client: server.Client()} - - s := testkit.NewService("service").V1() - s.Annotations = make(map[string]string) - s.Annotations[LoadBalancerUUID] = "00000000-0000-0000-0000-000000000000" - - i := newServiceInfo(s, "") + } - actual, err := actualLbState(t.Context(), &mapper, i) - assert.NoError(t, err) + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + t.Parallel() - assert.Equal(t, "k8test-service-test", actual.lb.Name) - assert.Len(t, actual.pools, 1) - assert.Len(t, actual.members, 1) - assert.Len(t, actual.listeners, 1) - assert.Len(t, actual.monitors, 1) - - p := actual.pools[0] - assert.Equal(t, "tcp/80", p.Name) - assert.Equal(t, "10.0.0.1:8080", actual.members[p][0].Name) - assert.Equal(t, "tcp/80", actual.listeners[p][0].Name) - assert.Equal(t, "tcp", actual.monitors[p][0].Type) + server := testkit.NewMockAPIServer() + server.WithLoadBalancers([]cloudscale.LoadBalancer{ + { + UUID: "00000000-0000-0000-0000-000000000000", + Name: "k8test-service-test", + }, + }) + server.On("/v1/load-balancers/pools", 200, + []cloudscale.LoadBalancerPool{ + { + Name: "tcp/80", + UUID: "00000000-0000-0000-0000-000000000001", + LoadBalancer: cloudscale.LoadBalancerStub{ + UUID: "00000000-0000-0000-0000-000000000000", + }, + }, + }) + server.On("/v1/load-balancers/pools/"+ + "00000000-0000-0000-0000-000000000001/members", 200, + []cloudscale.LoadBalancerPoolMember{ + { + Name: "10.0.0.1:8080", + Pool: cloudscale.LoadBalancerPoolStub{ + UUID: "00000000-0000-0000-0000-000000000001", + }, + }, + }) + server.On("/v1/load-balancers/listeners", 200, + []cloudscale.LoadBalancerListener{ + { + Name: "tcp/80", + Pool: &cloudscale.LoadBalancerPoolStub{ + UUID: "00000000-0000-0000-0000-000000000001", + }, + }, + }, + ) + server.On("/v1/load-balancers/health-monitors", 200, + []cloudscale.LoadBalancerHealthMonitor{ + { + Type: "tcp", + Pool: cloudscale.LoadBalancerPoolStub{ + UUID: "00000000-0000-0000-0000-000000000001", + }, + }, + }, + ) + server.On("/v1/floating-ips", 200, + []cloudscale.FloatingIP{}, + ) + server.Start() + defer server.Close() + + mapper := lbMapper{client: server.Client()} + + s := testkit.NewService("service").V1() + s.Annotations = tc.annotations + i := newServiceInfo(s, "") + + actual, err := actualLbState(t.Context(), &mapper, i) + assert.NoError(t, err) + + assert.Equal(t, "k8test-service-test", actual.lb.Name) + assert.Len(t, actual.pools, 1) + assert.Len(t, actual.members, 1) + assert.Len(t, actual.listeners, 1) + assert.Len(t, actual.monitors, 1) + + p := actual.pools[0] + assert.Equal(t, "tcp/80", p.Name) + assert.Equal(t, "10.0.0.1:8080", actual.members[p][0].Name) + assert.Equal(t, "tcp/80", actual.listeners[p][0].Name) + assert.Equal(t, "tcp", actual.monitors[p][0].Type) + }) + } } func TestNextLbActionsInvalidCalls(t *testing.T) {