From 930c33b2814d6ed083aa684a92f1fbc0bd6c4df6 Mon Sep 17 00:00:00 2001 From: Joe Lanford Date: Wed, 22 Oct 2025 22:00:24 -0400 Subject: [PATCH] Fix race condition in duplicate InstallPlan prevention Move the listInstallPlans() call to after the muInstallPlan lock is acquired in ensureInstallPlan(). Previously, the check for existing install plans happened before acquiring the lock, which meant that worker 2 could check for install plans before worker 1 had a chance to create one, even with the lock in place. This fixes the race condition by ensuring that the sequence is: 1. Worker 1 acquires lock 2. Worker 1 checks for existing install plans 3. Worker 1 creates new install plan (if needed) 4. Worker 1 releases lock 5. Worker 2 acquires lock 6. Worker 2 checks for existing install plans (now sees worker 1's) 7. Worker 2 reuses existing install plan 8. Worker 2 releases lock Without this fix, both workers could see no existing install plans before either acquired the lock, leading to duplicate install plans being created for the same subscription. --- pkg/controller/operators/catalog/operator.go | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/pkg/controller/operators/catalog/operator.go b/pkg/controller/operators/catalog/operator.go index c26c3da927..2c07966e77 100644 --- a/pkg/controller/operators/catalog/operator.go +++ b/pkg/controller/operators/catalog/operator.go @@ -1654,12 +1654,6 @@ func (o *Operator) ensureInstallPlan(logger *logrus.Entry, namespace string, gen return nil, nil } - // Check if any existing installplans are creating the same resources - installPlans, err := o.listInstallPlans(namespace) - if err != nil { - return nil, err - } - // There are multiple(2) worker threads process the namespaceQueue. // Both worker can work at the same time when 2 separate updates are made for the namespace. // The following sequence causes 2 installplans are created for a subscription @@ -1680,6 +1674,14 @@ func (o *Operator) ensureInstallPlan(logger *logrus.Entry, namespace string, gen o.muInstallPlan.Lock() defer o.muInstallPlan.Unlock() + // Check if any existing installplans are creating the same resources + // This must be done AFTER acquiring the lock to ensure worker 2 sees + // the installplan created by worker 1 + installPlans, err := o.listInstallPlans(namespace) + if err != nil { + return nil, err + } + for _, installPlan := range installPlans { if installPlan.Spec.Generation == gen { return reference.GetReference(installPlan)