Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 7 additions & 7 deletions pkg/controller/drain/drain_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -326,7 +326,7 @@ func (ctrl *Controller) syncNode(key string) error {
if nErr != nil {
klog.Errorf("Error making MCN for Uncordon failure: %v", err)
}
return fmt.Errorf("failed to uncordon node %v: %w", node.Name, err)
return fmt.Errorf("failed to uncordon node %v: %v", node.Name, err)

}

Expand Down Expand Up @@ -359,7 +359,7 @@ func (ctrl *Controller) syncNode(key string) error {
daemonconsts.LastAppliedDrainerAnnotationKey: desiredState,
}
if err := ctrl.setNodeAnnotations(node.Name, annotations); err != nil {
return fmt.Errorf("node %s: failed to set node uncordoned annotation: %w", node.Name, err)
return fmt.Errorf("node %s: failed to set node uncordoned annotation: %v", node.Name, err)
}
ctrlcommon.UpdateStateMetric(ctrlcommon.MCCSubControllerState, "machine-config-controller-drain", desiredVerb, node.Name)
return nil
Expand Down Expand Up @@ -404,7 +404,7 @@ func (ctrl *Controller) drainNode(node *corev1.Node, drainer *drain.Helper) erro
if Nerr != nil {
klog.Errorf("Error making MCN for Cordon Failure: %v", Nerr)
}
return fmt.Errorf("node %s: failed to cordon: %w", node.Name, err)
return fmt.Errorf("node %s: failed to cordon: %v", node.Name, err)
}
ctrl.ongoingDrains[node.Name] = time.Now()
err := upgrademonitor.GenerateAndApplyMachineConfigNodes(&upgrademonitor.Condition{State: v1alpha1.MachineConfigNodeUpdateExecuted, Reason: string(v1alpha1.MachineConfigNodeUpdateCordoned), Message: fmt.Sprintf("Cordoned Node as part of update executed phase")},
Expand Down Expand Up @@ -513,14 +513,14 @@ func (ctrl *Controller) setNodeAnnotations(nodeName string, annotations map[stri

patchBytes, err := strategicpatch.CreateTwoWayMergePatch(oldNode, newNode, corev1.Node{})
if err != nil {
return fmt.Errorf("node %s: failed to create patch for: %w", nodeName, err)
return fmt.Errorf("node %s: failed to create patch for: %v", nodeName, err)
}

_, err = ctrl.kubeClient.CoreV1().Nodes().Patch(context.TODO(), nodeName, types.StrategicMergePatchType, patchBytes, metav1.PatchOptions{})
return err
}); err != nil {
// may be conflict if max retries were hit
return fmt.Errorf("node %s: unable to update: %w", nodeName, err)
return fmt.Errorf("node %s: unable to update: %v", nodeName, err)
}
return nil
}
Expand Down Expand Up @@ -564,9 +564,9 @@ func (ctrl *Controller) cordonOrUncordonNode(desired bool, node *corev1.Node, dr
}); err != nil {
if wait.Interrupted(err) {
errs := kubeErrs.NewAggregate([]error{err, lastErr})
return fmt.Errorf("node %s: failed to %s (%d tries): %w", node.Name, verb, ctrl.cfg.CordonOrUncordonBackoff.Steps, errs)
return fmt.Errorf("node %s: failed to %s (%d tries): %v", node.Name, verb, ctrl.cfg.CordonOrUncordonBackoff.Steps, errs)
}
return fmt.Errorf("node %s: failed to %s: %w", node.Name, verb, err)
return fmt.Errorf("node %s: failed to %s: %v", node.Name, verb, err)
}

return nil
Expand Down
3 changes: 3 additions & 0 deletions pkg/controller/render/render_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ import (
"context"
"fmt"
"reflect"
"sort"
"time"

mcfgv1 "github.com/openshift/api/machineconfiguration/v1"
Expand Down Expand Up @@ -431,6 +432,8 @@ func (ctrl *Controller) syncMachineConfigPool(key string) error {
if err != nil {
return err
}
sort.SliceStable(mcs, func(i, j int) bool { return mcs[i].Name < mcs[j].Name })
Copy link
Copy Markdown
Contributor

@cdoern cdoern Dec 14, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

what is this for? This seems harmless, but just checking :)

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I feel like I remember this existing but it claims you're adding this, do we not do this somewhere else?

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

unit tests were failing with "error finding pools for machineconfig" and "Action update machineconfigpools has wrong object"

the order of elements in the slice was being altered in syncMachineConfigPool from the lister

Copy link
Copy Markdown
Member

@jkyros jkyros Dec 14, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I feel like I remember this existing but it claims you're adding this, do we not do this somewhere else?

We do it later, in MergeMachineConfigs, but there is some weird non-determinism that we hit a lot in this PR during tests for some reason. see: #4060 (comment)

I have been seeing it intermittently, e.g. #4003 (comment) so I know this PR didn't introduce it, it just seems like it happens way more often here. It might be intended behavior ( like how they intentionally don't want you to depend on go map order so they shuffle it) but I haven't done the due diligence, I'm a slob. 😄

I wasn't being prescriptive on how we accommodate this, I just wanted to prove why it was breaking -- it's possible we could mangle the test fixture to just test for presence not order, but that also might be a weird one-off.


if len(mcs) == 0 {
return ctrl.syncFailingStatus(pool, fmt.Errorf("no MachineConfigs found matching selector %v", selector))
}
Expand Down
15 changes: 15 additions & 0 deletions pkg/operator/sync.go
Original file line number Diff line number Diff line change
Expand Up @@ -1040,6 +1040,21 @@ func (optr *Operator) syncMachineOSBuilder(config *renderConfig) error {
// Determines if the Machine OS Builder deployment is in the correct state
// based upon whether we have opted-in pools or not.
func (optr *Operator) reconcileMachineOSBuilder(mob *appsv1.Deployment) error {
// Access current feature gates
fg, err := optr.fgAccessor.CurrentFeatureGates()
if err != nil {
return fmt.Errorf("could not get feature gates: %w", err)
}

if fg == nil {
return fmt.Errorf("received nil feature gates")
}

// Check if OnClusterBuild feature gate is enabled
if !fg.Enabled(configv1.FeatureGateOnClusterBuild) {
Comment thread
dkhater-redhat marked this conversation as resolved.
return nil
}

// First, check if we have any MachineConfigPools opted in.
layeredMCPs, err := optr.getLayeredMachineConfigPools()
if err != nil {
Expand Down
73 changes: 48 additions & 25 deletions test/e2e/mob_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,24 @@ func TestMachineOSBuilder(t *testing.T) {

t.Cleanup(createConfigMapForTest(t, cs))

// get the feature gates because we're gating this for now
featureGates, err := cs.ConfigV1Interface.FeatureGates().Get(context.TODO(), "cluster", metav1.GetOptions{})
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

you might need to add the FGs to the cs if you want them to be initialized.

Copy link
Copy Markdown
Contributor Author

@dkhater-redhat dkhater-redhat Dec 14, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

does the ClientSet itself store or manage feature gate configurations? I fetch the existing feature gate configurations and use that info to adjust the behavior of the test

require.NoError(t, err, "Failed to retrieve feature gates")

// TODO(jkyros): this should be a helper or we should use whatever the "best practice" way is
// for retrieving the gates during a test, but this works for now
var featureGateEnabled bool
for _, featureGateDetails := range featureGates.Status.FeatureGates {
for _, enabled := range featureGateDetails.Enabled {
if enabled.Name == "OnClusterBuild" {
featureGateEnabled = true

}
}
}

t.Logf("Feature gate OnClusterBuiild enabled: %t", featureGateEnabled)

cleanup := helpers.MakeIdempotent(helpers.CreateMCP(t, cs, mcpName))
t.Cleanup(cleanup)
time.Sleep(10 * time.Second) // Wait a bit to ensure MCP is fully created
Expand All @@ -94,33 +112,38 @@ func TestMachineOSBuilder(t *testing.T) {

// assertion to see if the deployment object is present after setting the label
ctx := context.TODO()
err := wait.PollUntilContextTimeout(ctx, 2*time.Second, time.Minute, true, func(ctx context.Context) (bool, error) {
exists, err := helpers.CheckDeploymentExists(cs, "machine-os-builder", namespace)
return exists, err
})
require.NoError(t, err, "Failed to check the existence of the Machine OS Builder deployment")

// wait for Machine OS Builder pod to start
err = helpers.WaitForPodStart(cs, mobPodNamePrefix, namespace)
require.NoError(t, err, "Failed to start the Machine OS Builder pod")
t.Logf("machine-os-builder deployment exists")

// delete the MachineConfigPool
cleanup()
time.Sleep(20 * time.Second)

// assertion to see if the deployment object is absent after deleting the MCP
err = wait.PollUntilContextTimeout(ctx, 2*time.Second, time.Minute, true, func(ctx context.Context) (bool, error) {
exists, err := helpers.CheckDeploymentExists(cs, "machine-os-builder", namespace)
return !exists, err
return exists, err
})
require.NoError(t, err, "Failed to check the absence of the Machine OS Builder deployment")

// wait for Machine OS Builder pod to stop
err = helpers.WaitForPodStop(cs, mobPodNamePrefix, namespace)
require.NoError(t, err, "Failed to stop the Machine OS Builder pod")
t.Logf("machine-os-builder deployment no longer exists")

_, err = cs.AppsV1Interface.Deployments(ctrlcommon.MCONamespace).Get(context.TODO(), "machine-os-builder", metav1.GetOptions{})
assert.True(t, apierrs.IsNotFound(err), "machine-os-builder deployment still present")
if featureGateEnabled {
require.NoError(t, err, "Failed to check the existence of the Machine OS Builder deployment")

// wait for Machine OS Builder pod to start
err = helpers.WaitForPodStart(cs, mobPodNamePrefix, namespace)
require.NoError(t, err, "Failed to start the Machine OS Builder pod")
t.Logf("machine-os-builder deployment exists")

// delete the MachineConfigPool
cleanup()
time.Sleep(20 * time.Second)

// assertion to see if the deployment object is absent after deleting the MCP
err = wait.PollUntilContextTimeout(ctx, 2*time.Second, time.Minute, true, func(ctx context.Context) (bool, error) {
exists, err := helpers.CheckDeploymentExists(cs, "machine-os-builder", namespace)
return !exists, err
})
require.NoError(t, err, "Failed to check the absence of the Machine OS Builder deployment")

// wait for Machine OS Builder pod to stop
err = helpers.WaitForPodStop(cs, mobPodNamePrefix, namespace)
require.NoError(t, err, "Failed to stop the Machine OS Builder pod")
t.Logf("machine-os-builder deployment no longer exists")

_, err = cs.AppsV1Interface.Deployments(ctrlcommon.MCONamespace).Get(context.TODO(), "machine-os-builder", metav1.GetOptions{})
assert.True(t, apierrs.IsNotFound(err), "machine-os-builder deployment still present")
} else {
require.Error(t, err, "Machine OS Builder deployment exists and it should not, because the feature gate is disabled")
}
}
Empty file modified vendor/k8s.io/code-generator/generate-groups.sh
100644 → 100755
Empty file.
Empty file modified vendor/k8s.io/code-generator/generate-internal-groups.sh
100644 → 100755
Empty file.