From 416a2f7284f3cac76d9914fa80289a7975a541ee Mon Sep 17 00:00:00 2001 From: Isabella Janssen Date: Mon, 10 Mar 2025 14:56:33 -0400 Subject: [PATCH 1/3] owners: update MCO team list --- test/extended/machine_config/OWNERS | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/test/extended/machine_config/OWNERS b/test/extended/machine_config/OWNERS index 08ca82b82b16..87dcdd07da38 100644 --- a/test/extended/machine_config/OWNERS +++ b/test/extended/machine_config/OWNERS @@ -3,8 +3,18 @@ approvers: - dkhater-redhat - yuqi-zhang - cheesesashimi + - umohnani8 + - LorbusChris + - RishabhSaini + - isabella-janssen + - pablintino reviewers: - djoshy - dkhater-redhat - yuqi-zhang - cheesesashimi + - umohnani8 + - LorbusChris + - RishabhSaini + - isabella-janssen + - pablintino \ No newline at end of file From 3a6cbb6cca4449a4674201015c1c53701f2dfdd6 Mon Sep 17 00:00:00 2001 From: Isabella Janssen Date: Wed, 9 Apr 2025 21:05:54 -0400 Subject: [PATCH 2/3] mco-1520: implement origin tests for MachineConfigNode --- test/extended/machine_config/helpers.go | 839 +++++++++++++++++- .../machine_config/machine_config_node.go | 517 +++++++++++ test/extended/testdata/bindata.go | 177 ++++ .../machineconfig/0-infra-mc.yaml | 16 + .../machineconfig/0-master-mc.yaml | 16 + .../machineconfig/1-master-invalid-mc.yaml | 16 + .../machineconfig/1-worker-invalid-mc.yaml | 16 + .../machineconfigpool/infra-mcp.yaml | 11 + .../generated/zz_generated.annotations.go | 14 + zz_generated.manifests/test-reporting.yaml | 14 + 10 files changed, 1635 insertions(+), 1 deletion(-) create mode 100644 test/extended/machine_config/machine_config_node.go create mode 100644 test/extended/testdata/machine_config/machineconfig/0-infra-mc.yaml create mode 100644 test/extended/testdata/machine_config/machineconfig/0-master-mc.yaml create mode 100644 test/extended/testdata/machine_config/machineconfig/1-master-invalid-mc.yaml create mode 100644 
test/extended/testdata/machine_config/machineconfig/1-worker-invalid-mc.yaml create mode 100644 test/extended/testdata/machine_config/machineconfigpool/infra-mcp.yaml diff --git a/test/extended/machine_config/helpers.go b/test/extended/machine_config/helpers.go index 170377860170..c6085765492d 100644 --- a/test/extended/machine_config/helpers.go +++ b/test/extended/machine_config/helpers.go @@ -3,13 +3,21 @@ package machine_config import ( "context" "encoding/json" + "errors" "fmt" "math/rand" + "os" + "os/exec" + "strings" "time" + "k8s.io/apimachinery/pkg/fields" + "k8s.io/client-go/kubernetes" + osconfigv1 "github.com/openshift/api/config/v1" machinev1beta1 "github.com/openshift/api/machine/v1beta1" mcfgv1 "github.com/openshift/api/machineconfiguration/v1" + mcfgv1alpha1 "github.com/openshift/api/machineconfiguration/v1alpha1" opv1 "github.com/openshift/api/operator/v1" machineclient "github.com/openshift/client-go/machine/clientset/versioned" machineconfigclient "github.com/openshift/client-go/machineconfiguration/clientset/versioned" @@ -21,7 +29,7 @@ import ( corev1 "k8s.io/api/core/v1" apierrors "k8s.io/apimachinery/pkg/api/errors" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" - "k8s.io/client-go/kubernetes" + "k8s.io/apimachinery/pkg/labels" "k8s.io/kubernetes/test/e2e/framework" e2eskipper "k8s.io/kubernetes/test/e2e/framework/skipper" "k8s.io/utils/ptr" @@ -35,6 +43,9 @@ const ( cmName = "coreos-bootimages" mapiMasterMachineLabelSelector = "machine.openshift.io/cluster-api-machine-role=master" mapiMachineSetArchAnnotationKey = "capacity.cluster-autoscaler.kubernetes.io/labels" + currentConfigAnnotationKey = "machineconfiguration.openshift.io/currentConfig" + desiredConfigAnnotationKey = "machineconfiguration.openshift.io/desiredConfig" + stateAnnotationKey = "machineconfiguration.openshift.io/state" ) // skipUnlessTargetPlatform skips the test if it is running on the target platform @@ -83,6 +94,13 @@ func skipOnSingleNodeTopology(oc *exutil.CLI) { } } 
+// `IsSingleNode` returns true if the cluster is using single-node topology and false otherwise +func IsSingleNode(oc *exutil.CLI) bool { + infra, err := oc.AdminConfigClient().ConfigV1().Infrastructures().Get(context.Background(), "cluster", metav1.GetOptions{}) + o.Expect(err).NotTo(o.HaveOccurred(), "Error determining cluster infrastructure.") + return infra.Status.ControlPlaneTopology == osconfigv1.SingleReplicaTopologyMode +} + // getRandomMachineSet picks a random machineset present on the cluster func getRandomMachineSet(machineClient *machineclient.Clientset) machinev1beta1.MachineSet { machineSets, err := machineClient.MachineV1beta1().MachineSets("openshift-machine-api").List(context.TODO(), metav1.ListOptions{}) @@ -316,7 +334,826 @@ func ApplyBootImageFixture(oc *exutil.CLI, fixture string) { // Ensure status accounts for the fixture that was applied WaitForMachineConfigurationStatusUpdate(oc) +} + +// `ValidateMCNForNodeInPool` validates the MCN of a node in a given pool. It does the following: +// 1. Get node from desired pool +// 2. Get the MCN for the node +// 3. 
Validate the MCN against the node properties +// - Check that `mcn.Spec.Pool.Name` matches provided `poolName` +// - Check that `mcn.Name` matches the node name +// - Check that `mcn.Spec.ConfigVersion.Desired` matches the node desired config version +// - Check that `mcn.Status.ConfigVersion.Current` matches the node current config version +// - Check that `mcn.Status.ConfigVersion.Desired` matches the node desired config version +func ValidateMCNForNodeInPool(oc *exutil.CLI, clientSet *machineconfigclient.Clientset, node corev1.Node, poolName string) error { + // Get node's desired and current config versions + nodeCurrentConfig := node.Annotations[currentConfigAnnotationKey] + nodeDesiredConfig := node.Annotations[desiredConfigAnnotationKey] + + // Get node MCN + framework.Logf("Getting MCN for node '%v'.", node.Name) + mcn, mcnErr := clientSet.MachineconfigurationV1alpha1().MachineConfigNodes().Get(context.TODO(), node.Name, metav1.GetOptions{}) + if mcnErr != nil { + framework.Logf("Could not get MCN for node '%v'.", node.Name) + return mcnErr + } + + // Check MCN pool name value for default MCPs + framework.Logf("Checking MCN pool name for node '%v' matches pool association '%v'.", node.Name, poolName) + if mcn.Spec.Pool.Name != poolName { + framework.Logf("MCN pool name '%v' does not match node MCP association '%v'.", mcn.Spec.Pool.Name, poolName) + return fmt.Errorf("MCN pool name does not match node MCP association") + } + + // Check MCN name matches node name + framework.Logf("Checking MCN name matches node name '%v'.", node.Name) + if mcn.Name != node.Name { + framework.Logf("MCN name '%v' does not match node name '%v'.", mcn.Name, node.Name) + return fmt.Errorf("MCN name does not match node name") + } + + // Check desired config version in MCN spec matches desired config on node + framework.Logf("Checking node '%v' desired config version '%v' matches desired config version in MCN spec.", node.Name, nodeDesiredConfig) + if mcn.Spec.ConfigVersion.Desired
!= nodeDesiredConfig { + framework.Logf("MCN spec desired config version '%v' does not match node desired config version '%v'.", mcn.Spec.ConfigVersion.Desired, nodeDesiredConfig) + return fmt.Errorf("MCN spec desired config version does not match node desired config version") + } + + // Check current config version in MCN status matches current config on node + framework.Logf("Checking node '%v' current config version '%v' matches current version in MCN status.", node.Name, nodeCurrentConfig) + if mcn.Status.ConfigVersion.Current != nodeCurrentConfig { + framework.Logf("MCN status current config version '%v' does not match node current config version '%v'.", mcn.Status.ConfigVersion.Current, nodeCurrentConfig) + return fmt.Errorf("MCN status current config version does not match node current config version") + } + + // Check desired config version in MCN status matches desired config on node + framework.Logf("Checking node '%v' desired config version '%v' matches desired version in MCN status.", node.Name, nodeDesiredConfig) + if mcn.Status.ConfigVersion.Desired != nodeDesiredConfig { + framework.Logf("MCN status desired config version '%v' does not match node desired config version '%v'.", mcn.Status.ConfigVersion.Desired, nodeDesiredConfig) + return fmt.Errorf("MCN status desired config version does not match node desired config version") + } + + return nil +} + +// `GetRandomNode` gets a random node from a given MCP and checks whether the node is ready. If no +// nodes are ready, it will wait for up to 5 minutes for a node to become available.
+func GetRandomNode(oc *exutil.CLI, pool string) corev1.Node { + if node := getRandomNode(oc, pool); isNodeReady(node) { + return node + } + + // If no nodes are ready, wait for up to 5 minutes for one to be ready + waitPeriod := time.Minute * 5 + framework.Logf("No ready nodes found for pool '%s', waiting up to %s for a ready node to become available", pool, waitPeriod) + var targetNode corev1.Node + o.Eventually(func() bool { + if node := getRandomNode(oc, pool); isNodeReady(node) { + targetNode = node + return true + } + + return false + }, 5*time.Minute, 2*time.Second).Should(o.BeTrue()) + + return targetNode +} + +// `getRandomNode` gets a random node from a given pool +func getRandomNode(oc *exutil.CLI, pool string) corev1.Node { + nodes, err := GetNodesByRole(oc, pool) + o.Expect(err).NotTo(o.HaveOccurred()) + o.Expect(nodes).ShouldNot(o.BeEmpty()) + + // Disable gosec here to avoid throwing + // G404: Use of weak random number generator (math/rand instead of crypto/rand) + // #nosec + rnd := rand.New(rand.NewSource(time.Now().UnixNano())) + return nodes[rnd.Intn(len(nodes))] +} + +// `GetNodesByRole` gets all nodes labeled with the desired role +func GetNodesByRole(oc *exutil.CLI, role string) ([]corev1.Node, error) { + listOptions := metav1.ListOptions{ + LabelSelector: labels.SelectorFromSet(labels.Set{fmt.Sprintf("node-role.kubernetes.io/%s", role): ""}).String(), + } + nodes, err := oc.AsAdmin().KubeClient().CoreV1().Nodes().List(context.TODO(), listOptions) + if err != nil { + return nil, err + } + return nodes.Items, nil +} + +// `isNodeReady` determines if a given node is ready +func isNodeReady(node corev1.Node) bool { + // If the node is cordoned, it is not ready. + if node.Spec.Unschedulable { + return false + } + + // If the nodes' kubelet is not ready, it is not ready. + if !isNodeKubeletReady(node) { + return false + } + + // If the nodes' MCD is not done, it is not ready. 
+ if !checkMCDState(node, "Done") { + return false + } + + return true +} + +// `isNodeKubeletReady` determines if a given node's kubelet is ready +func isNodeKubeletReady(node corev1.Node) bool { + for _, condition := range node.Status.Conditions { + if condition.Reason == "KubeletReady" && condition.Status == "True" && condition.Type == "Ready" { + return true + } + } + + return false +} + +// `checkMCDState` determines whether the MCD state matches the provided desired state +func checkMCDState(node corev1.Node, desiredState string) bool { + state := node.Annotations[stateAnnotationKey] + return state == desiredState +} + +// `WaitForMCPToBeReady` waits up to 5 minutes for a pool to be in an updated state with a specified number of ready machines +func WaitForMCPToBeReady(oc *exutil.CLI, machineConfigClient *machineconfigclient.Clientset, poolName string, readyMachineCount int32) { + o.Eventually(func() bool { + mcp, err := machineConfigClient.MachineconfigurationV1().MachineConfigPools().Get(context.TODO(), poolName, metav1.GetOptions{}) + if err != nil { + framework.Logf("Failed to grab MCP '%v', error :%v", poolName, err) + return false + } + // Check if the pool is in an updated state with the correct number of ready machines + if IsMachineConfigPoolConditionTrue(mcp.Status.Conditions, mcfgv1.MachineConfigPoolUpdated) && mcp.Status.UpdatedMachineCount == readyMachineCount { + return true + } + framework.Logf("MCP '%v' has %v ready machines. 
Waiting for the desired ready machine count of %v.", poolName, mcp.Status.UpdatedMachineCount, readyMachineCount) + return false + }, 5*time.Minute, 10*time.Second).Should(o.BeTrue(), "Timed out waiting for MCP '%v' to be in 'Updated' state with %v ready machines.", poolName, readyMachineCount) +} + +// `GetCordonedNodes` gets all cordoned nodes +// - If maxUnavailable > 1, this will return multiple cordoned nodes +// - If maxUnavailable == 1, this will return one cordoned node +func GetCordonedNodes(oc *exutil.CLI, mcpName string) []corev1.Node { + // Wait for the MCP to start updating + o.Expect(WaitForMCPConditionStatus(oc, mcpName, mcfgv1.MachineConfigPoolUpdating, corev1.ConditionTrue, 3*time.Minute, 2*time.Second)).NotTo(o.HaveOccurred(), "Waiting for 'Updating' status change failed.") + + // Get updating nodes + var allUpdatingNodes []corev1.Node + o.Eventually(func() bool { + nodes, nodeErr := GetNodesByRole(oc, mcpName) + o.Expect(nodeErr).NotTo(o.HaveOccurred(), "Error getting nodes from %v MCP.", mcpName) + o.Expect(nodes).ShouldNot(o.BeEmpty(), "No nodes found for %v MCP.", mcpName) + + for _, node := range nodes { + if node.Spec.Unschedulable { + allUpdatingNodes = append(allUpdatingNodes, node) + } + } + + return len(allUpdatingNodes) > 0 + }, 5*time.Minute, 10*time.Second).Should(o.BeTrue()) + + return allUpdatingNodes +} + +// `WaitForMCPConditionStatus` waits up to the desired timeout for the desired MCP condition to match the desired status (ex. 
wait until "Updating" is "True") +func WaitForMCPConditionStatus(oc *exutil.CLI, mcpName string, conditionType mcfgv1.MachineConfigPoolConditionType, status corev1.ConditionStatus, timeout time.Duration, interval time.Duration) error { + framework.Logf("Waiting up to %v for MCP '%s' condition '%s' to be '%s'.", timeout, mcpName, conditionType, status) + machineConfigClient, err := machineconfigclient.NewForConfig(oc.KubeFramework().ClientConfig()) + o.Expect(err).NotTo(o.HaveOccurred()) + o.Eventually(func() bool { + framework.Logf("Waiting for '%v' MCP's '%v' condition to be '%v'.", mcpName, conditionType, status) + + // Get MCP; log the Get error itself (mcpErr), since the outer client-construction err is always nil here + mcp, mcpErr := machineConfigClient.MachineconfigurationV1().MachineConfigPools().Get(context.TODO(), mcpName, metav1.GetOptions{}) + if mcpErr != nil { + framework.Logf("Failed to grab MCP '%v', error :%v", mcpName, mcpErr) + return false + } + + // Loop through conditions to check for the desired condition type/status combination + conditions := mcp.Status.Conditions + for _, condition := range conditions { + if condition.Type == conditionType { + framework.Logf("MCP '%s' condition '%s' status is '%s'", mcp.Name, conditionType, condition.Status) + return condition.Status == status + } + } + + return false + }, timeout, interval).Should(o.BeTrue()) + return nil +} + +// `WaitForMCNConditionStatus` waits up to a specified timeout for the desired MCN condition to match the desired status (ex.
wait until "Updated" is "False") +func WaitForMCNConditionStatus(clientSet *machineconfigclient.Clientset, mcnName string, conditionType mcfgv1alpha1.StateProgress, status metav1.ConditionStatus, timeout time.Duration, interval time.Duration) error { + o.Eventually(func() bool { + framework.Logf("Waiting for MCN '%v' %v condition to be %v.", mcnName, conditionType, status) + + // Get MCN & check if the MCN condition status matches the desired status + workerNodeMCN, workerErr := clientSet.MachineconfigurationV1alpha1().MachineConfigNodes().Get(context.TODO(), mcnName, metav1.GetOptions{}) + if workerErr != nil { + framework.Logf("Error getting MCN for node '%v': %v", mcnName, workerErr) + return false + } + return CheckMCNConditionStatus(workerNodeMCN, conditionType, status) + }, timeout, interval).Should(o.BeTrue()) + return nil +} + +// `CheckMCNConditionStatus` checks that an MCN condition matches the desired status (ex. confirm "Updated" is "False") +func CheckMCNConditionStatus(mcn *mcfgv1alpha1.MachineConfigNode, conditionType mcfgv1alpha1.StateProgress, status metav1.ConditionStatus) bool { + conditionStatus := getMCNConditionStatus(mcn, conditionType) + return conditionStatus == status +} + +// `getMCNConditionStatus` returns the status of the desired condition type for MCN, or an empty string if the condition does not exist +func getMCNConditionStatus(mcn *mcfgv1alpha1.MachineConfigNode, conditionType mcfgv1alpha1.StateProgress) metav1.ConditionStatus { + // Loop through conditions and return the status of the desired condition type + conditions := mcn.Status.Conditions + for _, condition := range conditions { + if condition.Type == string(conditionType) { + framework.Logf("MCN '%s' %s condition status is %s", mcn.Name, conditionType, condition.Status) + return condition.Status + } + } + return "" +} + +// `ConfirmUpdatedMCNStatus` confirms that an MCN is in a fully updated state, which requires: +// 1. "Updated" = True +// 2. 
All other conditions = False +func ConfirmUpdatedMCNStatus(clientSet *machineconfigclient.Clientset, mcnName string) bool { + // Get MCN + workerNodeMCN, workerErr := clientSet.MachineconfigurationV1alpha1().MachineConfigNodes().Get(context.TODO(), mcnName, metav1.GetOptions{}) + o.Expect(workerErr).NotTo(o.HaveOccurred()) + + // Loop through conditions and confirm 'Updated' is True and every other condition is False + conditions := workerNodeMCN.Status.Conditions + for _, condition := range conditions { + if condition.Type == string(mcfgv1alpha1.MachineConfigNodeUpdated) && condition.Status != metav1.ConditionTrue { + framework.Logf("Node '%s' update is not complete; 'Updated' condition status is '%v'", mcnName, condition.Status) + return false + } else if condition.Type != string(mcfgv1alpha1.MachineConfigNodeUpdated) && condition.Status != metav1.ConditionFalse { + framework.Logf("Node '%s' is updated but MCN is invalid; '%v' condition status is '%v'", mcnName, condition.Type, condition.Status) + return false + } + } + + framework.Logf("Node '%s' update is complete and corresponding MCN is valid.", mcnName) + return true +} + +// `GetDegradedNode` gets a degraded node from a specified MCP +func GetDegradedNode(oc *exutil.CLI, mcpName string) (corev1.Node, error) { + // Get nodes in desired pool + nodes, nodeErr := GetNodesByRole(oc, mcpName) + if nodeErr != nil { + return corev1.Node{}, nodeErr + } else if len(nodes) == 0 { + return corev1.Node{}, fmt.Errorf("no nodes found in MCP '%v'", mcpName) + } + + // Get degraded node + for _, node := range nodes { + if checkMCDState(node, "Degraded") { + return node, nil + } + } + + return corev1.Node{}, errors.New("no degraded node found") +} + +// `RecoverFromDegraded` gets the degraded node in the desired MCP, forces the node to recover by updating its desired +// config to be its current config, and waits for the MCP to return to an Updated=True state +func RecoverFromDegraded(oc *exutil.CLI, mcpName string) error { +
framework.Logf("Recovering %s pool from degraded state", mcpName) + + // Get nodes from degraded MCP & update the desired config of the degraded node to force a recovery update + nodes, nodeErr := GetNodesByRole(oc, mcpName) + o.Expect(nodeErr).NotTo(o.HaveOccurred()) + o.Expect(nodes).ShouldNot(o.BeEmpty()) + for _, node := range nodes { + framework.Logf("Restoring desired config for node: %s", node.Name) + if checkMCDState(node, "Done") { + framework.Logf("Node %s is updated and does not need to be recovered", node.Name) + } else { + err := restoreDesiredConfig(oc, node) + if err != nil { + return fmt.Errorf("error restoring desired config in node %s. Error: %s", node.Name, err) + } + } + } + + // Wait for MCP to not be in degraded status + mcpErr := WaitForMCPConditionStatus(oc, mcpName, "Degraded", "False", 4*time.Minute, 5*time.Second) + o.Expect(mcpErr).NotTo(o.HaveOccurred(), fmt.Sprintf("could not recover %v MCP from the degraded status.", mcpName)) + mcpErr = WaitForMCPConditionStatus(oc, mcpName, "Updated", "True", 7*time.Minute, 5*time.Second) + o.Expect(mcpErr).NotTo(o.HaveOccurred(), fmt.Sprintf("%v MCP could not reach an updated state.", mcpName)) + return nil +} + +// `restoreDesiredConfig` updates the value of a node's desiredConfig annotation to be equal to the value of its currentConfig (desiredConfig=currentConfig) +func restoreDesiredConfig(oc *exutil.CLI, node corev1.Node) error { + // Get current config + currentConfig := node.Annotations[currentConfigAnnotationKey] + if currentConfig == "" { + return fmt.Errorf("currentConfig annotation is empty for node %s", node.Name) + } + + // Update desired config to be equal to current config + framework.Logf("Node: %s is restoring desiredConfig value to match currentConfig value: %s", node.Name, currentConfig) + configErr := oc.Run("patch").Args(fmt.Sprintf("node/%v", node.Name), "--patch", fmt.Sprintf(`{"metadata":{"annotations":{"machineconfiguration.openshift.io/desiredConfig":"%v"}}}`, 
currentConfig), "--type=merge").Execute() + return configErr +} + +// `WorkersCanBeScaled` checks whether the worker nodes in a cluster can be scaled. +// Cases where scaling worker nodes is NOT possible include: +// - Baremetal platform +// - MachineAPI is disabled +// - Error getting list of MachineSets / no MachineSets exist +// - All MachineSets have 0 worker nodes +func WorkersCanBeScaled(oc *exutil.CLI, machineClient *machineclient.Clientset) (bool, error) { + framework.Logf("Checking if worker nodes can be scaled using machinesets.") + + // Check if platform is baremetal + framework.Logf("Checking if cluster platform is baremetal.") + if checkPlatform(oc) == "baremetal" { + framework.Logf("Cluster platform is baremetal. Nodes cannot be scaled in baremetal test environments.") + return false, nil + } + + // Check if MachineAPI is enabled + framework.Logf("Checking if MachineAPI is enabled.") + if !isCapabilityEnabled(oc, "MachineAPI") { + framework.Logf("MachineAPI capability is not enabled. Nodes cannot be scaled.") + return false, nil + } + + // Get MachineSets + framework.Logf("Getting MachineSets.") + machineSets, machineSetErr := machineClient.MachineV1beta1().MachineSets("openshift-machine-api").List(context.TODO(), metav1.ListOptions{}) + if machineSetErr != nil { + framework.Logf("Error getting list of MachineSets.") + return false, machineSetErr + } else if len(machineSets.Items) == 0 { + framework.Logf("No MachineSets configured. 
Nodes cannot be scaled.") + return false, nil + } + + // Check if all MachineSets have 0 replicas + // Per openshift-tests-private repo: + // "In some UPI/SNO/Compact clusters machineset resources exist, but they are all configured with 0 replicas + // If all machinesets have 0 replicas, then it means that we need to skip the test case" + machineSetsWithReplicas := 0 + for _, machineSet := range machineSets.Items { + replicas := machineSet.Spec.Replicas + machineSetsWithReplicas += int(ptr.Deref(replicas, 0)) + } + if machineSetsWithReplicas == 0 { + framework.Logf("All machinesets have 0 worker nodes. Nodes cannot be scaled.") + return false, nil + } + + return true, nil +} + +// `checkPlatform` returns the cluster's platform +func checkPlatform(oc *exutil.CLI) string { + output, err := oc.AsAdmin().Run("get").Args("infrastructure", "cluster", "-o=jsonpath={.status.platformStatus.type}").Output() + o.Expect(err).NotTo(o.HaveOccurred(), "Failed determining cluster infrastructure.") + return strings.ToLower(output) +} +// `isCapabilityEnabled` checks whether a desired capability is in the cluster's enabledCapabilities list +func isCapabilityEnabled(oc *exutil.CLI, desiredCapability osconfigv1.ClusterVersionCapability) bool { + enabledCapabilities := getEnabledCapabilities(oc) + enabled := false + for _, enabledCapability := range enabledCapabilities { + if enabledCapability == desiredCapability { + enabled = true + break + } + } + framework.Logf("Capability '%s' is enabled: %v", desiredCapability, enabled) + + return enabled +} + +// `getEnabledCapabilities` gets a cluster's enabled capability list +func getEnabledCapabilities(oc *exutil.CLI) []osconfigv1.ClusterVersionCapability { + clusterversion, err := oc.AsAdmin().AdminConfigClient().ConfigV1().ClusterVersions().Get(context.TODO(), "version", metav1.GetOptions{}) + o.Expect(err).NotTo(o.HaveOccurred(), "Error getting clusterversion.") + enabledCapabilities := clusterversion.Status.Capabilities.EnabledCapabilities + + return
enabledCapabilities +} + +// `ScaleMachineSet` scales the provided MachineSet by updating the replica to be the provided value +func ScaleMachineSet(oc *exutil.CLI, machineSetName string, replicaValue string) error { + return oc.Run("scale").Args(fmt.Sprintf("--replicas=%v", replicaValue), "machinesets.machine.openshift.io", machineSetName, "-n", "openshift-machine-api").Execute() +} + +// `GetMachinesByPhase` get machine by phase e.g. Running, Provisioning, Provisioned, Deleting etc. +func GetMachinesByPhase(machineClient *machineclient.Clientset, machineSetName string, desiredPhase string) (machinev1beta1.Machine, error) { + desiredMachine := machinev1beta1.Machine{} + err := fmt.Errorf("no %v machine found in %v MachineSet", desiredPhase, machineSetName) + o.Eventually(func() bool { + framework.Logf("Trying to get machine with phase %v from MachineSet '%v'.", desiredPhase, machineSetName) + + // Get machines in desired MachineSet + machines, machinesErr := machineClient.MachineV1beta1().Machines(mapiNamespace).List(context.Background(), metav1.ListOptions{LabelSelector: fmt.Sprintf("machine.openshift.io/cluster-api-machineset=%v", machineSetName)}) + o.Expect(machinesErr).NotTo(o.HaveOccurred()) + + // Find machine in desired phase + for _, machine := range machines.Items { + machinePhase := ptr.Deref(machine.Status.Phase, "") + if machinePhase == desiredPhase { + desiredMachine = machine + err = nil + return true + } + } + return false + }, 8*time.Minute, 3*time.Second).Should(o.BeTrue()) + return desiredMachine, err +} + +// `UpdateDeleteMachineAnnotation` updates the provided MachineSet's `deletePolicy` to be true. +// This will ensure the create machine is the one deleted on cleanup. 
+func UpdateDeleteMachineAnnotation(oc *exutil.CLI, machineSetName string) error { + return oc.Run("patch").Args(fmt.Sprintf("machines.machine.openshift.io/%v", machineSetName), "-n", "openshift-machine-api", "--patch", `{"metadata":{"annotations":{"machine.openshift.io/delete-machine":"true"}}}`, "--type=merge").Execute() +} + +// `WaitForMachineInState` waits up to 10 minutes for the desired machine to be in the desired state +func WaitForMachineInState(machineClient *machineclient.Clientset, machineName string, desiredPhase string) error { + o.Eventually(func() bool { + // Get the desired machine + machine, machineErr := machineClient.MachineV1beta1().Machines(mapiNamespace).Get(context.TODO(), machineName, metav1.GetOptions{}) + o.Expect(machineErr).NotTo(o.HaveOccurred()) + + // Check if machine phase is desired phase + machinePhase := ptr.Deref(machine.Status.Phase, "") + framework.Logf("Machine '%v' is in %v phase.", machineName, machinePhase) + return machinePhase == desiredPhase + }, 10*time.Minute, 10*time.Second).Should(o.BeTrue()) + return nil +} + +// `GetNodeInMachine` gets the node associated with a machine +func GetNodeInMachine(oc *exutil.CLI, machineName string) (corev1.Node, error) { + // Get name of nodes associated with the desired machine + nodeNames, nodeNamesErr := oc.Run("get").Args("nodes", "-o", fmt.Sprintf(`jsonpath='{.items[?(@.metadata.annotations.machine\.openshift\.io/machine=="openshift-machine-api/%v")].metadata.name}'`, machineName)).Output() + if nodeNamesErr != nil { //error getting filtered node names + return corev1.Node{}, nodeNamesErr + } else if strings.Trim(nodeNames, "'") == "" { //error when no nodes are found; the jsonpath template wraps its output in single quotes, so an empty match is `''` + return corev1.Node{}, fmt.Errorf("no node is linked to Machine: %s", machineName) + } + + // Determine the number of nodes in the Machine + // Note: the format of `nodeNames` is the names of nodes separated by a space (ex: "node-name-1 node-name-2"), + // so the number of nodes is equal to one more than the number of spaces +
numberOfNodeNames := strings.Count(nodeNames, " ") + 1 + if numberOfNodeNames > 1 { //error when a machine has more than one node + return corev1.Node{}, fmt.Errorf("more than one node is linked to Machine: %s; number of nodes: %d", machineName, numberOfNodeNames) + } + + node, nodeErr := oc.AsAdmin().KubeClient().CoreV1().Nodes().Get(context.TODO(), strings.ReplaceAll(nodeNames, "'", ""), metav1.GetOptions{}) + if nodeErr != nil { //error getting filtered node names + return corev1.Node{}, nodeErr + } + + return *node, nil +} + +// `GetNewReadyNodeInMachine` waits up to 2 minutes for the newly provisioned node in a desired machine node to be ready +func GetNewReadyNodeInMachine(oc *exutil.CLI, machineName string) (corev1.Node, error) { + desiredNode := corev1.Node{} + err := fmt.Errorf("no ready node in Machine: %s", machineName) + o.Eventually(func() bool { + // Get the desired node + node, nodeErr := GetNodeInMachine(oc, machineName) + o.Expect(nodeErr).NotTo(o.HaveOccurred()) + + // Check if node is in desiredStatus + framework.Logf("Checking if node '%v' is ready.", node.Name) + if isNodeReady(node) { + framework.Logf("Node '%v' is ready.", node.Name) + desiredNode = node + err = nil + return true + } + + return false + }, 2*time.Minute, 3*time.Second).Should(o.BeTrue()) + return desiredNode, err +} + +// `WaitForValidMCNProperties` waits for the MCN of a node to be valid. 
To be valid, the following must be true: +// - MCN with name equivalent to node name exists (waits up to 20 sec) +// - Pool name in MCN spec matches node MCP association (waits up to 1 min) +// - Desired config version of node matches desired config version in MCN spec (waits up to 1 min) +// - Current config version of node matches current config version in MCN status (waits up to 2 min) +// - Desired config version of node matches desired config version in MCN status (waits up to 2 min) +func WaitForValidMCNProperties(clientSet *machineconfigclient.Clientset, node corev1.Node) error { + nodeDesiredConfig := node.Annotations[desiredConfigAnnotationKey] + nodeCurrentConfig := node.Annotations[currentConfigAnnotationKey] + + // Check MCN exists and that its name and node name match + framework.Logf("Checking MCN exists and name matches node name '%v'.", node.Name) + o.Eventually(func() bool { + // Get the desired MCN + newMCN, newMCNErr := clientSet.MachineconfigurationV1alpha1().MachineConfigNodes().Get(context.TODO(), node.Name, metav1.GetOptions{}) + if newMCNErr != nil { + framework.Logf("Failed getting MCN '%v'.", node.Name) + return false + } + + // Check if MCN name matches node's name + framework.Logf("Node name: %v.
MCN name: %v.", node.Name, newMCN.Name) + return node.Name == newMCN.Name + }, 20*time.Second, 2*time.Second).Should(o.BeTrue(), fmt.Sprintf("Could not get MCN for node %v", node.Name)) + + // Check pool name in MCN matches node MCP association + // Note: pool name should be default value of `worker` + framework.Logf("Waiting for node MCP to match pool name in MCN '%v' spec.", node.Name) + nodeMCP := "" + var ok bool + if _, ok = node.Labels["node-role.kubernetes.io/worker"]; ok { + nodeMCP = "worker" + } else { + return fmt.Errorf("node MCP association could not be determined for node %v; node is not in default worker pool", node.Name) + } + o.Eventually(func() bool { + // Get the desired MCN + newMCN, newMCNErr := clientSet.MachineconfigurationV1alpha1().MachineConfigNodes().Get(context.TODO(), node.Name, metav1.GetOptions{}) + if newMCNErr != nil { + framework.Logf("Failed getting MCN '%v'.", node.Name) + return false + } + + // Check if MCN pool name in spec matches node's MCP association + framework.Logf("Node MCP association: %v. MCN spec pool name: %v.", nodeMCP, newMCN.Spec.Pool.Name) + return newMCN.Spec.Pool.Name == nodeMCP + }, 1*time.Minute, 5*time.Second).Should(o.BeTrue()) + + // Check desired config version matches for node and MCN spec config version + framework.Logf("Waiting for node desired config version to match desired config version in MCN '%v' spec.", node.Name) + o.Eventually(func() bool { + // Get the desired MCN + newMCN, newMCNErr := clientSet.MachineconfigurationV1alpha1().MachineConfigNodes().Get(context.TODO(), node.Name, metav1.GetOptions{}) + if newMCNErr != nil { + framework.Logf("Failed getting MCN '%v'.", node.Name) + return false + } + + // Check if MCN desired config version in spec matches node's desired config version + framework.Logf("Node desired config version: %v.
MCN spec desired config version: %v.", nodeDesiredConfig, newMCN.Spec.ConfigVersion.Desired) + return newMCN.Spec.ConfigVersion.Desired == nodeDesiredConfig + }, 1*time.Minute, 5*time.Second).Should(o.BeTrue()) + + // Check current config version matches for node and MCN status config version + framework.Logf("Waiting for node current config version to match current config version in MCN '%v' status.", node.Name) + o.Eventually(func() bool { + // Get the desired MCN + newMCN, newMCNErr := clientSet.MachineconfigurationV1alpha1().MachineConfigNodes().Get(context.TODO(), node.Name, metav1.GetOptions{}) + if newMCNErr != nil { + framework.Logf("Failed getting MCN '%v'.", node.Name) + return false + } + + // Check if MCN current config version in status matches node's current config version + framework.Logf("Node current config version: %v. MCN status current config version: %v.", nodeCurrentConfig, newMCN.Status.ConfigVersion.Current) + return newMCN.Status.ConfigVersion.Current == nodeCurrentConfig + }, 2*time.Minute, 5*time.Second).Should(o.BeTrue()) + + // Check desired config version matches for node and MCN status config version + framework.Logf("Waiting for node desired config version to match desired config version in MCN '%v' status.", node.Name) + o.Eventually(func() bool { + // Get the desired MCN + newMCN, newMCNErr := clientSet.MachineconfigurationV1alpha1().MachineConfigNodes().Get(context.TODO(), node.Name, metav1.GetOptions{}) + if newMCNErr != nil { + framework.Logf("Failed getting MCN '%v'.", node.Name) + return false + } + + // Check if MCN desired config version in status matches node's desired config version + framework.Logf("Node desired config version: %v. 
MCN status desired config version: %v.", nodeDesiredConfig, newMCN.Status.ConfigVersion.Desired) + return newMCN.Status.ConfigVersion.Desired == nodeDesiredConfig + }, 2*time.Minute, 5*time.Second).Should(o.BeTrue()) + return nil +} + +// `ScaleMachineSetDown` will determine whether a MachineSet needs to be scaled and, if so, will +// scale it. A MachineSet needs to be scaled if its desired replica value does not match its +// current replica value. +func ScaleMachineSetDown(oc *exutil.CLI, machineSet machinev1beta1.MachineSet, desiredReplicaValue int, cleanupCompleted bool) error { + // Skip when cleanup is not needed + if cleanupCompleted { + return nil + } + + // Check if MachineSet needs to be scaled + if int(*machineSet.Spec.Replicas) == desiredReplicaValue { + framework.Logf("MachineSet '%v' does not need to be scaled. Current replica value %v matches desired replica value of %v.", machineSet.Name, *machineSet.Spec.Replicas, desiredReplicaValue) + return nil + } + + // Scale MachineSet to desired replica value + framework.Logf("Scaling MachineSet '%s' to replica value %v.", machineSet.Name, desiredReplicaValue) + return ScaleMachineSet(oc, machineSet.Name, fmt.Sprintf("%d", desiredReplicaValue)) +} + +// `CleanupProvisionedMachine` scales down the replica count for a given MachineSet and checks whether the +// provisioned Machine provided is deleted. 
+func CleanupProvisionedMachine(oc *exutil.CLI, machineClient *machineclient.Clientset, machineSetName string, desiredReplicaValue int, + machineName string, cleanupCompleted bool) error { + // Skip when cleanup is not needed + if cleanupCompleted { + return nil + } + + // Scale MachineSet to desired replica value + framework.Logf("Scaling MachineSet '%s' to replica value %v.", machineSetName, desiredReplicaValue) + scaleErr := ScaleMachineSet(oc, machineSetName, fmt.Sprintf("%d", desiredReplicaValue)) + if scaleErr != nil { + return scaleErr + } + + // Check that provisioned machine is deleted + return WaitForMachineToBeDeleted(machineClient, machineName) +} + +// `CleanupCreatedNode` scales down the replica count for a given MachineSet and checks whether the +// created Node provided is deleted. +func CleanupCreatedNode(oc *exutil.CLI, machineSetName string, desiredReplicaValue int, nodeName string, cleanupCompleted bool) error { + // Skip when cleanup is not needed + if cleanupCompleted { + return nil + } + + // Scale MachineSet to desired replica value + framework.Logf("Scaling MachineSet '%s' to replica value %v.", machineSetName, desiredReplicaValue) + scaleErr := ScaleMachineSet(oc, machineSetName, fmt.Sprintf("%d", desiredReplicaValue)) + if scaleErr != nil { + return scaleErr + } + + // Check that created node is deleted + return WaitForNodeToBeDeleted(oc, nodeName) +} + +// `WaitForNodeToBeDeleted` waits up to 10 minutes for a node to be deleted (no longer exist) +func WaitForNodeToBeDeleted(oc *exutil.CLI, nodeName string) error { + o.Eventually(func() bool { + framework.Logf("Checking if node '%v' is deleted.", nodeName) + + // Check if node still exists + _, nodeErr := oc.AsAdmin().KubeClient().CoreV1().Nodes().Get(context.TODO(), nodeName, metav1.GetOptions{}) + if apierrors.IsNotFound(nodeErr) { + framework.Logf("Node '%v' has been deleted.", nodeName) + return true + } + if nodeErr != nil { + framework.Logf("Error trying to get node: %v.", nodeErr) 
+ return false + } + + framework.Logf("Node '%v' still exists.", nodeName) + return false + }, 10*time.Minute, 5*time.Second).Should(o.BeTrue()) + return nil +} + +// `WaitForMCNToBeDeleted` waits up to 4 minutes for an MCN to be deleted (no longer exist) +func WaitForMCNToBeDeleted(clientSet *machineconfigclient.Clientset, mcnName string) error { + o.Eventually(func() bool { + framework.Logf("Check if MCN '%v' is deleted.", mcnName) + + // Check if MCN still exists + _, mcnErr := clientSet.MachineconfigurationV1alpha1().MachineConfigNodes().Get(context.TODO(), mcnName, metav1.GetOptions{}) + if apierrors.IsNotFound(mcnErr) { + framework.Logf("MCN '%v' has been deleted.", mcnName) + return true + } + if mcnErr != nil { + framework.Logf("Error trying to get MCN: '%v'.", mcnErr) + return false + } + + framework.Logf("MCN '%v' still exists.", mcnName) + return false + }, 4*time.Minute, 3*time.Second).Should(o.BeTrue()) + return nil +} + +// `WaitForMachineToBeDeleted` waits up to 10 minutes for a machine to be deleted (no longer exist) +func WaitForMachineToBeDeleted(machineClient *machineclient.Clientset, machineName string) error { + o.Eventually(func() bool { + framework.Logf("Checking if machine '%v' is deleted.", machineName) + + // Check if machine still exists + _, machineErr := machineClient.MachineV1beta1().Machines(mapiNamespace).Get(context.TODO(), machineName, metav1.GetOptions{}) + if apierrors.IsNotFound(machineErr) { + framework.Logf("Machine '%v' has been deleted.", machineName) + return true + } + if machineErr != nil { + framework.Logf("Error trying to get machine: %v.", machineErr) + return false + } + + framework.Logf("Machine '%v' still exists.", machineName) + return false + }, 10*time.Minute, 5*time.Second).Should(o.BeTrue()) + return nil +} + +// ExecCmdOnNodeWithError behaves like ExecCmdOnNode, with the exception that +// any errors are returned to the caller for inspection. 
This allows one to +// execute a command that is expected to fail; e.g., stat /nonexistent/file. +func ExecCmdOnNodeWithError(oc *exutil.CLI, node corev1.Node, subArgs ...string) (string, error) { + cmd, err := execCmdOnNode(oc, node, subArgs...) + if err != nil { + return "", err + } + + out, err := cmd.CombinedOutput() + return string(out), err +} + +// ExecCmdOnNode finds a node's mcd, and oc rsh's into it to execute a command on the node +// all commands should use /rootfs as root +func ExecCmdOnNode(oc *exutil.CLI, node corev1.Node, subArgs ...string) string { + cmd, err := execCmdOnNode(oc, node, subArgs...) + o.Expect(err).NotTo(o.HaveOccurred(), "could not prepare to exec cmd %v on node %s: %s", subArgs, node.Name, err) + cmd.Stderr = os.Stderr + + out, err := cmd.Output() + if err != nil { + // common err is that the mcd went down mid cmd. Re-try for good measure + cmd, err = execCmdOnNode(oc, node, subArgs...) + o.Expect(err).NotTo(o.HaveOccurred(), "could not prepare to exec cmd %v on node %s: %s", subArgs, node.Name, err) + out, err = cmd.Output() + + } + o.Expect(err).NotTo(o.HaveOccurred(), "failed to exec cmd %v on node %s: %s", subArgs, node.Name, string(out)) + return string(out) +} + +// execCmdOnNode finds a node's mcd and prepares (without running) the oc rsh command that executes a command on the node +// all commands should use /rootfs as root +func execCmdOnNode(oc *exutil.CLI, node corev1.Node, subArgs ...string) (*exec.Cmd, error) { + // Check for an oc binary in $PATH. + path, err := exec.LookPath("oc") + if err != nil { + return nil, fmt.Errorf("could not locate oc command: %w", err) + } + + mcd, err := mcdForNode(oc.AsAdmin().KubeClient(), &node) + if err != nil { + return nil, fmt.Errorf("could not get MCD for node %s: %w", node.Name, err) + } + + mcdName := mcd.ObjectMeta.Name + + entryPoint := path + args := []string{"rsh", + "-n", "openshift-machine-config-operator", + "-c", "machine-config-daemon", + mcdName} + args = append(args, subArgs...) 
+ + cmd := exec.Command(entryPoint, args...) + return cmd, nil +} + +func mcdForNode(client kubernetes.Interface, node *corev1.Node) (*corev1.Pod, error) { + // find the MCD pod that has spec.nodeNAME = node.Name and get its name: + listOptions := metav1.ListOptions{ + FieldSelector: fields.SelectorFromSet(fields.Set{"spec.nodeName": node.Name}).String(), + } + listOptions.LabelSelector = labels.SelectorFromSet(labels.Set{"k8s-app": "machine-config-daemon"}).String() + + mcdList, err := client.CoreV1().Pods("openshift-machine-config-operator").List(context.TODO(), listOptions) + if err != nil { + return nil, err + } + if len(mcdList.Items) != 1 { + if len(mcdList.Items) == 0 { + return nil, fmt.Errorf("failed to find MCD for node %s", node.Name) + } + return nil, fmt.Errorf("too many (%d) MCDs for node %s", len(mcdList.Items), node.Name) + } + return &mcdList.Items[0], nil } // Get nodes from a Pool diff --git a/test/extended/machine_config/machine_config_node.go b/test/extended/machine_config/machine_config_node.go new file mode 100644 index 000000000000..599594ddced6 --- /dev/null +++ b/test/extended/machine_config/machine_config_node.go @@ -0,0 +1,517 @@ +package machine_config + +import ( + "bytes" + "context" + "fmt" + "os/exec" + "path/filepath" + "time" + + mcfgv1alpha1 "github.com/openshift/api/machineconfiguration/v1alpha1" + machineclient "github.com/openshift/client-go/machine/clientset/versioned" + machineconfigclient "github.com/openshift/client-go/machineconfiguration/clientset/versioned" + exutil "github.com/openshift/origin/test/extended/util" + + g "github.com/onsi/ginkgo/v2" + o "github.com/onsi/gomega" + + corev1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/kubernetes/test/e2e/framework" +) + +const ( + worker = "worker" + master = "master" + custom = "infra" +) + +var _ = g.Describe("[sig-mco][OCPFeatureGate:MachineConfigNodes]", func() { + defer g.GinkgoRecover() + var ( + MCOMachineConfigPoolBaseDir = 
exutil.FixturePath("testdata", "machine_config", "machineconfigpool") + MCOMachineConfigBaseDir = exutil.FixturePath("testdata", "machine_config", "machineconfig") + infraMCPFixture = filepath.Join(MCOMachineConfigPoolBaseDir, "infra-mcp.yaml") + customMCFixture = filepath.Join(MCOMachineConfigBaseDir, "0-infra-mc.yaml") + masterMCFixture = filepath.Join(MCOMachineConfigBaseDir, "0-master-mc.yaml") + invalidWorkerMCFixture = filepath.Join(MCOMachineConfigBaseDir, "1-worker-invalid-mc.yaml") + invalidMasterMCFixture = filepath.Join(MCOMachineConfigBaseDir, "1-master-invalid-mc.yaml") + oc = exutil.NewCLIWithoutNamespace("machine-config") + ) + + g.It("[Serial]Should have MCN properties matching associated node properties [apigroup:machineconfiguration.openshift.io]", func() { + if IsSingleNode(oc) { //handle SNO clusters + ValidateMCNPropertiesSNO(oc, infraMCPFixture) + } else { //handle standard, non-SNO, clusters + ValidateMCNProperties(oc, infraMCPFixture) + } + }) + + g.It("[Serial]Should properly transition through MCN conditions on node update [apigroup:machineconfiguration.openshift.io]", func() { + if IsSingleNode(oc) { + ValidateMCNConditionTransitionsSNO(oc, masterMCFixture) + } else { + ValidateMCNConditionTransitions(oc, customMCFixture, infraMCPFixture) + } + }) + + g.It("[Serial][Slow]Should properly report MCN conditions on node degrade [apigroup:machineconfiguration.openshift.io]", func() { + if IsSingleNode(oc) { //handle SNO clusters + ValidateMCNConditionOnNodeDegrade(oc, invalidMasterMCFixture, true) + } else { //handle standard, non-SNO, clusters + ValidateMCNConditionOnNodeDegrade(oc, invalidWorkerMCFixture, false) + } + }) + + g.It("[Serial][Slow]Should properly create and remove MCN on node creation and deletion [apigroup:machineconfiguration.openshift.io]", func() { + skipOnSingleNodeTopology(oc) //skip this test for SNO + ValidateMCNOnNodeCreationAndDeletion(oc) + }) + + g.It("Should properly block MCN updates from a MCD that is not the 
associated one [apigroup:machineconfiguration.openshift.io]", func() { + skipOnSingleNodeTopology(oc) //skip this test for SNO + ValidateMCNScopeSadPathTest(oc) + }) + + g.It("Should properly block MCN updates by impersonation of the MCD SA [apigroup:machineconfiguration.openshift.io]", func() { + skipOnSingleNodeTopology(oc) //skip this test for SNO + ValidateMCNScopeImpersonationPathTest(oc) + }) + + g.It("Should properly update the MCN from the associated MCD [apigroup:machineconfiguration.openshift.io]", func() { + skipOnSingleNodeTopology(oc) //skip this test for SNO + ValidateMCNScopeHappyPathTest(oc) + }) +}) + +// `ValidateMCNProperties` checks that MCN properties match the corresponding node properties +// Note: This test case does not work for SNO clusters due to the cluster's one node assuming +// both the worker and master role since `GetRandomNode` selects nodes using node roles. Role +// matching is not necessarily synonymous with MCP association in edge cases, such as in SNO. 
+func ValidateMCNProperties(oc *exutil.CLI, fixture string) { + // Create client set for test + clientSet, clientErr := machineconfigclient.NewForConfig(oc.KubeFramework().ClientConfig()) + o.Expect(clientErr).NotTo(o.HaveOccurred(), "Error creating client set for test.") + + // Grab a random node from each default pool + workerNode := GetRandomNode(oc, worker) + o.Expect(workerNode.Name).NotTo(o.Equal(""), "Could not get a worker node.") + masterNode := GetRandomNode(oc, master) + o.Expect(masterNode.Name).NotTo(o.Equal(""), "Could not get a master node.") + + // Validate MCN for node in default `worker` pool + framework.Logf("Validating MCN properties for node in default '%v' pool.", worker) + mcnErr := ValidateMCNForNodeInPool(oc, clientSet, workerNode, worker) + o.Expect(mcnErr).NotTo(o.HaveOccurred(), fmt.Sprintf("Error validating MCN properties node in default pool '%v'.", worker)) + + // Validate MCN for node in default `master` pool + framework.Logf("Validating MCN properties for node in default '%v' pool.", master) + mcnErr = ValidateMCNForNodeInPool(oc, clientSet, masterNode, master) + o.Expect(mcnErr).NotTo(o.HaveOccurred(), fmt.Sprintf("Error validating MCN properties node in default pool '%v'.", master)) + + // Cleanup custom MCP on test completion or failure + defer func() { + // Get starting state of default worker MCP + workerMcp, err := clientSet.MachineconfigurationV1().MachineConfigPools().Get(context.TODO(), worker, metav1.GetOptions{}) + o.Expect(err).NotTo(o.HaveOccurred(), "Could not get worker MCP.") + workerMcpReadyMachines := workerMcp.Status.ReadyMachineCount + + // Unlabel node + framework.Logf("Removing label node-role.kubernetes.io/%v from node %v", custom, workerNode.Name) + unlabelErr := oc.Run("label").Args(fmt.Sprintf("node/%s", workerNode.Name), fmt.Sprintf("node-role.kubernetes.io/%s-", custom)).Execute() + o.Expect(unlabelErr).NotTo(o.HaveOccurred(), fmt.Sprintf("Could not remove label 'node-role.kubernetes.io/%s' from node 
'%v'.", custom, workerNode.Name)) + + // Wait for infra pool to report no nodes & for worker MCP to be ready + framework.Logf("Waiting for %v MCP to be updated with %v ready machines.", custom, 0) + WaitForMCPToBeReady(oc, clientSet, custom, 0) + framework.Logf("Waiting for %v MCP to be updated with %v ready machines.", worker, workerMcpReadyMachines+1) + WaitForMCPToBeReady(oc, clientSet, worker, workerMcpReadyMachines+1) + + // Delete custom MCP + framework.Logf("Deleting MCP %v", custom) + deleteMCPErr := oc.Run("delete").Args("mcp", custom).Execute() + o.Expect(deleteMCPErr).NotTo(o.HaveOccurred(), fmt.Sprintf("Error deleting MCP '%v': %v", custom, deleteMCPErr)) + }() + + // Apply the fixture to create a custom MCP called "infra" & label the worker node accordingly + mcpErr := oc.Run("apply").Args("-f", fixture).Execute() + o.Expect(mcpErr).NotTo(o.HaveOccurred(), "Could not create custom MCP.") + labelErr := oc.Run("label").Args(fmt.Sprintf("node/%s", workerNode.Name), fmt.Sprintf("node-role.kubernetes.io/%s=", custom)).Execute() + o.Expect(labelErr).NotTo(o.HaveOccurred(), fmt.Sprintf("Could not add label 'node-role.kubernetes.io/%s' to node '%v'.", custom, workerNode.Name)) + + // Wait for the custom pool to be updated with the node ready + framework.Logf("Waiting for '%v' MCP to be updated with %v ready machines.", custom, 1) + WaitForMCPToBeReady(oc, clientSet, custom, 1) + + // Get node in custom pool + customNodes, customNodeErr := GetNodesByRole(oc, custom) + o.Expect(customNodeErr).NotTo(o.HaveOccurred(), fmt.Sprintf("Could not get node in MCP '%v'.", custom)) + customNode := customNodes[0] + + // Validate MCN for node in custom pool + framework.Logf("Validating MCN properties for node in custom '%v' pool.", custom) + mcnErr = ValidateMCNForNodeInPool(oc, clientSet, customNode, custom) + o.Expect(mcnErr).NotTo(o.HaveOccurred(), fmt.Sprintf("Error validating MCN properties node in custom pool '%v'.", custom)) +} + +// `ValidateMCNPropertiesSNO` checks 
that MCN properties match the corresponding node properties +// specifically for SNO clusters. Note that this test does not include creating a custom MCP, as +// the default SNO node remains part of the master pool. +func ValidateMCNPropertiesSNO(oc *exutil.CLI, fixture string) { + // Create client set for test + clientSet, clientErr := machineconfigclient.NewForConfig(oc.KubeFramework().ClientConfig()) + o.Expect(clientErr).NotTo(o.HaveOccurred(), "Error creating client set for test.") + + // Grab the cluster's node + node := GetRandomNode(oc, master) + o.Expect(node.Name).NotTo(o.Equal(""), "Could not get a worker node.") + + // Validate MCN for the cluster's node + framework.Logf("Validating MCN properties for the node in pool '%v'.", master) + mcnErr := ValidateMCNForNodeInPool(oc, clientSet, node, master) + o.Expect(mcnErr).NotTo(o.HaveOccurred(), fmt.Sprintf("Error validating MCN properties for the node in pool '%v'.", master)) +} + +// `ValidateMCNConditionTransitions` checks that Conditions properly update on a node update +// Note that a custom MCP is created for this test to limit the number of upgrading nodes & +// decrease cleanup time. 
+func ValidateMCNConditionTransitions(oc *exutil.CLI, mcFixture string, mcpFixture string) { + poolName := custom + mcName := fmt.Sprintf("90-%v-testfile", poolName) + + // Create client set for test + clientSet, clientErr := machineconfigclient.NewForConfig(oc.KubeFramework().ClientConfig()) + o.Expect(clientErr).NotTo(o.HaveOccurred(), "Error creating client set for test.") + + // Grab a random worker node + workerNode := GetRandomNode(oc, worker) + o.Expect(workerNode.Name).NotTo(o.Equal(""), "Could not get a worker node.") + + // Cleanup custom MCP and delete MC on failure or test completion + defer func() { + // Get starting state of default worker MCP + workerMcp, err := clientSet.MachineconfigurationV1().MachineConfigPools().Get(context.TODO(), worker, metav1.GetOptions{}) + o.Expect(err).NotTo(o.HaveOccurred(), "Could not get worker MCP.") + workerMcpReadyMachines := workerMcp.Status.ReadyMachineCount + + // Unlabel node + framework.Logf("Removing label node-role.kubernetes.io/%v from node %v", custom, workerNode.Name) + unlabelErr := oc.Run("label").Args(fmt.Sprintf("node/%s", workerNode.Name), fmt.Sprintf("node-role.kubernetes.io/%s-", custom)).Execute() + o.Expect(unlabelErr).NotTo(o.HaveOccurred(), fmt.Sprintf("Could not remove label 'node-role.kubernetes.io/%s' from node '%v'.", custom, workerNode.Name)) + + // Wait for infra MCP to report no ready nodes + framework.Logf("Waiting for %v MCP to be updated with %v ready machines.", custom, 0) + WaitForMCPToBeReady(oc, clientSet, custom, 0) + + // Delete applied MC + deleteMCErr := oc.Run("delete").Args("machineconfig", mcName).Execute() + o.Expect(deleteMCErr).NotTo(o.HaveOccurred(), fmt.Sprintf("Could not delete MachineConfig '%v'.", mcName)) + + // Wait for worker MCP to be ready + framework.Logf("Waiting for %v MCP to be updated with %v ready machines.", worker, workerMcpReadyMachines+1) + WaitForMCPToBeReady(oc, clientSet, worker, workerMcpReadyMachines+1) + + // Delete custom MCP + 
framework.Logf("Deleting MCP %v", custom) + deleteMCPErr := oc.Run("delete").Args("mcp", custom).Execute() + o.Expect(deleteMCPErr).NotTo(o.HaveOccurred(), fmt.Sprintf("Error deleting MCP '%v': %v", custom, deleteMCPErr)) + }() + + // Apply the fixture to create a custom MCP called "infra" & label the worker node accordingly + mcpErr := oc.Run("apply").Args("-f", mcpFixture).Execute() + o.Expect(mcpErr).NotTo(o.HaveOccurred(), "Could not create custom MCP.") + labelErr := oc.Run("label").Args(fmt.Sprintf("node/%s", workerNode.Name), fmt.Sprintf("node-role.kubernetes.io/%s=", custom)).Execute() + o.Expect(labelErr).NotTo(o.HaveOccurred(), fmt.Sprintf("Could not add label 'node-role.kubernetes.io/%s' to node '%v'.", custom, workerNode.Name)) + + // Apply MC targeting custom pool node + mcErr := oc.Run("apply").Args("-f", mcFixture).Execute() + o.Expect(mcErr).NotTo(o.HaveOccurred(), "Could not apply MachineConfig.") + updatingNodeName := workerNode.Name + + // Validate transition through conditions for MCN + validateTransitionThroughConditions(clientSet, updatingNodeName) + + // When an update is complete, all conditions other than `Updated` must be false + framework.Logf("Checking all conditions other than 'Updated' are False.") + o.Expect(ConfirmUpdatedMCNStatus(clientSet, updatingNodeName)).Should(o.BeTrue(), "Error, all conditions must be 'False' when Updated=True.") +} + +// `ValidateMCNConditionTransitionsSNO` checks that Conditions properly update on a node update +// in Single Node Openshift +func ValidateMCNConditionTransitionsSNO(oc *exutil.CLI, mcFixture string) { + poolName := master + mcName := fmt.Sprintf("90-%v-testfile", poolName) + + // Create client set for test + clientSet, clientErr := machineconfigclient.NewForConfig(oc.KubeFramework().ClientConfig()) + o.Expect(clientErr).NotTo(o.HaveOccurred(), "Error creating client set for test.") + + // Delete MC on failure or test completion + defer func() { + deleteMCErr := 
oc.Run("delete").Args("machineconfig", mcName).Execute() + o.Expect(deleteMCErr).NotTo(o.HaveOccurred(), fmt.Sprintf("Could not delete MachineConfig '%v'.", mcName)) + }() + + // Apply MC targeting the master pool node + mcErr := oc.Run("apply").Args("-f", mcFixture).Execute() + o.Expect(mcErr).NotTo(o.HaveOccurred(), "Could not apply MachineConfig.") + + // Get the first updating node + updatingNodes := GetCordonedNodes(oc, poolName) + o.Expect(len(updatingNodes) > 0).Should(o.BeTrue(), fmt.Sprintf("No ready nodes found for MCP '%v'.", poolName)) + updatingNode := updatingNodes[0] + + // Validate transition through conditions for MCN + validateTransitionThroughConditions(clientSet, updatingNode.Name) + + // When an update is complete, all conditions other than `Updated` must be false + framework.Logf("Checking all conditions other than 'Updated' are False.") + o.Expect(ConfirmUpdatedMCNStatus(clientSet, updatingNode.Name)).Should(o.BeTrue(), "Error, all conditions must be 'False' when Updated=True.") +} + +// `validateTransitionThroughConditions` validates the condition transitions in the MCN during a node update +func validateTransitionThroughConditions(clientSet *machineconfigclient.Clientset, updatingNodeName string) { + // Note that some conditions are passed through quickly in a node update, so the test can + // "miss" catching the phases. For test stability, if we fail to catch an "Unknown" status, + // a warning will be logged instead of erroring out the test. 
+ framework.Logf("Waiting for Updated=False") + err := WaitForMCNConditionStatus(clientSet, updatingNodeName, mcfgv1alpha1.MachineConfigNodeUpdated, metav1.ConditionFalse, 1*time.Minute, 1*time.Second) + o.Expect(err).NotTo(o.HaveOccurred(), "Error, could not detect Updated=False.") + framework.Logf("Waiting for UpdatePrepared=True") + err = WaitForMCNConditionStatus(clientSet, updatingNodeName, mcfgv1alpha1.MachineConfigNodeUpdatePrepared, metav1.ConditionTrue, 1*time.Minute, 1*time.Second) + o.Expect(err).NotTo(o.HaveOccurred(), "Error, could not detect UpdatePrepared=True.") + framework.Logf("Waiting for UpdateExecuted=Unknown") + err = WaitForMCNConditionStatus(clientSet, updatingNodeName, mcfgv1alpha1.MachineConfigNodeUpdateExecuted, metav1.ConditionUnknown, 30*time.Second, 1*time.Second) + if err != nil { + framework.Logf("Warning, could not detect UpdateExecuted=Unknown.") + } + framework.Logf("Waiting for Cordoned=True") + err = WaitForMCNConditionStatus(clientSet, updatingNodeName, mcfgv1alpha1.MachineConfigNodeUpdateCordoned, metav1.ConditionTrue, 30*time.Second, 1*time.Second) + o.Expect(err).NotTo(o.HaveOccurred(), "Error, could not detect Cordoned=True.") + framework.Logf("Waiting for Drained=Unknown") + err = WaitForMCNConditionStatus(clientSet, updatingNodeName, mcfgv1alpha1.MachineConfigNodeUpdateDrained, metav1.ConditionUnknown, 15*time.Second, 1*time.Second) + if err != nil { + framework.Logf("Warning, could not detect Drained=Unknown.") + } + framework.Logf("Waiting for Drained=True") + err = WaitForMCNConditionStatus(clientSet, updatingNodeName, mcfgv1alpha1.MachineConfigNodeUpdateDrained, metav1.ConditionTrue, 4*time.Minute, 1*time.Second) + o.Expect(err).NotTo(o.HaveOccurred(), "Error, could not detect Drained=True.") + framework.Logf("Waiting for AppliedFilesAndOS=Unknown") + err = WaitForMCNConditionStatus(clientSet, updatingNodeName, mcfgv1alpha1.MachineConfigNodeUpdateFilesAndOS, metav1.ConditionUnknown, 30*time.Second, 1*time.Second) + if 
err != nil { + framework.Logf("Warning, could not detect AppliedFilesAndOS=Unknown.") + } + framework.Logf("Waiting for AppliedFilesAndOS=True") + err = WaitForMCNConditionStatus(clientSet, updatingNodeName, mcfgv1alpha1.MachineConfigNodeUpdateFilesAndOS, metav1.ConditionTrue, 3*time.Minute, 1*time.Second) + o.Expect(err).NotTo(o.HaveOccurred(), "Error, could not detect AppliedFilesAndOS=True.") + framework.Logf("Waiting for UpdateExecuted=True") + err = WaitForMCNConditionStatus(clientSet, updatingNodeName, mcfgv1alpha1.MachineConfigNodeUpdateExecuted, metav1.ConditionTrue, 20*time.Second, 1*time.Second) + o.Expect(err).NotTo(o.HaveOccurred(), "Error, could not detect UpdateExecuted=True.") + framework.Logf("Waiting for RebootedNode=Unknown") + err = WaitForMCNConditionStatus(clientSet, updatingNodeName, mcfgv1alpha1.MachineConfigNodeUpdateRebooted, metav1.ConditionUnknown, 15*time.Second, 1*time.Second) + if err != nil { + framework.Logf("Warning, could not detect RebootedNode=Unknown.") + } + framework.Logf("Waiting for RebootedNode=True") + err = WaitForMCNConditionStatus(clientSet, updatingNodeName, mcfgv1alpha1.MachineConfigNodeUpdateRebooted, metav1.ConditionTrue, 6*time.Minute, 1*time.Second) + o.Expect(err).NotTo(o.HaveOccurred(), "Error, could not detect RebootedNode=True.") + framework.Logf("Waiting for Resumed=True") + err = WaitForMCNConditionStatus(clientSet, updatingNodeName, mcfgv1alpha1.MachineConfigNodeResumed, metav1.ConditionTrue, 15*time.Second, 1*time.Second) + o.Expect(err).NotTo(o.HaveOccurred(), "Error, could not detect Resumed=True.") + framework.Logf("Waiting for UpdateComplete=True") + err = WaitForMCNConditionStatus(clientSet, updatingNodeName, mcfgv1alpha1.MachineConfigNodeUpdateComplete, metav1.ConditionTrue, 10*time.Second, 1*time.Second) + o.Expect(err).NotTo(o.HaveOccurred(), "Error, could not detect UpdateComplete=True.") + framework.Logf("Waiting for Uncordoned=True") + err = WaitForMCNConditionStatus(clientSet, updatingNodeName, 
mcfgv1alpha1.MachineConfigNodeUpdateUncordoned, metav1.ConditionTrue, 10*time.Second, 1*time.Second) + o.Expect(err).NotTo(o.HaveOccurred(), "Error, could not detect Uncordoned=True.") + framework.Logf("Waiting for Updated=True") + err = WaitForMCNConditionStatus(clientSet, updatingNodeName, mcfgv1alpha1.MachineConfigNodeUpdated, metav1.ConditionTrue, 1*time.Minute, 1*time.Second) + o.Expect(err).NotTo(o.HaveOccurred(), "Error, could not detect Updated=True.") +} + +// `ValidateMCNConditionOnNodeDegrade` checks that Conditions properly update on a node failure (MCP degrade) +func ValidateMCNConditionOnNodeDegrade(oc *exutil.CLI, fixture string, isSno bool) { + // Create client set for test + clientSet, clientErr := machineconfigclient.NewForConfig(oc.KubeFramework().ClientConfig()) + o.Expect(clientErr).NotTo(o.HaveOccurred(), "Error creating client set for test.") + + // In SNO, master pool will degrade + poolName := worker + mcName := "91-worker-testfile-invalid" + if isSno { + poolName = master + mcName = "91-master-testfile-invalid" + } + + // Cleanup MC and fix node degradation on failure or test completion + defer func() { + // Delete the applied MC + deleteMCErr := oc.Run("delete").Args("machineconfig", mcName).Execute() + o.Expect(deleteMCErr).NotTo(o.HaveOccurred(), fmt.Sprintf("Could not delete MachineConfig '%v'.", mcName)) + + // Recover the degraded MCP + recoverErr := RecoverFromDegraded(oc, poolName) + o.Expect(recoverErr).NotTo(o.HaveOccurred(), fmt.Sprintf("Could not recover MCP '%v' from degraded state.", poolName)) + }() + + // Apply invalid MC + mcErr := oc.Run("apply").Args("-f", fixture).Execute() + o.Expect(mcErr).NotTo(o.HaveOccurred(), "Could not apply MachineConfig.") + + // Wait for MCP to be in a degraded state with one degraded machine + degradedErr := WaitForMCPConditionStatus(oc, poolName, "Degraded", corev1.ConditionTrue, 8*time.Minute, 3*time.Second) + o.Expect(degradedErr).NotTo(o.HaveOccurred(), fmt.Sprintf("Error waiting for '%v' 
MCP to be in a degraded state.", poolName)) + mcp, err := clientSet.MachineconfigurationV1().MachineConfigPools().Get(context.TODO(), poolName, metav1.GetOptions{}) + o.Expect(err).NotTo(o.HaveOccurred(), fmt.Sprintf("Error getting '%v' MCP.", poolName)) + o.Expect(mcp.Status.DegradedMachineCount).To(o.BeNumerically("==", 1), fmt.Sprintf("Degraded machine count is not 1. It is %v.", mcp.Status.DegradedMachineCount)) + + // Get degraded node + degradedNode, degradedNodeErr := GetDegradedNode(oc, poolName) + o.Expect(degradedNodeErr).NotTo(o.HaveOccurred(), "Could not get degraded node.") + + // Validate MCN of degraded node + degradedNodeMCN, degradedErr := clientSet.MachineconfigurationV1alpha1().MachineConfigNodes().Get(context.TODO(), degradedNode.Name, metav1.GetOptions{}) + o.Expect(degradedErr).NotTo(o.HaveOccurred(), fmt.Sprintf("Error getting MCN of degraded node '%v'.", degradedNode.Name)) + framework.Logf("Validating that `AppliedFilesAndOS` and `UpdateExecuted` conditions in '%v' MCN have a status of 'Unknown'.", degradedNodeMCN.Name) + o.Expect(CheckMCNConditionStatus(degradedNodeMCN, mcfgv1alpha1.MachineConfigNodeUpdateFilesAndOS, metav1.ConditionUnknown)).Should(o.BeTrue(), "Condition 'AppliedFilesAndOS' does not have the expected status of 'Unknown'.") + o.Expect(CheckMCNConditionStatus(degradedNodeMCN, mcfgv1alpha1.MachineConfigNodeUpdateExecuted, metav1.ConditionUnknown)).Should(o.BeTrue(), "Condition 'UpdateExecuted' does not have the expected status of 'Unknown'.") +} + +// `ValidateMCNOnNodeCreationAndDeletion` checks that MCNs with correct properties are created on node creation +// and deleted on node deletion +func ValidateMCNOnNodeCreationAndDeletion(oc *exutil.CLI) { + cleanupCompleted := false + + // Create machine client for test + machineClient, machineErr := machineclient.NewForConfig(oc.KubeFramework().ClientConfig()) + o.Expect(machineErr).NotTo(o.HaveOccurred(), "Error creating machine client for test.") + + // Create client set for test + 
clientSet, clientErr := machineconfigclient.NewForConfig(oc.KubeFramework().ClientConfig()) + o.Expect(clientErr).NotTo(o.HaveOccurred(), "Error creating client set for test.") + + // Skip test if worker nodes cannot be scaled + canBeScaled, canScaleErr := WorkersCanBeScaled(oc, machineClient) + o.Expect(canScaleErr).NotTo(o.HaveOccurred(), "Error occured when determining whether worker nodes can be scaled.") + if !canBeScaled { + g.Skip("Worker nodes cannot be scaled using MachineSets. This test cannot be executed if workers cannot be scaled via MachineSets.") + } + + // Get MachineSet for test + framework.Logf("Getting MachineSet for testing.") + machineSet := getRandomMachineSet(machineClient) + framework.Logf("MachineSet '%s' will be used for testing", machineSet.Name) + originalReplica := int(*machineSet.Spec.Replicas) + + // Create node by scaling MachineSet + framework.Logf("Scaling up MachineSet to create node.") + updatedReplica := originalReplica + 1 + scaleErr := ScaleMachineSet(oc, machineSet.Name, fmt.Sprintf("%d", updatedReplica)) + o.Expect(scaleErr).NotTo(o.HaveOccurred(), fmt.Sprintf("Error scaling MachineSet %v to replica value %v.", machineSet.Name, updatedReplica)) + + // If we fail at this point, cleanup should include scaling the MachineSet replica back down to the + // original value, when needed (in the case where the replica value patch was successful). 
+ defer func() { + cleanupErr := ScaleMachineSetDown(oc, machineSet, originalReplica, cleanupCompleted) + o.Expect(cleanupErr).NotTo(o.HaveOccurred(), fmt.Sprintf("Error cleaning up cluster by scaling down MachineSet '%v'.", machineSet.Name)) + cleanupCompleted = true + }() + + // Get the new machine + framework.Logf("Getting the new machine.") + provisioningMachine, provisioningMachineErr := GetMachinesByPhase(machineClient, machineSet.Name, "Provisioning") + o.Expect(provisioningMachineErr).NotTo(o.HaveOccurred(), fmt.Sprintf("Cannot find provisioning machine in MachineSet %v", machineSet.Name)) + newMachineName := provisioningMachine.Name + + // If we fail past this point, cleanup should include scaling the MachineSet replica back down to the + // original value and ensuring that the newly provisioned Machine is deleted. + defer func() { + cleanupErr := CleanupProvisionedMachine(oc, machineClient, machineSet.Name, originalReplica, newMachineName, cleanupCompleted) + o.Expect(cleanupErr).NotTo(o.HaveOccurred(), fmt.Sprintf("Error removing provisioned Machine '%v' by scaling down MachineSet '%v'.", newMachineName, machineSet.Name)) + cleanupCompleted = true + }() + + // Annotate the machine so it is deleted on the MachineSet scale down + framework.Logf("Updating delete-machine annotation on Machine '%v' to be 'true'.", newMachineName) + deleteAnnotationErr := UpdateDeleteMachineAnnotation(oc, newMachineName) + o.Expect(deleteAnnotationErr).NotTo(o.HaveOccurred(), fmt.Sprintf("Error updating delete-machine annotation for machine '%v'.", newMachineName)) + + // Wait for new Machine to be ready + framework.Logf("Waiting for new machine %v to be ready.", newMachineName) + WaitForMachineInState(machineClient, newMachineName, "Running") + + // Get the new node + framework.Logf("Getting new node in machine %v.", newMachineName) + newNode, nodeErr := GetNewReadyNodeInMachine(oc, newMachineName) + o.Expect(nodeErr).NotTo(o.HaveOccurred(), fmt.Sprintf("Cannot find 
provisioning node in Machine %v", newMachineName)) + framework.Logf("Got new node: %v.", newNode.Name) + + // If we fail past this point, cleanup should include scaling the MachineSet replica back down to the + // original value and ensuring that the newly created Node is deleted. + defer func() { + cleanupErr := CleanupCreatedNode(oc, newMachineName, originalReplica, newNode.Name, cleanupCompleted) + o.Expect(cleanupErr).NotTo(o.HaveOccurred(), fmt.Sprintf("Error removing created Node '%v' by scaling down MachineSet '%v'.", newNode.Name, machineSet.Name)) + cleanupCompleted = true + }() + + // Validate new MCN + validMCNErr := WaitForValidMCNProperties(clientSet, newNode) + o.Expect(validMCNErr).NotTo(o.HaveOccurred(), fmt.Sprintf("MCN for node '%v' has invalid properties.", newNode)) + + // Scale down the MachineSet to delete the created node + framework.Logf("Scaling down MachineSet to delete node.") + scaleErr = ScaleMachineSet(oc, machineSet.Name, fmt.Sprintf("%v", originalReplica)) + o.Expect(scaleErr).NotTo(o.HaveOccurred(), fmt.Sprintf("Error deleting node by scaling MachineSet %v to replica value %v.", machineSet.Name, originalReplica)) + + // Wait for created node to delete + framework.Logf("Waiting for node '%v' to be deleted.", newNode.Name) + o.Expect(WaitForNodeToBeDeleted(oc, newNode.Name)).NotTo(o.HaveOccurred(), fmt.Sprintf("Error deleting node '%v'.", newNode.Name)) + + // Check that corresponding MCN is removed alongside node + o.Expect(WaitForMCNToBeDeleted(clientSet, newNode.Name)).NotTo(o.HaveOccurred(), fmt.Sprintf("Error deleting MCN '%v'.", newNode.Name)) + + // If we successfully make it here, no cleanup is required + cleanupCompleted = true +} + +// `ValidateMCNScopeSadPathTest` checks that MCN updates from a MCD that is not the associated one are blocked +func ValidateMCNScopeSadPathTest(oc *exutil.CLI) { + // Grab two random nodes from different pools, so we don't end up testing and targeting the same node. 
+ nodeUnderTest := GetRandomNode(oc, "worker") + targetNode := GetRandomNode(oc, "master") + + // Attempt to patch the MCN owned by targetNode from nodeUnderTest's MCD. This should fail. + // This oc command effectively use the service account of the nodeUnderTest's MCD pod, which should only be able to edit nodeUnderTest's MCN. + cmdOutput, err := ExecCmdOnNodeWithError(oc, nodeUnderTest, "chroot", "/rootfs", "oc", "patch", "machineconfignodes", targetNode.Name, "--type=merge", "-p", "{\"spec\":{\"configVersion\":{\"desired\":\"rendered-worker-test\"}}}") + + o.Expect(err).To(o.HaveOccurred()) + o.Expect(cmdOutput).To(o.ContainSubstring("updates to MCN " + targetNode.Name + " can only be done from the MCN's owner node")) +} + +// `ValidateMCNScopeSadPathTest` checks that MCN updates by impersonation of the MCD SA are blocked +func ValidateMCNScopeImpersonationPathTest(oc *exutil.CLI) { + // Grab a random node from the worker pool + nodeUnderTest := GetRandomNode(oc, "worker") + + var errb bytes.Buffer + // Attempt to patch the MCN owned by nodeUnderTest by impersonating the MCD SA. This should fail. + cmd := exec.Command("oc", "patch", "machineconfignodes", nodeUnderTest.Name, "--type=merge", "-p", "{\"spec\":{\"configVersion\":{\"desired\":\"rendered-worker-test\"}}}", "--as=system:serviceaccount:openshift-machine-config-operator:machine-config-daemon") + cmd.Stderr = &errb + err := cmd.Run() + + o.Expect(err).To(o.HaveOccurred()) + o.Expect(errb.String()).To(o.ContainSubstring("this user must have a \"authentication.kubernetes.io/node-name\" claim")) + +} + +// `ValidateMCNScopeSadPathTest` checks that MCN updates from the associated MCD are allowed +func ValidateMCNScopeHappyPathTest(oc *exutil.CLI) { + + // Grab a random node from the worker pool + nodeUnderTest := GetRandomNode(oc, "worker") + + // Attempt to patch the MCN owned by nodeUnderTest from nodeUnderTest's MCD. This should succeed. 
+ // This oc command effectively use the service account of the nodeUnderTest's MCD pod, which should only be able to edit nodeUnderTest's MCN. + ExecCmdOnNode(oc, nodeUnderTest, "chroot", "/rootfs", "oc", "patch", "machineconfignodes", nodeUnderTest.Name, "--type=merge", "-p", "{\"spec\":{\"configVersion\":{\"desired\":\"rendered-worker-test\"}}}") +} diff --git a/test/extended/testdata/bindata.go b/test/extended/testdata/bindata.go index 11bb4b735612..50b94d79d11a 100644 --- a/test/extended/testdata/bindata.go +++ b/test/extended/testdata/bindata.go @@ -420,7 +420,12 @@ // test/extended/testdata/long_names/Dockerfile // test/extended/testdata/long_names/fixture.json // test/extended/testdata/machine_config/kubeletconfig/gcKC.yaml +// test/extended/testdata/machine_config/machineconfig/0-infra-mc.yaml +// test/extended/testdata/machine_config/machineconfig/0-master-mc.yaml +// test/extended/testdata/machine_config/machineconfig/1-master-invalid-mc.yaml +// test/extended/testdata/machine_config/machineconfig/1-worker-invalid-mc.yaml // test/extended/testdata/machine_config/machineconfigpool/customMCP.yaml +// test/extended/testdata/machine_config/machineconfigpool/infra-mcp.yaml // test/extended/testdata/machine_config/machineconfigurations/managedbootimages-all.yaml // test/extended/testdata/machine_config/machineconfigurations/managedbootimages-empty.yaml // test/extended/testdata/machine_config/machineconfigurations/managedbootimages-none.yaml @@ -49056,6 +49061,138 @@ func testExtendedTestdataMachine_configKubeletconfigGckcYaml() (*asset, error) { return a, nil } +var _testExtendedTestdataMachine_configMachineconfig0InfraMcYaml = []byte(`apiVersion: machineconfiguration.openshift.io/v1 +kind: MachineConfig +metadata: + labels: + machineconfiguration.openshift.io/role: infra + name: 90-infra-testfile +spec: + config: + ignition: + version: 3.2.0 + storage: + files: + - contents: + source: data:,hello%20world%0A + mode: 420 + path: /home/core/test +`) + +func 
testExtendedTestdataMachine_configMachineconfig0InfraMcYamlBytes() ([]byte, error) { + return _testExtendedTestdataMachine_configMachineconfig0InfraMcYaml, nil +} + +func testExtendedTestdataMachine_configMachineconfig0InfraMcYaml() (*asset, error) { + bytes, err := testExtendedTestdataMachine_configMachineconfig0InfraMcYamlBytes() + if err != nil { + return nil, err + } + + info := bindataFileInfo{name: "test/extended/testdata/machine_config/machineconfig/0-infra-mc.yaml", size: 0, mode: os.FileMode(0), modTime: time.Unix(0, 0)} + a := &asset{bytes: bytes, info: info} + return a, nil +} + +var _testExtendedTestdataMachine_configMachineconfig0MasterMcYaml = []byte(`apiVersion: machineconfiguration.openshift.io/v1 +kind: MachineConfig +metadata: + labels: + machineconfiguration.openshift.io/role: master + name: 90-master-testfile +spec: + config: + ignition: + version: 3.2.0 + storage: + files: + - contents: + source: data:,hello%20world%0A + mode: 420 + path: /home/core/test +`) + +func testExtendedTestdataMachine_configMachineconfig0MasterMcYamlBytes() ([]byte, error) { + return _testExtendedTestdataMachine_configMachineconfig0MasterMcYaml, nil +} + +func testExtendedTestdataMachine_configMachineconfig0MasterMcYaml() (*asset, error) { + bytes, err := testExtendedTestdataMachine_configMachineconfig0MasterMcYamlBytes() + if err != nil { + return nil, err + } + + info := bindataFileInfo{name: "test/extended/testdata/machine_config/machineconfig/0-master-mc.yaml", size: 0, mode: os.FileMode(0), modTime: time.Unix(0, 0)} + a := &asset{bytes: bytes, info: info} + return a, nil +} + +var _testExtendedTestdataMachine_configMachineconfig1MasterInvalidMcYaml = []byte(`apiVersion: machineconfiguration.openshift.io/v1 +kind: MachineConfig +metadata: + labels: + machineconfiguration.openshift.io/role: master + name: 91-master-testfile-invalid +spec: + config: + ignition: + version: 3.2.0 + storage: + files: + - contents: + source: data:,hello%20world%0A + mode: 420 + path: 
/home/core +`) + +func testExtendedTestdataMachine_configMachineconfig1MasterInvalidMcYamlBytes() ([]byte, error) { + return _testExtendedTestdataMachine_configMachineconfig1MasterInvalidMcYaml, nil +} + +func testExtendedTestdataMachine_configMachineconfig1MasterInvalidMcYaml() (*asset, error) { + bytes, err := testExtendedTestdataMachine_configMachineconfig1MasterInvalidMcYamlBytes() + if err != nil { + return nil, err + } + + info := bindataFileInfo{name: "test/extended/testdata/machine_config/machineconfig/1-master-invalid-mc.yaml", size: 0, mode: os.FileMode(0), modTime: time.Unix(0, 0)} + a := &asset{bytes: bytes, info: info} + return a, nil +} + +var _testExtendedTestdataMachine_configMachineconfig1WorkerInvalidMcYaml = []byte(`apiVersion: machineconfiguration.openshift.io/v1 +kind: MachineConfig +metadata: + labels: + machineconfiguration.openshift.io/role: worker + name: 91-worker-testfile-invalid +spec: + config: + ignition: + version: 3.2.0 + storage: + files: + - contents: + source: data:,hello%20world%0A + mode: 420 + path: /home/core +`) + +func testExtendedTestdataMachine_configMachineconfig1WorkerInvalidMcYamlBytes() ([]byte, error) { + return _testExtendedTestdataMachine_configMachineconfig1WorkerInvalidMcYaml, nil +} + +func testExtendedTestdataMachine_configMachineconfig1WorkerInvalidMcYaml() (*asset, error) { + bytes, err := testExtendedTestdataMachine_configMachineconfig1WorkerInvalidMcYamlBytes() + if err != nil { + return nil, err + } + + info := bindataFileInfo{name: "test/extended/testdata/machine_config/machineconfig/1-worker-invalid-mc.yaml", size: 0, mode: os.FileMode(0), modTime: time.Unix(0, 0)} + a := &asset{bytes: bytes, info: info} + return a, nil +} + var _testExtendedTestdataMachine_configMachineconfigpoolCustommcpYaml = []byte(`apiVersion: machineconfiguration.openshift.io/v1 kind: MachineConfigPool metadata: @@ -49086,6 +49223,34 @@ func testExtendedTestdataMachine_configMachineconfigpoolCustommcpYaml() (*asset, return a, nil } 
+var _testExtendedTestdataMachine_configMachineconfigpoolInfraMcpYaml = []byte(`apiVersion: machineconfiguration.openshift.io/v1 +kind: MachineConfigPool +metadata: + name: infra +spec: + machineConfigSelector: + matchExpressions: + - {key: machineconfiguration.openshift.io/role, operator: In, values: [worker,infra]} + nodeSelector: + matchLabels: + node-role.kubernetes.io/infra: "" +`) + +func testExtendedTestdataMachine_configMachineconfigpoolInfraMcpYamlBytes() ([]byte, error) { + return _testExtendedTestdataMachine_configMachineconfigpoolInfraMcpYaml, nil +} + +func testExtendedTestdataMachine_configMachineconfigpoolInfraMcpYaml() (*asset, error) { + bytes, err := testExtendedTestdataMachine_configMachineconfigpoolInfraMcpYamlBytes() + if err != nil { + return nil, err + } + + info := bindataFileInfo{name: "test/extended/testdata/machine_config/machineconfigpool/infra-mcp.yaml", size: 0, mode: os.FileMode(0), modTime: time.Unix(0, 0)} + a := &asset{bytes: bytes, info: info} + return a, nil +} + var _testExtendedTestdataMachine_configMachineconfigurationsManagedbootimagesAllYaml = []byte(`apiVersion: operator.openshift.io/v1 kind: MachineConfiguration metadata: @@ -55971,7 +56136,12 @@ var _bindata = map[string]func() (*asset, error){ "test/extended/testdata/long_names/Dockerfile": testExtendedTestdataLong_namesDockerfile, "test/extended/testdata/long_names/fixture.json": testExtendedTestdataLong_namesFixtureJson, "test/extended/testdata/machine_config/kubeletconfig/gcKC.yaml": testExtendedTestdataMachine_configKubeletconfigGckcYaml, + "test/extended/testdata/machine_config/machineconfig/0-infra-mc.yaml": testExtendedTestdataMachine_configMachineconfig0InfraMcYaml, + "test/extended/testdata/machine_config/machineconfig/0-master-mc.yaml": testExtendedTestdataMachine_configMachineconfig0MasterMcYaml, + "test/extended/testdata/machine_config/machineconfig/1-master-invalid-mc.yaml": testExtendedTestdataMachine_configMachineconfig1MasterInvalidMcYaml, + 
"test/extended/testdata/machine_config/machineconfig/1-worker-invalid-mc.yaml": testExtendedTestdataMachine_configMachineconfig1WorkerInvalidMcYaml, "test/extended/testdata/machine_config/machineconfigpool/customMCP.yaml": testExtendedTestdataMachine_configMachineconfigpoolCustommcpYaml, + "test/extended/testdata/machine_config/machineconfigpool/infra-mcp.yaml": testExtendedTestdataMachine_configMachineconfigpoolInfraMcpYaml, "test/extended/testdata/machine_config/machineconfigurations/managedbootimages-all.yaml": testExtendedTestdataMachine_configMachineconfigurationsManagedbootimagesAllYaml, "test/extended/testdata/machine_config/machineconfigurations/managedbootimages-empty.yaml": testExtendedTestdataMachine_configMachineconfigurationsManagedbootimagesEmptyYaml, "test/extended/testdata/machine_config/machineconfigurations/managedbootimages-none.yaml": testExtendedTestdataMachine_configMachineconfigurationsManagedbootimagesNoneYaml, @@ -56726,8 +56896,15 @@ var _bintree = &bintree{nil, map[string]*bintree{ "kubeletconfig": {nil, map[string]*bintree{ "gcKC.yaml": {testExtendedTestdataMachine_configKubeletconfigGckcYaml, map[string]*bintree{}}, }}, + "machineconfig": {nil, map[string]*bintree{ + "0-infra-mc.yaml": {testExtendedTestdataMachine_configMachineconfig0InfraMcYaml, map[string]*bintree{}}, + "0-master-mc.yaml": {testExtendedTestdataMachine_configMachineconfig0MasterMcYaml, map[string]*bintree{}}, + "1-master-invalid-mc.yaml": {testExtendedTestdataMachine_configMachineconfig1MasterInvalidMcYaml, map[string]*bintree{}}, + "1-worker-invalid-mc.yaml": {testExtendedTestdataMachine_configMachineconfig1WorkerInvalidMcYaml, map[string]*bintree{}}, + }}, "machineconfigpool": {nil, map[string]*bintree{ "customMCP.yaml": {testExtendedTestdataMachine_configMachineconfigpoolCustommcpYaml, map[string]*bintree{}}, + "infra-mcp.yaml": {testExtendedTestdataMachine_configMachineconfigpoolInfraMcpYaml, map[string]*bintree{}}, }}, "machineconfigurations": {nil, 
map[string]*bintree{ "managedbootimages-all.yaml": {testExtendedTestdataMachine_configMachineconfigurationsManagedbootimagesAllYaml, map[string]*bintree{}}, diff --git a/test/extended/testdata/machine_config/machineconfig/0-infra-mc.yaml b/test/extended/testdata/machine_config/machineconfig/0-infra-mc.yaml new file mode 100644 index 000000000000..7b2518da6dc0 --- /dev/null +++ b/test/extended/testdata/machine_config/machineconfig/0-infra-mc.yaml @@ -0,0 +1,16 @@ +apiVersion: machineconfiguration.openshift.io/v1 +kind: MachineConfig +metadata: + labels: + machineconfiguration.openshift.io/role: infra + name: 90-infra-testfile +spec: + config: + ignition: + version: 3.2.0 + storage: + files: + - contents: + source: data:,hello%20world%0A + mode: 420 + path: /home/core/test diff --git a/test/extended/testdata/machine_config/machineconfig/0-master-mc.yaml b/test/extended/testdata/machine_config/machineconfig/0-master-mc.yaml new file mode 100644 index 000000000000..b52938475f63 --- /dev/null +++ b/test/extended/testdata/machine_config/machineconfig/0-master-mc.yaml @@ -0,0 +1,16 @@ +apiVersion: machineconfiguration.openshift.io/v1 +kind: MachineConfig +metadata: + labels: + machineconfiguration.openshift.io/role: master + name: 90-master-testfile +spec: + config: + ignition: + version: 3.2.0 + storage: + files: + - contents: + source: data:,hello%20world%0A + mode: 420 + path: /home/core/test diff --git a/test/extended/testdata/machine_config/machineconfig/1-master-invalid-mc.yaml b/test/extended/testdata/machine_config/machineconfig/1-master-invalid-mc.yaml new file mode 100644 index 000000000000..9ca3c4b90859 --- /dev/null +++ b/test/extended/testdata/machine_config/machineconfig/1-master-invalid-mc.yaml @@ -0,0 +1,16 @@ +apiVersion: machineconfiguration.openshift.io/v1 +kind: MachineConfig +metadata: + labels: + machineconfiguration.openshift.io/role: master + name: 91-master-testfile-invalid +spec: + config: + ignition: + version: 3.2.0 + storage: + files: + - 
contents: + source: data:,hello%20world%0A + mode: 420 + path: /home/core diff --git a/test/extended/testdata/machine_config/machineconfig/1-worker-invalid-mc.yaml b/test/extended/testdata/machine_config/machineconfig/1-worker-invalid-mc.yaml new file mode 100644 index 000000000000..489717e42fed --- /dev/null +++ b/test/extended/testdata/machine_config/machineconfig/1-worker-invalid-mc.yaml @@ -0,0 +1,16 @@ +apiVersion: machineconfiguration.openshift.io/v1 +kind: MachineConfig +metadata: + labels: + machineconfiguration.openshift.io/role: worker + name: 91-worker-testfile-invalid +spec: + config: + ignition: + version: 3.2.0 + storage: + files: + - contents: + source: data:,hello%20world%0A + mode: 420 + path: /home/core diff --git a/test/extended/testdata/machine_config/machineconfigpool/infra-mcp.yaml b/test/extended/testdata/machine_config/machineconfigpool/infra-mcp.yaml new file mode 100644 index 000000000000..afc03a26c2e2 --- /dev/null +++ b/test/extended/testdata/machine_config/machineconfigpool/infra-mcp.yaml @@ -0,0 +1,11 @@ +apiVersion: machineconfiguration.openshift.io/v1 +kind: MachineConfigPool +metadata: + name: infra +spec: + machineConfigSelector: + matchExpressions: + - {key: machineconfiguration.openshift.io/role, operator: In, values: [worker,infra]} + nodeSelector: + matchLabels: + node-role.kubernetes.io/infra: "" diff --git a/test/extended/util/annotate/generated/zz_generated.annotations.go b/test/extended/util/annotate/generated/zz_generated.annotations.go index 6f9f3ccd5156..4b127893e02a 100644 --- a/test/extended/util/annotate/generated/zz_generated.annotations.go +++ b/test/extended/util/annotate/generated/zz_generated.annotations.go @@ -1355,6 +1355,20 @@ var Annotations = map[string]string{ "[sig-kubevirt] services when running openshift cluster on KubeVirt virtual machines should allow direct connections to pods from guest cluster pod in pod network across different guest nodes": " [Suite:openshift/conformance/parallel]", + 
"[sig-mco][OCPFeatureGate:MachineConfigNodes] Should properly block MCN updates by impersonation of the MCD SA [apigroup:machineconfiguration.openshift.io]": " [Suite:openshift/conformance/parallel]", + + "[sig-mco][OCPFeatureGate:MachineConfigNodes] Should properly block MCN updates from a MCD that is not the associated one [apigroup:machineconfiguration.openshift.io]": " [Suite:openshift/conformance/parallel]", + + "[sig-mco][OCPFeatureGate:MachineConfigNodes] Should properly update the MCN from the associated MCD [apigroup:machineconfiguration.openshift.io]": " [Suite:openshift/conformance/parallel]", + + "[sig-mco][OCPFeatureGate:MachineConfigNodes] [Serial]Should have MCN properties matching associated node properties [apigroup:machineconfiguration.openshift.io]": " [Suite:openshift/conformance/serial]", + + "[sig-mco][OCPFeatureGate:MachineConfigNodes] [Serial]Should properly transition through MCN conditions on node update [apigroup:machineconfiguration.openshift.io]": " [Suite:openshift/conformance/serial]", + + "[sig-mco][OCPFeatureGate:MachineConfigNodes] [Serial][Slow]Should properly create and remove MCN on node creation and deletion [apigroup:machineconfiguration.openshift.io]": "", + + "[sig-mco][OCPFeatureGate:MachineConfigNodes] [Serial][Slow]Should properly report MCN conditions on node degrade [apigroup:machineconfiguration.openshift.io]": "", + "[sig-mco][OCPFeatureGate:ManagedBootImagesAWS][Serial] Should degrade on a MachineSet with an OwnerReference [apigroup:machineconfiguration.openshift.io]": " [Suite:openshift/conformance/serial]", "[sig-mco][OCPFeatureGate:ManagedBootImagesAWS][Serial] Should not update boot images on any MachineSet when not configured [apigroup:machineconfiguration.openshift.io]": " [Suite:openshift/conformance/serial]", diff --git a/zz_generated.manifests/test-reporting.yaml b/zz_generated.manifests/test-reporting.yaml index 81dd41796392..8ce161275ada 100644 --- a/zz_generated.manifests/test-reporting.yaml +++ 
b/zz_generated.manifests/test-reporting.yaml @@ -114,6 +114,20 @@ spec: on desired.architecture field in the CV [apigroup:image.openshift.io]' - featureGate: MachineConfigNodes tests: + - testName: '[sig-mco][OCPFeatureGate:MachineConfigNodes] Should properly block + MCN updates by impersonation of the MCD SA [apigroup:machineconfiguration.openshift.io]' + - testName: '[sig-mco][OCPFeatureGate:MachineConfigNodes] Should properly block + MCN updates from a MCD that is not the associated one [apigroup:machineconfiguration.openshift.io]' + - testName: '[sig-mco][OCPFeatureGate:MachineConfigNodes] Should properly update + the MCN from the associated MCD [apigroup:machineconfiguration.openshift.io]' + - testName: '[sig-mco][OCPFeatureGate:MachineConfigNodes] [Serial]Should have + MCN properties matching associated node properties [apigroup:machineconfiguration.openshift.io]' + - testName: '[sig-mco][OCPFeatureGate:MachineConfigNodes] [Serial]Should properly + transition through MCN conditions on node update [apigroup:machineconfiguration.openshift.io]' + - testName: '[sig-mco][OCPFeatureGate:MachineConfigNodes] [Serial][Slow]Should + properly create and remove MCN on node creation and deletion [apigroup:machineconfiguration.openshift.io]' + - testName: '[sig-mco][OCPFeatureGate:MachineConfigNodes] [Serial][Slow]Should + properly report MCN conditions on node degrade [apigroup:machineconfiguration.openshift.io]' - testName: '[sig-mco][OCPFeatureGate:PinnedImages][OCPFeatureGate:MachineConfigNodes][Serial] All Nodes in a Custom Pool should have the PinnedImages in PIS [apigroup:machineconfiguration.openshift.io]' - testName: '[sig-mco][OCPFeatureGate:PinnedImages][OCPFeatureGate:MachineConfigNodes][Serial] From 6f7469ee5ac5b1a1009de7829e0e5303997c51a7 Mon Sep 17 00:00:00 2001 From: Isabella Janssen Date: Mon, 14 Apr 2025 13:32:52 -0400 Subject: [PATCH 3/3] mco-1596: change node update to be rebootless & mco-1595: divide tests for default and custom mcp cases --- 
test/extended/machine_config/OWNERS | 2 +- .../boot_image_update_agnostic.go | 8 +- .../machine_config/boot_image_update_aws.go | 2 +- .../machine_config/boot_image_update_gcp.go | 2 +- test/extended/machine_config/helpers.go | 71 +++--- .../machine_config/machine_config_node.go | 217 ++++++++++++------ test/extended/testdata/bindata.go | 39 +++- .../nodedisruptionpolicy-rebootless-path.yaml | 11 + .../generated/zz_generated.annotations.go | 6 +- zz_generated.manifests/test-reporting.yaml | 7 +- 10 files changed, 252 insertions(+), 113 deletions(-) create mode 100644 test/extended/testdata/machine_config/machineconfigurations/nodedisruptionpolicy-rebootless-path.yaml diff --git a/test/extended/machine_config/OWNERS b/test/extended/machine_config/OWNERS index 87dcdd07da38..179fe20355af 100644 --- a/test/extended/machine_config/OWNERS +++ b/test/extended/machine_config/OWNERS @@ -17,4 +17,4 @@ reviewers: - LorbusChris - RishabhSaini - isabella-janssen - - pablintino \ No newline at end of file + - pablintino diff --git a/test/extended/machine_config/boot_image_update_agnostic.go b/test/extended/machine_config/boot_image_update_agnostic.go index 3116176337ad..be6623a193b0 100644 --- a/test/extended/machine_config/boot_image_update_agnostic.go +++ b/test/extended/machine_config/boot_image_update_agnostic.go @@ -18,7 +18,7 @@ import ( func AllMachineSetTest(oc *exutil.CLI, fixture string) { // This fixture applies a boot image update configuration that opts in all machinesets - ApplyBootImageFixture(oc, fixture) + ApplyMachineConfigurationFixture(oc, fixture) // Step through all machinesets and verify boot images are reconciled correctly. 
machineClient, err := machineclient.NewForConfig(oc.KubeFramework().ClientConfig()) @@ -34,7 +34,7 @@ func AllMachineSetTest(oc *exutil.CLI, fixture string) { func PartialMachineSetTest(oc *exutil.CLI, fixture string) { // This fixture applies a boot image update configuration that opts in any machineset with the label test=boot - ApplyBootImageFixture(oc, fixture) + ApplyMachineConfigurationFixture(oc, fixture) // Pick a random machineset to test machineClient, err := machineclient.NewForConfig(oc.KubeFramework().ClientConfig()) @@ -61,7 +61,7 @@ func PartialMachineSetTest(oc *exutil.CLI, fixture string) { func NoneMachineSetTest(oc *exutil.CLI, fixture string) { // This fixture applies a boot image update configuration that opts in no machinesets, i.e. feature is disabled. - ApplyBootImageFixture(oc, fixture) + ApplyMachineConfigurationFixture(oc, fixture) // Step through all machinesets and verify boot images are reconciled correctly. machineClient, err := machineclient.NewForConfig(oc.KubeFramework().ClientConfig()) @@ -77,7 +77,7 @@ func NoneMachineSetTest(oc *exutil.CLI, fixture string) { func DegradeOnOwnerRefTest(oc *exutil.CLI, fixture string) { e2eskipper.Skipf("This test is temporarily disabled until boot image skew enforcement is implemented") // This fixture applies a boot image update configuration that opts in all machinesets - ApplyBootImageFixture(oc, fixture) + ApplyMachineConfigurationFixture(oc, fixture) // Pick a random machineset to test machineClient, err := machineclient.NewForConfig(oc.KubeFramework().ClientConfig()) diff --git a/test/extended/machine_config/boot_image_update_aws.go b/test/extended/machine_config/boot_image_update_aws.go index 4d2b5beba3fc..64efca62e9e2 100644 --- a/test/extended/machine_config/boot_image_update_aws.go +++ b/test/extended/machine_config/boot_image_update_aws.go @@ -32,7 +32,7 @@ var _ = g.Describe("[sig-mco][OCPFeatureGate:ManagedBootImagesAWS][Serial]", fun }) g.AfterEach(func() { - 
ApplyBootImageFixture(oc, emptyMachineSetFixture) + ApplyMachineConfigurationFixture(oc, emptyMachineSetFixture) }) g.It("Should update boot images only on MachineSets that are opted in [apigroup:machineconfiguration.openshift.io]", func() { diff --git a/test/extended/machine_config/boot_image_update_gcp.go b/test/extended/machine_config/boot_image_update_gcp.go index f2e09bbac303..43e3ed3b6624 100644 --- a/test/extended/machine_config/boot_image_update_gcp.go +++ b/test/extended/machine_config/boot_image_update_gcp.go @@ -33,7 +33,7 @@ var _ = g.Describe("[sig-mco][OCPFeatureGate:ManagedBootImages][Serial]", func() g.AfterEach(func() { // Clear out boot image configuration between tests - ApplyBootImageFixture(oc, emptyMachineSetFixture) + ApplyMachineConfigurationFixture(oc, emptyMachineSetFixture) }) g.It("Should update boot images only on MachineSets that are opted in [apigroup:machineconfiguration.openshift.io]", func() { diff --git a/test/extended/machine_config/helpers.go b/test/extended/machine_config/helpers.go index c6085765492d..d05a782fc0f2 100644 --- a/test/extended/machine_config/helpers.go +++ b/test/extended/machine_config/helpers.go @@ -12,6 +12,7 @@ import ( "time" "k8s.io/apimachinery/pkg/fields" + "k8s.io/apimachinery/pkg/util/wait" "k8s.io/client-go/kubernetes" osconfigv1 "github.com/openshift/api/config/v1" @@ -328,7 +329,7 @@ func WaitForOneMasterNodeToBeReady(oc *exutil.CLI) error { } // Applies a boot image fixture and waits for the MCO to reconcile the status -func ApplyBootImageFixture(oc *exutil.CLI, fixture string) { +func ApplyMachineConfigurationFixture(oc *exutil.CLI, fixture string) { err := oc.Run("apply").Args("-f", fixture).Execute() o.Expect(err).NotTo(o.HaveOccurred()) @@ -498,30 +499,17 @@ func WaitForMCPToBeReady(oc *exutil.CLI, machineConfigClient *machineconfigclien }, 5*time.Minute, 10*time.Second).Should(o.BeTrue(), "Timed out waiting for MCP '%v' to be in 'Updated' state with %v ready machines.", poolName, 
readyMachineCount) } -// `GetCordonedNodes` gets all cordoned nodes -// - If maxUnavailable > 1, this will return multiple cordoned nodes -// - If maxUnavailable == 1, this will return one cordoned node -func GetCordonedNodes(oc *exutil.CLI, mcpName string) []corev1.Node { +// `GetUpdatingNodeSNO` returns the SNO node when the `master` MCP of the cluster starts updating +func GetUpdatingNodeSNO(oc *exutil.CLI, mcpName string) corev1.Node { // Wait for the MCP to start updating o.Expect(WaitForMCPConditionStatus(oc, mcpName, mcfgv1.MachineConfigPoolUpdating, corev1.ConditionTrue, 3*time.Minute, 2*time.Second)).NotTo(o.HaveOccurred(), "Waiting for 'Updating' status change failed.") - // Get updating nodes - var allUpdatingNodes []corev1.Node - o.Eventually(func() bool { - nodes, nodeErr := GetNodesByRole(oc, mcpName) - o.Expect(nodeErr).NotTo(o.HaveOccurred(), "Error getting nodes from %v MCP.", mcpName) - o.Expect(nodes).ShouldNot(o.BeEmpty(), "No nodes found for %v MCP.", mcpName) - - for _, node := range nodes { - if node.Spec.Unschedulable { - allUpdatingNodes = append(allUpdatingNodes, node) - } - } + // SNO only has one node, so when the MCP is updating, the node is also updating + node, nodeErr := GetNodesByRole(oc, mcpName) + o.Expect(nodeErr).NotTo(o.HaveOccurred(), "Error getting nodes from %v MCP.", mcpName) + o.Expect(node).ShouldNot(o.BeEmpty(), "No nodes found for %v MCP.", mcpName) - return len(allUpdatingNodes) > 0 - }, 5*time.Minute, 10*time.Second).Should(o.BeTrue()) - - return allUpdatingNodes + return node[0] } // `WaitForMCPConditionStatus` waits up to the desired timeout for the desired MCP condition to match the desired status (ex. 
wait until "Updating" is "True") @@ -539,7 +527,7 @@ func WaitForMCPConditionStatus(oc *exutil.CLI, mcpName string, conditionType mcf return false } - // Loop through conditions to get check for desired condition type/status combonation + // Loop through conditions to get check for desired condition type/status combination conditions := mcp.Status.Conditions for _, condition := range conditions { if condition.Type == conditionType { @@ -554,19 +542,38 @@ func WaitForMCPConditionStatus(oc *exutil.CLI, mcpName string, conditionType mcf } // `WaitForMCNConditionStatus` waits up to a specified timeout for the desired MCN condition to match the desired status (ex. wait until "Updated" is "False") -func WaitForMCNConditionStatus(clientSet *machineconfigclient.Clientset, mcnName string, conditionType mcfgv1alpha1.StateProgress, status metav1.ConditionStatus, timeout time.Duration, interval time.Duration) error { - o.Eventually(func() bool { +func WaitForMCNConditionStatus(clientSet *machineconfigclient.Clientset, mcnName string, conditionType mcfgv1alpha1.StateProgress, status metav1.ConditionStatus, + timeout time.Duration, interval time.Duration) (bool, error) { + + conditionMet := false + var conditionErr error + var workerNodeMCN *mcfgv1alpha1.MachineConfigNode + if err := wait.PollUntilContextTimeout(context.TODO(), interval, timeout, true, func(_ context.Context) (bool, error) { framework.Logf("Waiting for MCN '%v' %v condition to be %v.", mcnName, conditionType, status) - // Get MCN & check if the MCN condition status matches the desired status - workerNodeMCN, workerErr := clientSet.MachineconfigurationV1alpha1().MachineConfigNodes().Get(context.TODO(), mcnName, metav1.GetOptions{}) - if workerErr != nil { - framework.Logf("Error getting MCN for node '%v': %v", mcnName, workerErr) - return false + workerNodeMCN, conditionErr = clientSet.MachineconfigurationV1alpha1().MachineConfigNodes().Get(context.TODO(), mcnName, metav1.GetOptions{}) + // Record if an error 
occurs when getting the MCN resource + if conditionErr != nil { + framework.Logf("Error getting MCN for node '%v': %v", mcnName, conditionErr) + return false, nil } - return CheckMCNConditionStatus(workerNodeMCN, conditionType, status) - }, timeout, interval).Should(o.BeTrue()) - return nil + + // Check if the MCN status is as desired + conditionMet = CheckMCNConditionStatus(workerNodeMCN, conditionType, status) + return conditionMet, nil + }); err != nil { + framework.Logf("The desired MCN condition was never met: %v", err) + // Handle the situation where there were errors getting the MCN resource + if conditionErr != nil { + framework.Logf("An error occured waiting for MCN '%v' %v condition to be %v: %v", mcnName, conditionType, status, conditionErr) + return conditionMet, fmt.Errorf("MCN '%v' %v condition was not %v: %v", mcnName, conditionType, status, conditionErr) + } + // Handle case when no errors occur grabbing the MCN, but we time out waiting for the condition to be in the desired state + framework.Logf("A timeout occured waiting for MCN '%v' %v condition was not %v.", mcnName, conditionType, status) + return conditionMet, nil + } + + return conditionMet, conditionErr } // `CheckMCNConditionStatus` checks that an MCN condition matches the desired status (ex. 
confirm "Updated" is "False") diff --git a/test/extended/machine_config/machine_config_node.go b/test/extended/machine_config/machine_config_node.go index 599594ddced6..642bdc032d87 100644 --- a/test/extended/machine_config/machine_config_node.go +++ b/test/extended/machine_config/machine_config_node.go @@ -30,29 +30,37 @@ const ( var _ = g.Describe("[sig-mco][OCPFeatureGate:MachineConfigNodes]", func() { defer g.GinkgoRecover() var ( - MCOMachineConfigPoolBaseDir = exutil.FixturePath("testdata", "machine_config", "machineconfigpool") - MCOMachineConfigBaseDir = exutil.FixturePath("testdata", "machine_config", "machineconfig") - infraMCPFixture = filepath.Join(MCOMachineConfigPoolBaseDir, "infra-mcp.yaml") - customMCFixture = filepath.Join(MCOMachineConfigBaseDir, "0-infra-mc.yaml") - masterMCFixture = filepath.Join(MCOMachineConfigBaseDir, "0-master-mc.yaml") - invalidWorkerMCFixture = filepath.Join(MCOMachineConfigBaseDir, "1-worker-invalid-mc.yaml") - invalidMasterMCFixture = filepath.Join(MCOMachineConfigBaseDir, "1-master-invalid-mc.yaml") - oc = exutil.NewCLIWithoutNamespace("machine-config") + MCOMachineConfigPoolBaseDir = exutil.FixturePath("testdata", "machine_config", "machineconfigpool") + MCOMachineConfigurationBaseDir = exutil.FixturePath("testdata", "machine_config", "machineconfigurations") + MCOMachineConfigBaseDir = exutil.FixturePath("testdata", "machine_config", "machineconfig") + infraMCPFixture = filepath.Join(MCOMachineConfigPoolBaseDir, "infra-mcp.yaml") + nodeDisruptionFixture = filepath.Join(MCOMachineConfigurationBaseDir, "nodedisruptionpolicy-rebootless-path.yaml") + nodeDisruptionEmptyFixture = filepath.Join(MCOMachineConfigurationBaseDir, "managedbootimages-empty.yaml") + customMCFixture = filepath.Join(MCOMachineConfigBaseDir, "0-infra-mc.yaml") + masterMCFixture = filepath.Join(MCOMachineConfigBaseDir, "0-master-mc.yaml") + invalidWorkerMCFixture = filepath.Join(MCOMachineConfigBaseDir, "1-worker-invalid-mc.yaml") + 
invalidMasterMCFixture = filepath.Join(MCOMachineConfigBaseDir, "1-master-invalid-mc.yaml") + oc = exutil.NewCLIWithoutNamespace("machine-config") ) - g.It("[Serial]Should have MCN properties matching associated node properties [apigroup:machineconfiguration.openshift.io]", func() { + g.It("Should have MCN properties matching associated node properties for nodes in default MCPs [apigroup:machineconfiguration.openshift.io]", func() { if IsSingleNode(oc) { //handle SNO clusters - ValidateMCNPropertiesSNO(oc, infraMCPFixture) + ValidateMCNPropertiesSNO(oc) } else { //handle standard, non-SNO, clusters - ValidateMCNProperties(oc, infraMCPFixture) + ValidateMCNPropertiesDefaultMCP(oc) } }) - g.It("[Serial]Should properly transition through MCN conditions on node update [apigroup:machineconfiguration.openshift.io]", func() { + g.It("[Serial]Should have MCN properties matching associated node properties for nodes in custom MCPs [apigroup:machineconfiguration.openshift.io]", func() { + skipOnSingleNodeTopology(oc) //skip this test for SNO + ValidateMCNPropertiesCustomMCP(oc, infraMCPFixture) + }) + + g.It("[Serial]Should properly transition through MCN conditions on rebootless node update [apigroup:machineconfiguration.openshift.io]", func() { if IsSingleNode(oc) { - ValidateMCNConditionTransitionsSNO(oc, masterMCFixture) + ValidateMCNConditionTransitionsOnRebootlessUpdateSNO(oc, nodeDisruptionFixture, nodeDisruptionEmptyFixture, masterMCFixture) } else { - ValidateMCNConditionTransitions(oc, customMCFixture, infraMCPFixture) + ValidateMCNConditionTransitionsOnRebootlessUpdate(oc, nodeDisruptionFixture, nodeDisruptionEmptyFixture, customMCFixture, infraMCPFixture) } }) @@ -85,11 +93,12 @@ var _ = g.Describe("[sig-mco][OCPFeatureGate:MachineConfigNodes]", func() { }) }) -// `ValidateMCNProperties` checks that MCN properties match the corresponding node properties +// `ValidateMCNPropertiesDefaultMCP` checks that MCN properties match the corresponding node +// properties for 
nodes in the default (worker & master) MCPs. // Note: This test case does not work for SNO clusters due to the cluster's one node assuming // both the worker and master role since `GetRandomNode` selects nodes using node roles. Role // matching is not necessarily synonymous with MCP association in edge cases, such as in SNO. -func ValidateMCNProperties(oc *exutil.CLI, fixture string) { +func ValidateMCNPropertiesDefaultMCP(oc *exutil.CLI) { // Create client set for test clientSet, clientErr := machineconfigclient.NewForConfig(oc.KubeFramework().ClientConfig()) o.Expect(clientErr).NotTo(o.HaveOccurred(), "Error creating client set for test.") @@ -109,6 +118,17 @@ func ValidateMCNProperties(oc *exutil.CLI, fixture string) { framework.Logf("Validating MCN properties for node in default '%v' pool.", master) mcnErr = ValidateMCNForNodeInPool(oc, clientSet, masterNode, master) o.Expect(mcnErr).NotTo(o.HaveOccurred(), fmt.Sprintf("Error validating MCN properties node in default pool '%v'.", master)) +} + +// `ValidateMCNPropertiesCustomMCP` checks that MCN properties match the corresponding node properties +func ValidateMCNPropertiesCustomMCP(oc *exutil.CLI, fixture string) { + // Create client set for test + clientSet, clientErr := machineconfigclient.NewForConfig(oc.KubeFramework().ClientConfig()) + o.Expect(clientErr).NotTo(o.HaveOccurred(), "Error creating client set for test.") + + // Grab a random node from each default pool + workerNode := GetRandomNode(oc, worker) + o.Expect(workerNode.Name).NotTo(o.Equal(""), "Could not get a worker node.") // Cleanup custom MCP on test completion or failure defer func() { @@ -151,14 +171,14 @@ func ValidateMCNProperties(oc *exutil.CLI, fixture string) { // Validate MCN for node in custom pool framework.Logf("Validating MCN properties for node in custom '%v' pool.", custom) - mcnErr = ValidateMCNForNodeInPool(oc, clientSet, customNode, custom) + mcnErr := ValidateMCNForNodeInPool(oc, clientSet, customNode, custom) 
o.Expect(mcnErr).NotTo(o.HaveOccurred(), fmt.Sprintf("Error validating MCN properties node in custom pool '%v'.", custom)) } // `ValidateMCNPropertiesSNO` checks that MCN properties match the corresponding node properties // specifically for SNO clusters. Note that this test does not include creating a custom MCP, as // the default SNO node remains part of the master pool. -func ValidateMCNPropertiesSNO(oc *exutil.CLI, fixture string) { +func ValidateMCNPropertiesSNO(oc *exutil.CLI) { // Create client set for test clientSet, clientErr := machineconfigclient.NewForConfig(oc.KubeFramework().ClientConfig()) o.Expect(clientErr).NotTo(o.HaveOccurred(), "Error creating client set for test.") @@ -176,7 +196,7 @@ func ValidateMCNPropertiesSNO(oc *exutil.CLI, fixture string) { // `ValidateMCNConditionTransitions` checks that Conditions properly update on a node update // Note that a custom MCP is created for this test to limit the number of upgrading nodes & // decrease cleanup time. -func ValidateMCNConditionTransitions(oc *exutil.CLI, mcFixture string, mcpFixture string) { +func ValidateMCNConditionTransitionsOnRebootlessUpdate(oc *exutil.CLI, nodeDisruptionFixture string, nodeDisruptionEmptyFixture string, mcFixture string, mcpFixture string) { poolName := custom mcName := fmt.Sprintf("90-%v-testfile", poolName) @@ -188,7 +208,17 @@ func ValidateMCNConditionTransitions(oc *exutil.CLI, mcFixture string, mcpFixtur workerNode := GetRandomNode(oc, worker) o.Expect(workerNode.Name).NotTo(o.Equal(""), "Could not get a worker node.") - // Cleanup custom MCP and delete MC on failure or test completion + // Remove node disruption policy on test completion or failure + defer func() { + // Apply empty MachineConfiguration fixture to remove previously set NodeDisruptionPolicy + framework.Logf("Removing node disruption policy.") + ApplyMachineConfigurationFixture(oc, nodeDisruptionEmptyFixture) + }() + + // Apply a node disruption policy to allow for rebootless update + 
ApplyMachineConfigurationFixture(oc, nodeDisruptionFixture) + + // Cleanup custom MCP, and delete MC on test completion or failure defer func() { // Get starting state of default worker MCP workerMcp, err := clientSet.MachineconfigurationV1().MachineConfigPools().Get(context.TODO(), worker, metav1.GetOptions{}) @@ -230,7 +260,7 @@ func ValidateMCNConditionTransitions(oc *exutil.CLI, mcFixture string, mcpFixtur updatingNodeName := workerNode.Name // Validate transition through conditions for MCN - validateTransitionThroughConditions(clientSet, updatingNodeName) + validateTransitionThroughConditions(clientSet, updatingNodeName, true) // When an update is complete, all conditions other than `Updated` must be false framework.Logf("Checking all conditions other than 'Updated' are False.") @@ -239,7 +269,7 @@ func ValidateMCNConditionTransitions(oc *exutil.CLI, mcFixture string, mcpFixtur // `ValidateMCNConditionTransitionsSNO` checks that Conditions properly update on a node update // in Single Node Openshift -func ValidateMCNConditionTransitionsSNO(oc *exutil.CLI, mcFixture string) { +func ValidateMCNConditionTransitionsOnRebootlessUpdateSNO(oc *exutil.CLI, nodeDisruptionFixture string, nodeDisruptionEmptyFixture string, mcFixture string) { poolName := master mcName := fmt.Sprintf("90-%v-testfile", poolName) @@ -247,23 +277,39 @@ func ValidateMCNConditionTransitionsSNO(oc *exutil.CLI, mcFixture string) { clientSet, clientErr := machineconfigclient.NewForConfig(oc.KubeFramework().ClientConfig()) o.Expect(clientErr).NotTo(o.HaveOccurred(), "Error creating client set for test.") - // Delete MC on failure or test completion + // Remove node disruption policy on test completion or failure defer func() { + // Apply empty MachineConfiguration fixture to remove previously set NodeDisruptionPolicy + framework.Logf("Removing node disruption policy.") + ApplyMachineConfigurationFixture(oc, nodeDisruptionEmptyFixture) + }() + + // Apply a node disruption policy to allow for 
rebootless update + ApplyMachineConfigurationFixture(oc, nodeDisruptionFixture) + + // Delete applied MC on test completion or failure + defer func() { + // Delete applied MC + framework.Logf("Deleting MC '%v'.", mcName) deleteMCErr := oc.Run("delete").Args("machineconfig", mcName).Execute() o.Expect(deleteMCErr).NotTo(o.HaveOccurred(), fmt.Sprintf("Could not delete MachineConfig '%v'.", mcName)) + + // Wait for master MCP to be ready + time.Sleep(15 * time.Second) //wait to not catch the updated state before the deleted mc triggers an update + framework.Logf("Waiting for %v MCP to be updated with %v ready machines.", poolName, 1) + WaitForMCPToBeReady(oc, clientSet, poolName, 1) }() // Apply MC targeting worker node mcErr := oc.Run("apply").Args("-f", mcFixture).Execute() o.Expect(mcErr).NotTo(o.HaveOccurred(), "Could not apply MachineConfig.") - // Get the first updating node - updatingNodes := GetCordonedNodes(oc, poolName) - o.Expect(len(updatingNodes) > 0).Should(o.BeTrue(), fmt.Sprintf("No ready nodes found for MCP '%v'.", poolName)) - updatingNode := updatingNodes[0] + // Get the updating node + updatingNode := GetUpdatingNodeSNO(oc, poolName) + framework.Logf("Node '%v' is updating.", updatingNode.Name) // Validate transition through conditions for MCN - validateTransitionThroughConditions(clientSet, updatingNode.Name) + validateTransitionThroughConditions(clientSet, updatingNode.Name, true) // When an update is complete, all conditions other than `Updated` must be false framework.Logf("Checking all conditions other than 'Updated' are False.") @@ -271,63 +317,102 @@ func ValidateMCNConditionTransitionsSNO(oc *exutil.CLI, mcFixture string) { } // `validateTransitionThroughConditions` validates the condition trasnitions in the MCN during a node update -func validateTransitionThroughConditions(clientSet *machineconfigclient.Clientset, updatingNodeName string) { +func validateTransitionThroughConditions(clientSet *machineconfigclient.Clientset, updatingNodeName 
string, isRebootless bool) { // Note that some conditions are passed through quickly in a node update, so the test can // "miss" catching the phases. For test stability, if we fail to catch an "Unknown" status, // a warning will be logged instead of erroring out the test. framework.Logf("Waiting for Updated=False") - err := WaitForMCNConditionStatus(clientSet, updatingNodeName, mcfgv1alpha1.MachineConfigNodeUpdated, metav1.ConditionFalse, 1*time.Minute, 1*time.Second) - o.Expect(err).NotTo(o.HaveOccurred(), "Error, could not detect Updated=False.") + conditionMet, err := WaitForMCNConditionStatus(clientSet, updatingNodeName, mcfgv1alpha1.MachineConfigNodeUpdated, metav1.ConditionFalse, 1*time.Minute, 1*time.Second) + o.Expect(err).NotTo(o.HaveOccurred(), fmt.Sprintf("Error occured while waiting for Updated=False: %v", err)) + o.Expect(conditionMet).To(o.BeTrue(), "Error, could not detect Updated=False.") + framework.Logf("Waiting for UpdatePrepared=True") - err = WaitForMCNConditionStatus(clientSet, updatingNodeName, mcfgv1alpha1.MachineConfigNodeUpdatePrepared, metav1.ConditionTrue, 1*time.Minute, 1*time.Second) - o.Expect(err).NotTo(o.HaveOccurred(), "Error, could not detect UpdatePrepared=True.") + conditionMet, err = WaitForMCNConditionStatus(clientSet, updatingNodeName, mcfgv1alpha1.MachineConfigNodeUpdatePrepared, metav1.ConditionTrue, 1*time.Minute, 1*time.Second) + o.Expect(err).NotTo(o.HaveOccurred(), fmt.Sprintf("Error occured while waiting for UpdatePrepared=True: %v", err)) + o.Expect(conditionMet).To(o.BeTrue(), "Error, could not detect UpdatePrepared=True.") + framework.Logf("Waiting for UpdateExecuted=Unknown") - err = WaitForMCNConditionStatus(clientSet, updatingNodeName, mcfgv1alpha1.MachineConfigNodeUpdateExecuted, metav1.ConditionUnknown, 30*time.Second, 1*time.Second) - if err != nil { + conditionMet, err = WaitForMCNConditionStatus(clientSet, updatingNodeName, mcfgv1alpha1.MachineConfigNodeUpdateExecuted, metav1.ConditionUnknown, 
30*time.Second, 1*time.Second) + o.Expect(err).NotTo(o.HaveOccurred(), fmt.Sprintf("Error occured while waiting for UpdateExecuted=Unknown: %v", err)) + if !conditionMet { framework.Logf("Warning, could not detect UpdateExecuted=Unknown.") } - framework.Logf("Waiting for Cordoned=True") - err = WaitForMCNConditionStatus(clientSet, updatingNodeName, mcfgv1alpha1.MachineConfigNodeUpdateCordoned, metav1.ConditionTrue, 30*time.Second, 1*time.Second) - o.Expect(err).NotTo(o.HaveOccurred(), "Error, could not detect Cordoned=True.") - framework.Logf("Waiting for Drained=Unknown") - err = WaitForMCNConditionStatus(clientSet, updatingNodeName, mcfgv1alpha1.MachineConfigNodeUpdateDrained, metav1.ConditionUnknown, 15*time.Second, 1*time.Second) - if err != nil { - framework.Logf("Warning, could not detect Drained=Unknown.") + + // On standard, non-rebootless, update, check that node transitions through "Cordoned" and "Drained" phases + if !isRebootless { + framework.Logf("Waiting for Cordoned=True") + conditionMet, err = WaitForMCNConditionStatus(clientSet, updatingNodeName, mcfgv1alpha1.MachineConfigNodeUpdateCordoned, metav1.ConditionTrue, 30*time.Second, 1*time.Second) + o.Expect(err).NotTo(o.HaveOccurred(), fmt.Sprintf("Error occured while waiting for Cordoned=True: %v", err)) + o.Expect(conditionMet).To(o.BeTrue(), "Error, could not detect Cordoned=True.") + + framework.Logf("Waiting for Drained=Unknown") + conditionMet, err = WaitForMCNConditionStatus(clientSet, updatingNodeName, mcfgv1alpha1.MachineConfigNodeUpdateDrained, metav1.ConditionUnknown, 15*time.Second, 1*time.Second) + o.Expect(err).NotTo(o.HaveOccurred(), fmt.Sprintf("Error occured while waiting for Drained=Unknown: %v", err)) + if !conditionMet { + framework.Logf("Warning, could not detect Drained=Unknown.") + } + + framework.Logf("Waiting for Drained=True") + conditionMet, err = WaitForMCNConditionStatus(clientSet, updatingNodeName, mcfgv1alpha1.MachineConfigNodeUpdateDrained, metav1.ConditionTrue, 
4*time.Minute, 1*time.Second) + o.Expect(err).NotTo(o.HaveOccurred(), fmt.Sprintf("Error occured while waiting for Drained=True: %v", err)) + o.Expect(conditionMet).To(o.BeTrue(), "Error, could not detect Drained=True.") } - framework.Logf("Waiting for Drained=True") - err = WaitForMCNConditionStatus(clientSet, updatingNodeName, mcfgv1alpha1.MachineConfigNodeUpdateDrained, metav1.ConditionTrue, 4*time.Minute, 1*time.Second) - o.Expect(err).NotTo(o.HaveOccurred(), "Error, could not detect Drained=True.") + framework.Logf("Waiting for AppliedFilesAndOS=Unknown") - err = WaitForMCNConditionStatus(clientSet, updatingNodeName, mcfgv1alpha1.MachineConfigNodeUpdateFilesAndOS, metav1.ConditionUnknown, 30*time.Second, 1*time.Second) - if err != nil { + conditionMet, err = WaitForMCNConditionStatus(clientSet, updatingNodeName, mcfgv1alpha1.MachineConfigNodeUpdateFilesAndOS, metav1.ConditionUnknown, 30*time.Second, 1*time.Second) + o.Expect(err).NotTo(o.HaveOccurred(), fmt.Sprintf("Error occured while waiting for AppliedFilesAndOS=Unknown: %v", err)) + if !conditionMet { framework.Logf("Warning, could not detect AppliedFilesAndOS=Unknown.") } + framework.Logf("Waiting for AppliedFilesAndOS=True") - err = WaitForMCNConditionStatus(clientSet, updatingNodeName, mcfgv1alpha1.MachineConfigNodeUpdateFilesAndOS, metav1.ConditionTrue, 3*time.Minute, 1*time.Second) - o.Expect(err).NotTo(o.HaveOccurred(), "Error, could not detect AppliedFilesAndOS=True.") + conditionMet, err = WaitForMCNConditionStatus(clientSet, updatingNodeName, mcfgv1alpha1.MachineConfigNodeUpdateFilesAndOS, metav1.ConditionTrue, 3*time.Minute, 1*time.Second) + o.Expect(err).NotTo(o.HaveOccurred(), fmt.Sprintf("Error occured while waiting for AppliedFilesAndOS=True: %v", err)) + o.Expect(conditionMet).To(o.BeTrue(), "Error, could not detect AppliedFilesAndOS=True.") + framework.Logf("Waiting for UpdateExecuted=True") - err = WaitForMCNConditionStatus(clientSet, updatingNodeName, 
mcfgv1alpha1.MachineConfigNodeUpdateExecuted, metav1.ConditionTrue, 20*time.Second, 1*time.Second) - o.Expect(err).NotTo(o.HaveOccurred(), "Error, could not detect UpdateExecuted=True.") - framework.Logf("Waiting for RebootedNode=Unknown") - err = WaitForMCNConditionStatus(clientSet, updatingNodeName, mcfgv1alpha1.MachineConfigNodeUpdateRebooted, metav1.ConditionUnknown, 15*time.Second, 1*time.Second) - if err != nil { - framework.Logf("Warning, could not detect RebootedNode=Unknown.") + conditionMet, err = WaitForMCNConditionStatus(clientSet, updatingNodeName, mcfgv1alpha1.MachineConfigNodeUpdateExecuted, metav1.ConditionTrue, 20*time.Second, 1*time.Second) + o.Expect(err).NotTo(o.HaveOccurred(), fmt.Sprintf("Error occured while waiting for UpdateExecuted=True: %v", err)) + o.Expect(conditionMet).To(o.BeTrue(), "Error, could not detect UpdateExecuted=True.") + + // On rebootless update, check that node transitions through "UpdatePostActionComplete" phase + if isRebootless { + framework.Logf("Waiting for UpdatePostActionComplete=True") + conditionMet, err = WaitForMCNConditionStatus(clientSet, updatingNodeName, mcfgv1alpha1.MachineConfigNodeUpdatePostActionComplete, metav1.ConditionTrue, 1*time.Minute, 1*time.Second) + o.Expect(err).NotTo(o.HaveOccurred(), fmt.Sprintf("Error occured while waiting for UpdatePostActionComplete=True: %v", err)) + o.Expect(conditionMet).To(o.BeTrue(), "Error, could not detect UpdatePostActionComplete=True.") + } else { // On standard, non-rebootless, update, check that node transitions through "RebootedNode" phase + framework.Logf("Waiting for RebootedNode=Unknown") + conditionMet, err = WaitForMCNConditionStatus(clientSet, updatingNodeName, mcfgv1alpha1.MachineConfigNodeUpdateRebooted, metav1.ConditionUnknown, 15*time.Second, 1*time.Second) + o.Expect(err).NotTo(o.HaveOccurred(), fmt.Sprintf("Error occured while waiting for RebootedNode=Unknown: %v", err)) + if !conditionMet { + framework.Logf("Warning, could not detect 
RebootedNode=Unknown.") + } + + framework.Logf("Waiting for RebootedNode=True") + conditionMet, err = WaitForMCNConditionStatus(clientSet, updatingNodeName, mcfgv1alpha1.MachineConfigNodeUpdateRebooted, metav1.ConditionTrue, 6*time.Minute, 1*time.Second) + o.Expect(err).NotTo(o.HaveOccurred(), fmt.Sprintf("Error occured while waiting for RebootedNode=True: %v", err)) + o.Expect(conditionMet).To(o.BeTrue(), "Error, could not detect RebootedNode=True.") } - framework.Logf("Waiting for RebootedNode=True") - err = WaitForMCNConditionStatus(clientSet, updatingNodeName, mcfgv1alpha1.MachineConfigNodeUpdateRebooted, metav1.ConditionTrue, 6*time.Minute, 1*time.Second) - o.Expect(err).NotTo(o.HaveOccurred(), "Error, could not detect RebootedNode=True.") framework.Logf("Waiting for Resumed=True") - err = WaitForMCNConditionStatus(clientSet, updatingNodeName, mcfgv1alpha1.MachineConfigNodeResumed, metav1.ConditionTrue, 15*time.Second, 1*time.Second) - o.Expect(err).NotTo(o.HaveOccurred(), "Error, could not detect Resumed=True.") + conditionMet, err = WaitForMCNConditionStatus(clientSet, updatingNodeName, mcfgv1alpha1.MachineConfigNodeResumed, metav1.ConditionTrue, 15*time.Second, 1*time.Second) + o.Expect(err).NotTo(o.HaveOccurred(), fmt.Sprintf("Error occured while waiting for Resumed=True: %v", err)) + o.Expect(conditionMet).To(o.BeTrue(), "Error, could not detect Resumed=True.") + framework.Logf("Waiting for UpdateComplete=True") - err = WaitForMCNConditionStatus(clientSet, updatingNodeName, mcfgv1alpha1.MachineConfigNodeUpdateComplete, metav1.ConditionTrue, 10*time.Second, 1*time.Second) - o.Expect(err).NotTo(o.HaveOccurred(), "Error, could not detect UpdateComplete=True.") + conditionMet, err = WaitForMCNConditionStatus(clientSet, updatingNodeName, mcfgv1alpha1.MachineConfigNodeUpdateComplete, metav1.ConditionTrue, 10*time.Second, 1*time.Second) + o.Expect(err).NotTo(o.HaveOccurred(), fmt.Sprintf("Error occured while waiting for UpdateComplete=True: %v", err)) + 
o.Expect(conditionMet).To(o.BeTrue(), "Error, could not detect UpdateComplete=True.") + framework.Logf("Waiting for Uncordoned=True") - err = WaitForMCNConditionStatus(clientSet, updatingNodeName, mcfgv1alpha1.MachineConfigNodeUpdateUncordoned, metav1.ConditionTrue, 10*time.Second, 1*time.Second) - o.Expect(err).NotTo(o.HaveOccurred(), "Error, could not detect Uncordoned=True.") + conditionMet, err = WaitForMCNConditionStatus(clientSet, updatingNodeName, mcfgv1alpha1.MachineConfigNodeUpdateUncordoned, metav1.ConditionTrue, 10*time.Second, 1*time.Second) + o.Expect(err).NotTo(o.HaveOccurred(), fmt.Sprintf("Error occured while waiting for Uncordoned=True: %v", err)) + o.Expect(conditionMet).To(o.BeTrue(), "Error, could not detect Uncordoned=True.") + framework.Logf("Waiting for Updated=True") - err = WaitForMCNConditionStatus(clientSet, updatingNodeName, mcfgv1alpha1.MachineConfigNodeUpdated, metav1.ConditionTrue, 1*time.Minute, 1*time.Second) - o.Expect(err).NotTo(o.HaveOccurred(), "Error, could not detect Updated=True.") + conditionMet, err = WaitForMCNConditionStatus(clientSet, updatingNodeName, mcfgv1alpha1.MachineConfigNodeUpdated, metav1.ConditionTrue, 1*time.Minute, 1*time.Second) + o.Expect(err).NotTo(o.HaveOccurred(), fmt.Sprintf("Error occured while waiting for Updated=True: %v", err)) + o.Expect(conditionMet).To(o.BeTrue(), "Error, could not detect Updated=True.") } // `ValidateMCNConditionOnNodeDegrade` checks that Conditions properly update on a node failure (MCP degrade) diff --git a/test/extended/testdata/bindata.go b/test/extended/testdata/bindata.go index 50b94d79d11a..bd55159912e3 100644 --- a/test/extended/testdata/bindata.go +++ b/test/extended/testdata/bindata.go @@ -430,6 +430,7 @@ // test/extended/testdata/machine_config/machineconfigurations/managedbootimages-empty.yaml // test/extended/testdata/machine_config/machineconfigurations/managedbootimages-none.yaml // 
test/extended/testdata/machine_config/machineconfigurations/managedbootimages-partial.yaml +// test/extended/testdata/machine_config/machineconfigurations/nodedisruptionpolicy-rebootless-path.yaml // test/extended/testdata/machine_config/pinnedimage/customGCMCPpis.yaml // test/extended/testdata/machine_config/pinnedimage/customInvalidPis.yaml // test/extended/testdata/machine_config/pinnedimage/customMCPpis.yaml @@ -49373,6 +49374,34 @@ func testExtendedTestdataMachine_configMachineconfigurationsManagedbootimagesPar return a, nil } +var _testExtendedTestdataMachine_configMachineconfigurationsNodedisruptionpolicyRebootlessPathYaml = []byte(`apiVersion: operator.openshift.io/v1 +kind: MachineConfiguration +metadata: + name: cluster + namespace: openshift-machine-config-operator +spec: + nodeDisruptionPolicy: + files: + - path: /home/core/test + actions: + - type: None +`) + +func testExtendedTestdataMachine_configMachineconfigurationsNodedisruptionpolicyRebootlessPathYamlBytes() ([]byte, error) { + return _testExtendedTestdataMachine_configMachineconfigurationsNodedisruptionpolicyRebootlessPathYaml, nil +} + +func testExtendedTestdataMachine_configMachineconfigurationsNodedisruptionpolicyRebootlessPathYaml() (*asset, error) { + bytes, err := testExtendedTestdataMachine_configMachineconfigurationsNodedisruptionpolicyRebootlessPathYamlBytes() + if err != nil { + return nil, err + } + + info := bindataFileInfo{name: "test/extended/testdata/machine_config/machineconfigurations/nodedisruptionpolicy-rebootless-path.yaml", size: 0, mode: os.FileMode(0), modTime: time.Unix(0, 0)} + a := &asset{bytes: bytes, info: info} + return a, nil +} + var _testExtendedTestdataMachine_configPinnedimageCustomgcmcppisYaml = []byte(`apiVersion: machineconfiguration.openshift.io/v1 kind: PinnedImageSet metadata: @@ -56146,6 +56175,7 @@ var _bindata = map[string]func() (*asset, error){ "test/extended/testdata/machine_config/machineconfigurations/managedbootimages-empty.yaml": 
testExtendedTestdataMachine_configMachineconfigurationsManagedbootimagesEmptyYaml, "test/extended/testdata/machine_config/machineconfigurations/managedbootimages-none.yaml": testExtendedTestdataMachine_configMachineconfigurationsManagedbootimagesNoneYaml, "test/extended/testdata/machine_config/machineconfigurations/managedbootimages-partial.yaml": testExtendedTestdataMachine_configMachineconfigurationsManagedbootimagesPartialYaml, + "test/extended/testdata/machine_config/machineconfigurations/nodedisruptionpolicy-rebootless-path.yaml": testExtendedTestdataMachine_configMachineconfigurationsNodedisruptionpolicyRebootlessPathYaml, "test/extended/testdata/machine_config/pinnedimage/customGCMCPpis.yaml": testExtendedTestdataMachine_configPinnedimageCustomgcmcppisYaml, "test/extended/testdata/machine_config/pinnedimage/customInvalidPis.yaml": testExtendedTestdataMachine_configPinnedimageCustominvalidpisYaml, "test/extended/testdata/machine_config/pinnedimage/customMCPpis.yaml": testExtendedTestdataMachine_configPinnedimageCustommcppisYaml, @@ -56907,10 +56937,11 @@ var _bintree = &bintree{nil, map[string]*bintree{ "infra-mcp.yaml": {testExtendedTestdataMachine_configMachineconfigpoolInfraMcpYaml, map[string]*bintree{}}, }}, "machineconfigurations": {nil, map[string]*bintree{ - "managedbootimages-all.yaml": {testExtendedTestdataMachine_configMachineconfigurationsManagedbootimagesAllYaml, map[string]*bintree{}}, - "managedbootimages-empty.yaml": {testExtendedTestdataMachine_configMachineconfigurationsManagedbootimagesEmptyYaml, map[string]*bintree{}}, - "managedbootimages-none.yaml": {testExtendedTestdataMachine_configMachineconfigurationsManagedbootimagesNoneYaml, map[string]*bintree{}}, - "managedbootimages-partial.yaml": {testExtendedTestdataMachine_configMachineconfigurationsManagedbootimagesPartialYaml, map[string]*bintree{}}, + "managedbootimages-all.yaml": {testExtendedTestdataMachine_configMachineconfigurationsManagedbootimagesAllYaml, map[string]*bintree{}}, + 
"managedbootimages-empty.yaml": {testExtendedTestdataMachine_configMachineconfigurationsManagedbootimagesEmptyYaml, map[string]*bintree{}}, + "managedbootimages-none.yaml": {testExtendedTestdataMachine_configMachineconfigurationsManagedbootimagesNoneYaml, map[string]*bintree{}}, + "managedbootimages-partial.yaml": {testExtendedTestdataMachine_configMachineconfigurationsManagedbootimagesPartialYaml, map[string]*bintree{}}, + "nodedisruptionpolicy-rebootless-path.yaml": {testExtendedTestdataMachine_configMachineconfigurationsNodedisruptionpolicyRebootlessPathYaml, map[string]*bintree{}}, }}, "pinnedimage": {nil, map[string]*bintree{ "customGCMCPpis.yaml": {testExtendedTestdataMachine_configPinnedimageCustomgcmcppisYaml, map[string]*bintree{}}, diff --git a/test/extended/testdata/machine_config/machineconfigurations/nodedisruptionpolicy-rebootless-path.yaml b/test/extended/testdata/machine_config/machineconfigurations/nodedisruptionpolicy-rebootless-path.yaml new file mode 100644 index 000000000000..2c6c53102525 --- /dev/null +++ b/test/extended/testdata/machine_config/machineconfigurations/nodedisruptionpolicy-rebootless-path.yaml @@ -0,0 +1,11 @@ +apiVersion: operator.openshift.io/v1 +kind: MachineConfiguration +metadata: + name: cluster + namespace: openshift-machine-config-operator +spec: + nodeDisruptionPolicy: + files: + - path: /home/core/test + actions: + - type: None diff --git a/test/extended/util/annotate/generated/zz_generated.annotations.go b/test/extended/util/annotate/generated/zz_generated.annotations.go index 4b127893e02a..ee3d71bdf153 100644 --- a/test/extended/util/annotate/generated/zz_generated.annotations.go +++ b/test/extended/util/annotate/generated/zz_generated.annotations.go @@ -1355,15 +1355,17 @@ var Annotations = map[string]string{ "[sig-kubevirt] services when running openshift cluster on KubeVirt virtual machines should allow direct connections to pods from guest cluster pod in pod network across different guest nodes": " 
[Suite:openshift/conformance/parallel]", + "[sig-mco][OCPFeatureGate:MachineConfigNodes] Should have MCN properties matching associated node properties for nodes in default MCPs [apigroup:machineconfiguration.openshift.io]": " [Suite:openshift/conformance/parallel]", + "[sig-mco][OCPFeatureGate:MachineConfigNodes] Should properly block MCN updates by impersonation of the MCD SA [apigroup:machineconfiguration.openshift.io]": " [Suite:openshift/conformance/parallel]", "[sig-mco][OCPFeatureGate:MachineConfigNodes] Should properly block MCN updates from a MCD that is not the associated one [apigroup:machineconfiguration.openshift.io]": " [Suite:openshift/conformance/parallel]", "[sig-mco][OCPFeatureGate:MachineConfigNodes] Should properly update the MCN from the associated MCD [apigroup:machineconfiguration.openshift.io]": " [Suite:openshift/conformance/parallel]", - "[sig-mco][OCPFeatureGate:MachineConfigNodes] [Serial]Should have MCN properties matching associated node properties [apigroup:machineconfiguration.openshift.io]": " [Suite:openshift/conformance/serial]", + "[sig-mco][OCPFeatureGate:MachineConfigNodes] [Serial]Should have MCN properties matching associated node properties for nodes in custom MCPs [apigroup:machineconfiguration.openshift.io]": " [Suite:openshift/conformance/serial]", - "[sig-mco][OCPFeatureGate:MachineConfigNodes] [Serial]Should properly transition through MCN conditions on node update [apigroup:machineconfiguration.openshift.io]": " [Suite:openshift/conformance/serial]", + "[sig-mco][OCPFeatureGate:MachineConfigNodes] [Serial]Should properly transition through MCN conditions on rebootless node update [apigroup:machineconfiguration.openshift.io]": " [Suite:openshift/conformance/serial]", "[sig-mco][OCPFeatureGate:MachineConfigNodes] [Serial][Slow]Should properly create and remove MCN on node creation and deletion [apigroup:machineconfiguration.openshift.io]": "", diff --git a/zz_generated.manifests/test-reporting.yaml 
b/zz_generated.manifests/test-reporting.yaml index 8ce161275ada..2990b999db74 100644 --- a/zz_generated.manifests/test-reporting.yaml +++ b/zz_generated.manifests/test-reporting.yaml @@ -114,6 +114,8 @@ spec: on desired.architecture field in the CV [apigroup:image.openshift.io]' - featureGate: MachineConfigNodes tests: + - testName: '[sig-mco][OCPFeatureGate:MachineConfigNodes] Should have MCN properties + matching associated node properties for nodes in default MCPs [apigroup:machineconfiguration.openshift.io]' - testName: '[sig-mco][OCPFeatureGate:MachineConfigNodes] Should properly block MCN updates by impersonation of the MCD SA [apigroup:machineconfiguration.openshift.io]' - testName: '[sig-mco][OCPFeatureGate:MachineConfigNodes] Should properly block @@ -121,9 +123,10 @@ spec: - testName: '[sig-mco][OCPFeatureGate:MachineConfigNodes] Should properly update the MCN from the associated MCD [apigroup:machineconfiguration.openshift.io]' - testName: '[sig-mco][OCPFeatureGate:MachineConfigNodes] [Serial]Should have - MCN properties matching associated node properties [apigroup:machineconfiguration.openshift.io]' + MCN properties matching associated node properties for nodes in custom MCPs + [apigroup:machineconfiguration.openshift.io]' - testName: '[sig-mco][OCPFeatureGate:MachineConfigNodes] [Serial]Should properly - transition through MCN conditions on node update [apigroup:machineconfiguration.openshift.io]' + transition through MCN conditions on rebootless node update [apigroup:machineconfiguration.openshift.io]' - testName: '[sig-mco][OCPFeatureGate:MachineConfigNodes] [Serial][Slow]Should properly create and remove MCN on node creation and deletion [apigroup:machineconfiguration.openshift.io]' - testName: '[sig-mco][OCPFeatureGate:MachineConfigNodes] [Serial][Slow]Should