From b05c0026897f55792c094a15fc6f42335540004b Mon Sep 17 00:00:00 2001 From: Isabella Janssen Date: Mon, 10 Mar 2025 14:56:33 -0400 Subject: [PATCH 1/3] owners: update MCO team list --- test/extended/machine_config/OWNERS | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/test/extended/machine_config/OWNERS b/test/extended/machine_config/OWNERS index 08ca82b82b16..467bf01b382b 100644 --- a/test/extended/machine_config/OWNERS +++ b/test/extended/machine_config/OWNERS @@ -3,8 +3,16 @@ approvers: - dkhater-redhat - yuqi-zhang - cheesesashimi + - umohnani8 + - LorbusChris + - RishabhSaini + - isabella-janssen reviewers: - djoshy - dkhater-redhat - yuqi-zhang - cheesesashimi + - umohnani8 + - LorbusChris + - RishabhSaini + - isabella-janssen \ No newline at end of file From 9519f3cde4d9f22b78e7d3dabe035dee969e7eb1 Mon Sep 17 00:00:00 2001 From: Isabella Janssen Date: Tue, 1 Apr 2025 17:48:44 -0400 Subject: [PATCH 2/3] mco-1595, mco-1596, mco-1597, mco-1598: implement 4/5 origin tests for MachineConfigNode --- test/extended/machine_config/helpers.go | 751 ++++++++++++++++++ .../machine_config/machine_config_node.go | 388 +++++++++ test/extended/testdata/bindata.go | 143 ++++ .../machineconfig/0-master-mc.yaml | 16 + .../machineconfig/1-master-invalid-mc.yaml | 16 + .../machineconfig/1-worker-invalid-mc.yaml | 16 + .../machineconfigpool/infra-mcp.yaml | 11 + .../generated/zz_generated.annotations.go | 8 + zz_generated.manifests/test-reporting.yaml | 10 + 9 files changed, 1359 insertions(+) create mode 100644 test/extended/machine_config/machine_config_node.go create mode 100644 test/extended/testdata/machine_config/machineconfig/0-master-mc.yaml create mode 100644 test/extended/testdata/machine_config/machineconfig/1-master-invalid-mc.yaml create mode 100644 test/extended/testdata/machine_config/machineconfig/1-worker-invalid-mc.yaml create mode 100644 test/extended/testdata/machine_config/machineconfigpool/infra-mcp.yaml diff --git a/test/extended/machine_config/helpers.go b/test/extended/machine_config/helpers.go index 01c482628702..2467ee856184 100644 --- a/test/extended/machine_config/helpers.go +++ b/test/extended/machine_config/helpers.go @@ -3,13 +3,16 @@ package machine_config import ( "context" "encoding/json" + "errors" "fmt" "math/rand" + "strings" "time" osconfigv1 "github.com/openshift/api/config/v1" machinev1beta1 "github.com/openshift/api/machine/v1beta1" mcfgv1 "github.com/openshift/api/machineconfiguration/v1" + mcfgv1alpha1 "github.com/openshift/api/machineconfiguration/v1alpha1" opv1 "github.com/openshift/api/operator/v1" machineclient "github.com/openshift/client-go/machine/clientset/versioned" machineconfigclient "github.com/openshift/client-go/machineconfiguration/clientset/versioned" @@ -21,6 +24,7 @@ import ( corev1 "k8s.io/api/core/v1" apierrors "k8s.io/apimachinery/pkg/api/errors" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/labels" "k8s.io/kubernetes/test/e2e/framework" e2eskipper "k8s.io/kubernetes/test/e2e/framework/skipper" "k8s.io/utils/ptr" @@ -34,6 +38,9 @@ const ( cmName = "coreos-bootimages" mapiMasterMachineLabelSelector = "machine.openshift.io/cluster-api-machine-role=master" mapiMachineSetArchAnnotationKey = "capacity.cluster-autoscaler.kubernetes.io/labels" + currentConfigAnnotationKey = "machineconfiguration.openshift.io/currentConfig" + desiredConfigAnnotationKey = "machineconfiguration.openshift.io/desiredConfig" + stateAnnotationKey = "machineconfiguration.openshift.io/state" ) // skipUnlessTargetPlatform skips the test 
if it is not running on the target platform
@@ -82,6 +89,13 @@ func skipOnSingleNodeTopology(oc *exutil.CLI) {
 	}
 }
 
+// `IsSingleNode` returns true if the cluster is using single-node topology and false otherwise
+func IsSingleNode(oc *exutil.CLI) bool {
+	infra, err := oc.AdminConfigClient().ConfigV1().Infrastructures().Get(context.Background(), "cluster", metav1.GetOptions{})
+	o.Expect(err).NotTo(o.HaveOccurred(), "Error determining cluster infrastructure.")
+	return infra.Status.ControlPlaneTopology == osconfigv1.SingleReplicaTopologyMode
+}
+
 // getRandomMachineSet picks a random machineset present on the cluster
 func getRandomMachineSet(machineClient *machineclient.Clientset) machinev1beta1.MachineSet {
 	machineSets, err := machineClient.MachineV1beta1().MachineSets("openshift-machine-api").List(context.TODO(), metav1.ListOptions{})
@@ -315,5 +329,742 @@ func ApplyBootImageFixture(oc *exutil.CLI, fixture string) {
 
 	// Ensure status accounts for the fixture that was applied
 	WaitForMachineConfigurationStatusUpdate(oc)
+}
+
+// `ValidateMCNForNodeInPool` validates the MCN of a node in a given pool. It does the following:
+// 1. Get node from desired pool
+// 2. Get the MCN for the node
+// 3. Validate the MCN against the node properties
+//   - Check that `mcn.Spec.Pool.Name` matches provided `poolName`
+//   - Check that `mcn.Name` matches the node name
+//   - Check that `mcn.Spec.ConfigVersion.Desired` matches the node desired config version
+//   - Check that `mcn.Status.ConfigVersion.Current` matches the node current config version
+//   - Check that `mcn.Status.ConfigVersion.Desired` matches the node desired config version
+func ValidateMCNForNodeInPool(oc *exutil.CLI, clientSet *machineconfigclient.Clientset, node corev1.Node, poolName string) error {
+	// Get node's desired and current config versions
+	nodeCurrentConfig := node.Annotations[currentConfigAnnotationKey]
+	nodeDesiredConfig := node.Annotations[desiredConfigAnnotationKey]
+
+	// Get node MCN
+	framework.Logf("Getting MCN for node '%v'.", node.Name)
+	mcn, mcnErr := clientSet.MachineconfigurationV1alpha1().MachineConfigNodes().Get(context.TODO(), node.Name, metav1.GetOptions{})
+	if mcnErr != nil {
+		framework.Logf("Could not get MCN for node '%v'.", node.Name)
+		return mcnErr
+	}
+
+	// Check MCN pool name value for default MCPs
+	framework.Logf("Checking MCN pool name for node '%v' matches pool association '%v'.", node.Name, poolName)
+	if mcn.Spec.Pool.Name != poolName {
+		framework.Logf("MCN pool name '%v' does not match node MCP association '%v'.", mcn.Spec.Pool.Name, poolName)
+		return fmt.Errorf("MCN pool name does not match node MCP association")
+	}
+
+	// Check MCN name matches node name
+	framework.Logf("Checking MCN name matches node name '%v'.", node.Name)
+	if mcn.Name != node.Name {
+		framework.Logf("MCN name '%v' does not match node name '%v'.", mcn.Name, node.Name)
+		return fmt.Errorf("MCN name does not match node name")
+	}
+
+	// Check desired config version in MCN spec matches desired config on node
+	framework.Logf("Checking node '%v' desired config version '%v' matches desired config version in MCN spec.", node.Name, nodeDesiredConfig)
+	if mcn.Spec.ConfigVersion.Desired != nodeDesiredConfig {
+		framework.Logf("MCN spec desired config version '%v' does not match node desired config version '%v'.", mcn.Spec.ConfigVersion.Desired, nodeDesiredConfig)
+		return fmt.Errorf("MCN spec desired config version does not match node desired config version")
+	}
+
+	// Check current config version in MCN status matches current config on node
+	framework.Logf("Checking node '%v' current config version '%v' matches current version in MCN status.", node.Name, nodeCurrentConfig)
+	if mcn.Status.ConfigVersion.Current != nodeCurrentConfig {
+		framework.Logf("MCN status current config version '%v' does not match node current config version '%v'.", mcn.Status.ConfigVersion.Current, nodeCurrentConfig)
+		return fmt.Errorf("MCN status current config version does not match node current config version")
+	}
+
+	// Check desired config version in MCN status matches desired config on node
+	framework.Logf("Checking node '%v' desired config version '%v' matches desired version in MCN status.", node.Name, nodeDesiredConfig)
+	if mcn.Status.ConfigVersion.Desired != nodeDesiredConfig {
+		framework.Logf("MCN status desired config version '%v' does not match node desired config version '%v'.", mcn.Status.ConfigVersion.Desired, nodeDesiredConfig)
+		return fmt.Errorf("MCN status desired config version does not match node desired config version")
+	}
+
+	return nil
+}
+
+// `GetRandomNode` gets a random node from a given MCP and checks whether the node is ready. If no
+// nodes are ready, it will wait for up to 5 minutes for a node to become available.
+func GetRandomNode(oc *exutil.CLI, pool string) corev1.Node {
+	if node := getRandomNode(oc, pool); isNodeReady(node) {
+		return node
+	}
+
+	// If no nodes are ready, wait for up to 5 minutes for one to be ready
+	waitPeriod := time.Minute * 5
+	framework.Logf("No ready nodes found for pool '%s', waiting up to %s for a ready node to become available", pool, waitPeriod)
+	var targetNode corev1.Node
+	o.Eventually(func() bool {
+		if node := getRandomNode(oc, pool); isNodeReady(node) {
+			targetNode = node
+			return true
+		}
+
+		return false
+	}, 5*time.Minute, 2*time.Second).Should(o.BeTrue())
+
+	return targetNode
+}
+
+// `getRandomNode` gets a random node from a given pool
+func getRandomNode(oc *exutil.CLI, pool string) corev1.Node {
+	nodes, err := GetNodesByRole(oc, pool)
+	o.Expect(err).NotTo(o.HaveOccurred())
+	o.Expect(nodes).ShouldNot(o.BeEmpty())
+
+	// Disable gosec here to avoid throwing
+	// G404: Use of weak random number generator (math/rand instead of crypto/rand)
+	// #nosec
+	rnd := rand.New(rand.NewSource(time.Now().UnixNano()))
+	return nodes[rnd.Intn(len(nodes))]
+}
+
+// `GetNodesByRole` gets all nodes labeled with the desired role
+func GetNodesByRole(oc *exutil.CLI, role string) ([]corev1.Node, error) {
+	listOptions := metav1.ListOptions{
+		LabelSelector: labels.SelectorFromSet(labels.Set{fmt.Sprintf("node-role.kubernetes.io/%s", role): ""}).String(),
+	}
+	nodes, err := oc.AsAdmin().KubeClient().CoreV1().Nodes().List(context.TODO(), listOptions)
+	if err != nil {
+		return nil, err
+	}
+	return nodes.Items, nil
+}
+
+// `isNodeReady` determines if a given node is ready
+func isNodeReady(node corev1.Node) bool {
+	// If the node is cordoned, it is not ready.
+	if node.Spec.Unschedulable {
+		return false
+	}
+
+	// If the node's kubelet is not ready, it is not ready.
+	if !isNodeKubeletReady(node) {
+		return false
+	}
+
+	// If the node's MCD is not done, it is not ready.
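+	// The MCD reports its progress through the machineconfiguration.openshift.io/state
+	// node annotation (stateAnnotationKey above); "Done" means the daemon has finished
+	// applying the node's current rendered config.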
+	if !checkMCDState(node, "Done") {
+		return false
+	}
+
+	return true
+}
+
+// `isNodeKubeletReady` determines if a given node's kubelet is ready
+func isNodeKubeletReady(node corev1.Node) bool {
+	for _, condition := range node.Status.Conditions {
+		if condition.Reason == "KubeletReady" && condition.Status == "True" && condition.Type == "Ready" {
+			return true
+		}
+	}
+
+	return false
+}
+
+// `checkMCDState` determines whether the MCD state matches the provided desired state
+func checkMCDState(node corev1.Node, desiredState string) bool {
+	state := node.Annotations[stateAnnotationKey]
+	return state == desiredState
+}
+
+// `WaitForMCPToBeReady` waits up to 5 minutes for a pool to be in an updated state with a specified number of ready machines
+func WaitForMCPToBeReady(oc *exutil.CLI, machineConfigClient *machineconfigclient.Clientset, poolName string, readyMachineCount int32) {
+	o.Eventually(func() bool {
+		mcp, err := machineConfigClient.MachineconfigurationV1().MachineConfigPools().Get(context.TODO(), poolName, metav1.GetOptions{})
+		if err != nil {
+			framework.Logf("Failed to grab MCP '%v', error: %v", poolName, err)
+			return false
+		}
+		// Check if the pool is in an updated state with the correct number of ready machines
+		if IsMachineConfigPoolConditionTrue(mcp.Status.Conditions, mcfgv1.MachineConfigPoolUpdated) && mcp.Status.UpdatedMachineCount == readyMachineCount {
+			return true
+		}
+		framework.Logf("MCP '%v' has %v ready machines. Waiting for the desired ready machine count of %v.", poolName, mcp.Status.UpdatedMachineCount, readyMachineCount)
+		return false
+	}, 5*time.Minute, 10*time.Second).Should(o.BeTrue(), "Timed out waiting for MCP '%v' to be in 'Updated' state with %v ready machines.", poolName, readyMachineCount)
+}
+
+// `GetCordonedNodes` gets all cordoned nodes in the given MCP
+//   - If maxUnavailable > 1, this will return multiple cordoned nodes
+//   - If maxUnavailable == 1, this will return one cordoned node
+func GetCordonedNodes(oc *exutil.CLI, mcpName string) []corev1.Node {
+	// Wait for the MCP to start updating
+	o.Expect(WaitForMCPConditionStatus(oc, mcpName, mcfgv1.MachineConfigPoolUpdating, corev1.ConditionTrue, 3*time.Minute, 2*time.Second)).NotTo(o.HaveOccurred(), "Waiting for 'Updating' status change failed.")
+
+	// Get updating nodes
+	var allUpdatingNodes []corev1.Node
+	o.Eventually(func() bool {
+		nodes, nodeErr := GetNodesByRole(oc, mcpName)
+		o.Expect(nodeErr).NotTo(o.HaveOccurred(), "Error getting nodes from %v MCP.", mcpName)
+		o.Expect(nodes).ShouldNot(o.BeEmpty(), "No nodes found for %v MCP.", mcpName)
+
+		for _, node := range nodes {
+			if node.Spec.Unschedulable {
+				allUpdatingNodes = append(allUpdatingNodes, node)
+			}
+		}
+
+		return len(allUpdatingNodes) > 0
+	}, 5*time.Minute, 10*time.Second).Should(o.BeTrue())
+
+	return allUpdatingNodes
+}
+
+// `WaitForMCPConditionStatus` waits up to the desired timeout for the desired MCP condition to match the desired status (ex. 
wait until "Updating" is "True")
+func WaitForMCPConditionStatus(oc *exutil.CLI, mcpName string, conditionType mcfgv1.MachineConfigPoolConditionType, status corev1.ConditionStatus, timeout time.Duration, interval time.Duration) error {
+	framework.Logf("Waiting up to %v for MCP '%s' condition '%s' to be '%s'.", timeout, mcpName, conditionType, status)
+	machineConfigClient, err := machineconfigclient.NewForConfig(oc.KubeFramework().ClientConfig())
+	o.Expect(err).NotTo(o.HaveOccurred())
+	o.Eventually(func() bool {
+		framework.Logf("Waiting for '%v' MCP's '%v' condition to be '%v'.", mcpName, conditionType, status)
+
+		// Get MCP
+		mcp, mcpErr := machineConfigClient.MachineconfigurationV1().MachineConfigPools().Get(context.TODO(), mcpName, metav1.GetOptions{})
+		if mcpErr != nil {
+			framework.Logf("Failed to grab MCP '%v', error: %v", mcpName, mcpErr)
+			return false
+		}
+
+		// Loop through conditions to check for the desired condition type/status combination
+		conditions := mcp.Status.Conditions
+		for _, condition := range conditions {
+			if condition.Type == conditionType {
+				framework.Logf("MCP '%s' condition '%s' status is '%s'", mcp.Name, conditionType, condition.Status)
+				return condition.Status == status
+			}
+		}
+
+		return false
+	}, timeout, interval).Should(o.BeTrue())
+	return nil
+}
+
+// `WaitForMCNConditionStatus` waits up to a specified timeout for the desired MCN condition to match the desired status (ex. wait until "Updated" is "False")
+func WaitForMCNConditionStatus(clientSet *machineconfigclient.Clientset, mcnName string, conditionType mcfgv1alpha1.StateProgress, status metav1.ConditionStatus, timeout time.Duration, interval time.Duration) error {
+	o.Eventually(func() bool {
+		framework.Logf("Waiting for MCN '%v' %v condition to be %v.", mcnName, conditionType, status)
+
+		// Get MCN & check if the MCN condition status matches the desired status
+		workerNodeMCN, workerErr := clientSet.MachineconfigurationV1alpha1().MachineConfigNodes().Get(context.TODO(), mcnName, metav1.GetOptions{})
+		o.Expect(workerErr).NotTo(o.HaveOccurred())
+		return CheckMCNConditionStatus(workerNodeMCN, conditionType, status)
+	}, timeout, interval).Should(o.BeTrue())
+	return nil
+}
+
+// `CheckMCNConditionStatus` checks that an MCN condition matches the desired status (ex. confirm "Updated" is "False")
+func CheckMCNConditionStatus(mcn *mcfgv1alpha1.MachineConfigNode, conditionType mcfgv1alpha1.StateProgress, status metav1.ConditionStatus) bool {
+	conditionStatus := getMCNConditionStatus(mcn, conditionType)
+	return conditionStatus == status
+}
+
+// `getMCNConditionStatus` returns the status of the desired condition type for MCN, or an empty string if the condition does not exist
+func getMCNConditionStatus(mcn *mcfgv1alpha1.MachineConfigNode, conditionType mcfgv1alpha1.StateProgress) metav1.ConditionStatus {
+	// Loop through conditions and return the status of the desired condition type
+	conditions := mcn.Status.Conditions
+	for _, condition := range conditions {
+		if condition.Type == string(conditionType) {
+			framework.Logf("MCN '%s' %s condition status is %s", mcn.Name, conditionType, condition.Status)
+			return condition.Status
+		}
+	}
+	return ""
+}
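These condition helpers are the building blocks the MCN tests below poll with. A typical caller pairs them roughly as follows (a minimal sketch, not part of the patch; `clientSet` and `node` are assumed to be set up as in the tests in machine_config_node.go, and the timeouts are illustrative):

```go
// Sketch: after applying a MachineConfig, watch the node's MCN leave and
// then re-enter the Updated state using the helpers above.
err := WaitForMCNConditionStatus(clientSet, node.Name,
	mcfgv1alpha1.MachineConfigNodeUpdated, metav1.ConditionFalse,
	1*time.Minute, 1*time.Second)
o.Expect(err).NotTo(o.HaveOccurred(), "node never started updating")

err = WaitForMCNConditionStatus(clientSet, node.Name,
	mcfgv1alpha1.MachineConfigNodeUpdated, metav1.ConditionTrue,
	10*time.Minute, 5*time.Second)
o.Expect(err).NotTo(o.HaveOccurred(), "node never finished updating")
```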
+// `ConfirmUpdatedMCNStatus` confirms that an MCN is in a fully updated state, which requires:
+// 1. "Updated" = True
+// 2. All other conditions = False
+func ConfirmUpdatedMCNStatus(clientSet *machineconfigclient.Clientset, mcnName string) bool {
+	// Get MCN
+	workerNodeMCN, workerErr := clientSet.MachineconfigurationV1alpha1().MachineConfigNodes().Get(context.TODO(), mcnName, metav1.GetOptions{})
+	o.Expect(workerErr).NotTo(o.HaveOccurred())
+
+	// Loop through conditions and check each status against the expected value
+	conditions := workerNodeMCN.Status.Conditions
+	for _, condition := range conditions {
+		if condition.Type == string(mcfgv1alpha1.MachineConfigNodeUpdated) && condition.Status != metav1.ConditionTrue {
+			framework.Logf("Node '%s' update is not complete; 'Updated' condition status is '%v'", mcnName, condition.Status)
+			return false
+		} else if condition.Type != string(mcfgv1alpha1.MachineConfigNodeUpdated) && condition.Status != metav1.ConditionFalse {
+			framework.Logf("Node '%s' is updated but MCN is invalid; '%v' condition status is '%v'", mcnName, condition.Type, condition.Status)
+			return false
+		}
+	}
+
+	framework.Logf("Node '%s' update is complete and corresponding MCN is valid.", mcnName)
+	return true
+}
+
+// `GetDegradedNode` gets a degraded node from a specified MCP
+func GetDegradedNode(oc *exutil.CLI, mcpName string) (corev1.Node, error) {
+	// Get nodes in desired pool
+	nodes, nodeErr := GetNodesByRole(oc, mcpName)
+	if nodeErr != nil {
+		return corev1.Node{}, nodeErr
+	} else if len(nodes) == 0 {
+		return corev1.Node{}, fmt.Errorf("no nodes found in MCP '%v'", mcpName)
+	}
+
+	// Get degraded node
+	for _, node := range nodes {
+		if checkMCDState(node, "Degraded") {
+			return node, nil
+		}
+	}
+
+	return corev1.Node{}, errors.New("no degraded node found")
+}
+
+// `RecoverFromDegraded` gets the degraded node in the desired MCP, forces the node to recover by updating its desired
+// config to be its current config, and waits for the MCP to return to an Updated=True state
+func RecoverFromDegraded(oc *exutil.CLI, mcpName string) error {
+	framework.Logf("Recovering %s pool from degraded state", mcpName)
+
+	// Get nodes from degraded MCP & update the desired config of the degraded node to force a recovery update
+	nodes, nodeErr := GetNodesByRole(oc, mcpName)
+	o.Expect(nodeErr).NotTo(o.HaveOccurred())
+	o.Expect(nodes).ShouldNot(o.BeEmpty())
+	for _, node := range nodes {
+		framework.Logf("Restoring desired config for node: %s", node.Name)
+		if checkMCDState(node, "Done") {
+			framework.Logf("Node %s is updated and does not need to be recovered", node.Name)
+		} else {
+			err := restoreDesiredConfig(oc, node)
+			if err != nil {
+				return fmt.Errorf("error restoring desired config in node %s. 
Error: %s", node.Name, err) + } + } + } + + // Wait for MCP to not be in degraded status + mcpErr := WaitForMCPConditionStatus(oc, mcpName, "Degraded", "False", 4*time.Minute, 5*time.Second) + o.Expect(mcpErr).NotTo(o.HaveOccurred(), fmt.Sprintf("could not recover %v MCP from the degraded status.", mcpName)) + mcpErr = WaitForMCPConditionStatus(oc, mcpName, "Updated", "True", 7*time.Minute, 5*time.Second) + o.Expect(mcpErr).NotTo(o.HaveOccurred(), fmt.Sprintf("%v MCP could not reach an updated state.", mcpName)) + return nil +} + +// `restoreDesiredConfig` updates the value of a node's desiredConfig annotation to be equal to the value of its currentConfig (desiredConfig=currentConfig) +func restoreDesiredConfig(oc *exutil.CLI, node corev1.Node) error { + // Get current config + currentConfig := node.Annotations[currentConfigAnnotationKey] + if currentConfig == "" { + return fmt.Errorf("currentConfig annotation is empty for node %s", node.Name) + } + + // Update desired config to be equal to current config + framework.Logf("Node: %s is restoring desiredConfig value to match currentConfig value: %s", node.Name, currentConfig) + configErr := oc.Run("patch").Args(fmt.Sprintf("node/%v", node.Name), "--patch", fmt.Sprintf(`{"metadata":{"annotations":{"machineconfiguration.openshift.io/desiredConfig":"%v"}}}`, currentConfig), "--type=merge").Execute() + return configErr +} + +// `WorkersCanBeScaled` checks whether the worker nodes in a cluster can be scaled. +// Cases where scaling worker nodes is NOT possible include: +// - Baremetal platform +// - MachineAPI is disabled +// - Error getting list of MachineSets / no MachineSets exist +// - All MachineSets have 0 worker nodes +func WorkersCanBeScaled(oc *exutil.CLI, machineClient *machineclient.Clientset) (bool, error) { + framework.Logf("Checking if worker nodes can be scaled using machinesets.") + + // Check if platform is baremetal + framework.Logf("Checking if cluster platform is baremetal.") + if checkPlatform(oc) == "baremetal" { + framework.Logf("Cluster platform is baremetal. Nodes cannot be scaled in baremetal test environments.") + return false, nil + } + + // Check if MachineAPI is enabled + framework.Logf("Checking if MachineAPI is enabled.") + if !isCapabilityEnabled(oc, "MachineAPI") { + framework.Logf("MachineAPI capability is not enabled. Nodes cannot be scaled.") + return false, nil + } + + // Get MachineSets + framework.Logf("Getting MachineSets.") + machineSets, machineSetErr := machineClient.MachineV1beta1().MachineSets("openshift-machine-api").List(context.TODO(), metav1.ListOptions{}) + if machineSetErr != nil { + framework.Logf("Error getting list of MachineSets.") + return false, machineSetErr + } else if len(machineSets.Items) == 0 { + framework.Logf("No MachineSets configured. Nodes cannot be scaled.") + return false, nil + } + + // Check if all MachineSets have 0 replicas + // Per openshift-tests-private repo: + // "In some UPI/SNO/Compact clusters machineset resources exist, but they are all configured with 0 replicas + // If all machinesets have 0 replicas, then it means that we need to skip the test case" + machineSetsWithReplicas := 0 + for _, machineSet := range machineSets.Items { + replicas := machineSet.Spec.Replicas + machineSetsWithReplicas += int(*replicas) + } + if machineSetsWithReplicas == 0 { + framework.Logf("All machinesets have 0 worker nodes. 
Nodes cannot be scaled.")
+		return false, nil
+	}
+
+	return true, nil
+}
+
+// `checkPlatform` returns the cluster's platform
+func checkPlatform(oc *exutil.CLI) string {
+	output, err := oc.AsAdmin().Run("get").Args("infrastructure", "cluster", "-o=jsonpath={.status.platformStatus.type}").Output()
+	o.Expect(err).NotTo(o.HaveOccurred(), "Failed determining cluster infrastructure.")
+	return strings.ToLower(output)
+}
+
+// `isCapabilityEnabled` checks whether a desired capability is in the cluster's enabledCapabilities list
+func isCapabilityEnabled(oc *exutil.CLI, desiredCapability osconfigv1.ClusterVersionCapability) bool {
+	enabledCapabilities := getEnabledCapabilities(oc)
+	enabled := false
+	for _, enabledCapability := range enabledCapabilities {
+		if enabledCapability == desiredCapability {
+			enabled = true
+			break
+		}
+	}
+	framework.Logf("Capability '%s' is enabled: %v", desiredCapability, enabled)
+
+	return enabled
+}
+
+// `getEnabledCapabilities` gets a cluster's enabled capability list
+func getEnabledCapabilities(oc *exutil.CLI) []osconfigv1.ClusterVersionCapability {
+	clusterversion, err := oc.AsAdmin().AdminConfigClient().ConfigV1().ClusterVersions().Get(context.TODO(), "version", metav1.GetOptions{})
+	o.Expect(err).NotTo(o.HaveOccurred(), "Error getting clusterversion.")
+	enabledCapabilities := clusterversion.Status.Capabilities.EnabledCapabilities
+
+	return enabledCapabilities
+}
+
+// `ScaleMachineSet` scales the provided MachineSet by updating its replica count to the provided value
+func ScaleMachineSet(oc *exutil.CLI, machineSetName string, replicaValue string) error {
+	return oc.Run("scale").Args(fmt.Sprintf("--replicas=%v", replicaValue), "machinesets.machine.openshift.io", machineSetName, "-n", "openshift-machine-api").Execute()
+}
+
+// `GetMachinesByPhase` gets a machine in the desired phase, e.g. Running, Provisioning, Provisioned, Deleting, etc.
+func GetMachinesByPhase(machineClient *machineclient.Clientset, machineSetName string, desiredPhase string) (machinev1beta1.Machine, error) {
+	desiredMachine := machinev1beta1.Machine{}
+	err := fmt.Errorf("no %v machine found in %v MachineSet", desiredPhase, machineSetName)
+	o.Eventually(func() bool {
+		framework.Logf("Trying to get machine with phase %v from MachineSet '%v'.", desiredPhase, machineSetName)
+
+		// Get machines in desired MachineSet
+		machines, machinesErr := machineClient.MachineV1beta1().Machines(mapiNamespace).List(context.Background(), metav1.ListOptions{LabelSelector: fmt.Sprintf("machine.openshift.io/cluster-api-machineset=%v", machineSetName)})
+		o.Expect(machinesErr).NotTo(o.HaveOccurred())
+
+		// Find machine in desired phase
+		for _, machine := range machines.Items {
+			machinePhase := ptr.Deref(machine.Status.Phase, "")
+			if machinePhase == desiredPhase {
+				desiredMachine = machine
+				err = nil
+				return true
+			}
+		}
+		return false
+	}, 1*time.Minute, 3*time.Second).Should(o.BeTrue())
+	return desiredMachine, err
+}
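Together these helpers implement the scale-up side of the node-creation test below. A condensed view of the flow (a sketch only; error handling and the surrounding Ginkgo wiring are elided, and the clients are assumed to be built as in `ValidateMCNOnNodeCreationAndDeletion`):

```go
// Sketch: grow a random MachineSet by one replica and capture the machine
// that starts provisioning as a result.
machineSet := getRandomMachineSet(machineClient)
original := int(*machineSet.Spec.Replicas)

err := ScaleMachineSet(oc, machineSet.Name, fmt.Sprintf("%d", original+1))
o.Expect(err).NotTo(o.HaveOccurred())

// The new machine moves through Provisioning -> Provisioned -> Running.
machine, err := GetMachinesByPhase(machineClient, machineSet.Name, "Provisioning")
o.Expect(err).NotTo(o.HaveOccurred())
framework.Logf("New machine: %s", machine.Name)
```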
+// `UpdateDeleteMachineAnnotation` sets the `machine.openshift.io/delete-machine` annotation on the
+// provided machine to "true". This ensures the created machine is the one deleted when its
+// MachineSet is scaled back down on cleanup.
+func UpdateDeleteMachineAnnotation(oc *exutil.CLI, machineName string) error {
+	return oc.Run("patch").Args(fmt.Sprintf("machines.machine.openshift.io/%v", machineName), "-n", "openshift-machine-api", "--patch", `{"metadata":{"annotations":{"machine.openshift.io/delete-machine":"true"}}}`, "--type=merge").Execute()
+}
+
+// `WaitForMachineInState` waits up to 10 minutes for the desired machine to be in the desired state
+func WaitForMachineInState(machineClient *machineclient.Clientset, machineName string, desiredPhase string) error {
+	o.Eventually(func() bool {
+		// Get the desired machine
+		machine, machineErr := machineClient.MachineV1beta1().Machines(mapiNamespace).Get(context.TODO(), machineName, metav1.GetOptions{})
+		o.Expect(machineErr).NotTo(o.HaveOccurred())
+
+		// Check if machine phase is desired phase
+		machinePhase := ptr.Deref(machine.Status.Phase, "")
+		framework.Logf("Machine '%v' is in %v phase.", machineName, machinePhase)
+		return machinePhase == desiredPhase
+	}, 10*time.Minute, 10*time.Second).Should(o.BeTrue())
+	return nil
+}
+
+// `GetNodeInMachine` gets the node associated with a machine
+func GetNodeInMachine(oc *exutil.CLI, machineName string) (corev1.Node, error) {
+	// Get name of nodes associated with the desired machine
+	nodeNames, nodeNamesErr := oc.Run("get").Args("nodes", "-o", fmt.Sprintf(`jsonpath='{.items[?(@.metadata.annotations.machine\.openshift\.io/machine=="openshift-machine-api/%v")].metadata.name}'`, machineName)).Output()
+	if nodeNamesErr != nil { //error getting filtered node names
+		return corev1.Node{}, nodeNamesErr
+	} else if nodeNames == "" { //error when no nodes are found
+		return corev1.Node{}, fmt.Errorf("no node is linked to Machine: %s", machineName)
+	}
+
+	// Determine the number of nodes in the Machine
+	// Note: the format of `nodeNames` is the names of nodes separated by a space (ex: "node-name-1 node-name-2"),
+	// so the number of nodes is equal to one more than the number of spaces
+	numberOfNodeNames := strings.Count(nodeNames, " ") + 1
+	if numberOfNodeNames > 1 { //error when a machine has more than one node
+		return corev1.Node{}, fmt.Errorf("more than one node is linked to Machine: %s; number of nodes: %d", machineName, numberOfNodeNames)
+	}
+
+	node, nodeErr := oc.AsAdmin().KubeClient().CoreV1().Nodes().Get(context.TODO(), strings.ReplaceAll(nodeNames, "'", ""), metav1.GetOptions{})
+	if nodeErr != nil { //error getting the node
+		return corev1.Node{}, nodeErr
+	}
+
+	return *node, nil
+}
+
+// `GetNewReadyNodeInMachine` waits up to 2 minutes for the newly provisioned node of the desired machine to be ready
+func GetNewReadyNodeInMachine(oc *exutil.CLI, machineName string) (corev1.Node, error) {
+	desiredNode := corev1.Node{}
+	err := fmt.Errorf("no ready node in Machine: %s", machineName)
+	o.Eventually(func() bool {
+		// Get the desired node
+		node, nodeErr := GetNodeInMachine(oc, machineName)
+		o.Expect(nodeErr).NotTo(o.HaveOccurred())
+
+		// Check if node is ready
+		framework.Logf("Checking if node '%v' is ready.", node.Name)
+		if isNodeReady(node) {
+			framework.Logf("Node '%v' is ready.", node.Name)
+			desiredNode = node
+			err = nil
+			return true
+		}
+
+		return false
+	}, 2*time.Minute, 3*time.Second).Should(o.BeTrue())
+	return desiredNode, err
+}
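`GetNodeInMachine` resolves the machine-to-node link by shelling out to `oc` with a JSONPath filter on the `machine.openshift.io/machine` annotation. The same lookup can be expressed with client-go directly; the following is a sketch under that assumption (the helper name is hypothetical and not part of this patch; it assumes the k8s.io/client-go/kubernetes import in addition to the corev1/metav1 imports already used in this file):

```go
// Sketch: find the node whose machine annotation points at the given machine,
// mirroring the JSONPath filter used by GetNodeInMachine.
func nodeForMachine(ctx context.Context, client kubernetes.Interface, machineName string) (*corev1.Node, error) {
	nodes, err := client.CoreV1().Nodes().List(ctx, metav1.ListOptions{})
	if err != nil {
		return nil, err
	}
	want := "openshift-machine-api/" + machineName
	for i := range nodes.Items {
		if nodes.Items[i].Annotations["machine.openshift.io/machine"] == want {
			return &nodes.Items[i], nil
		}
	}
	return nil, fmt.Errorf("no node is linked to Machine: %s", machineName)
}
```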
+// `WaitForValidMCNProperties` waits for the MCN of a node to be valid. To be valid, the following must be true:
+// - MCN with name equivalent to node name exists (waits up to 20 sec)
+// - Pool name in MCN spec matches node MCP association (waits up to 1 min)
+// - Desired config version of node matches desired config version in MCN spec (waits up to 1 min)
+// - Current config version of node matches current config version in MCN status (waits up to 2 min)
+// - Desired config version of node matches desired config version in MCN status (waits up to 2 min)
+func WaitForValidMCNProperties(clientSet *machineconfigclient.Clientset, node corev1.Node) error {
+	nodeDesiredConfig := node.Annotations[desiredConfigAnnotationKey]
+	nodeCurrentConfig := node.Annotations[currentConfigAnnotationKey]
+
+	// Check MCN exists and that its name and node name match
+	framework.Logf("Checking MCN exists and name matches node name '%v'.", node.Name)
+	o.Eventually(func() bool {
+		// Get the desired MCN
+		newMCN, newMCNErr := clientSet.MachineconfigurationV1alpha1().MachineConfigNodes().Get(context.TODO(), node.Name, metav1.GetOptions{})
+		if newMCNErr != nil {
+			framework.Logf("Failed getting MCN '%v'.", node.Name)
+			return false
+		}
+
+		// Check if MCN name matches node's name
+		framework.Logf("Node name: %v. MCN name: %v.", node.Name, newMCN.Name)
+		return node.Name == newMCN.Name
+	}, 20*time.Second, 2*time.Second).Should(o.BeTrue(), fmt.Sprintf("Could not get MCN for node %v", node.Name))
+
+	// Check pool name in MCN matches node MCP association
+	// Note: pool name should be default value of `worker`
+	framework.Logf("Waiting for node MCP to match pool name in MCN '%v' spec.", node.Name)
+	nodeMCP := ""
+	var ok bool
+	if _, ok = node.Labels["node-role.kubernetes.io/worker"]; ok {
+		nodeMCP = "worker"
+	} else {
+		return fmt.Errorf("node MCP association could not be determined for node %v; node is not in default worker pool", node.Name)
+	}
+	o.Eventually(func() bool {
+		// Get the desired MCN
+		newMCN, newMCNErr := clientSet.MachineconfigurationV1alpha1().MachineConfigNodes().Get(context.TODO(), node.Name, metav1.GetOptions{})
+		if newMCNErr != nil {
+			framework.Logf("Failed getting MCN '%v'.", node.Name)
+			return false
+		}
+
+		// Check if MCN pool name in spec matches node's MCP association
+		framework.Logf("Node MCP association: %v. MCN spec pool name: %v.", nodeMCP, newMCN.Spec.Pool.Name)
+		return newMCN.Spec.Pool.Name == nodeMCP
+	}, 1*time.Minute, 5*time.Second).Should(o.BeTrue())
+
+	// Check desired config version matches for node and MCN spec config version
+	framework.Logf("Waiting for node desired config version to match desired config version in MCN '%v' spec.", node.Name)
+	o.Eventually(func() bool {
+		// Get the desired MCN
+		newMCN, newMCNErr := clientSet.MachineconfigurationV1alpha1().MachineConfigNodes().Get(context.TODO(), node.Name, metav1.GetOptions{})
+		if newMCNErr != nil {
+			framework.Logf("Failed getting MCN '%v'.", node.Name)
+			return false
+		}
+
+		// Check if MCN desired config version in spec matches node's desired config version
+		framework.Logf("Node desired config version: %v. 
MCN spec desired config version: %v.", nodeDesiredConfig, newMCN.Spec.ConfigVersion.Desired) + return newMCN.Spec.ConfigVersion.Desired == nodeDesiredConfig + }, 1*time.Minute, 5*time.Second).Should(o.BeTrue()) + + // Check current config version matches for node and MCN status config version + framework.Logf("Waiting for node current config version to match current config version in MCN '%v' status.", node.Name) + o.Eventually(func() bool { + // Get the desired MCN + newMCN, newMCNErr := clientSet.MachineconfigurationV1alpha1().MachineConfigNodes().Get(context.TODO(), node.Name, metav1.GetOptions{}) + if newMCNErr != nil { + framework.Logf("Failed getting MCN '%v'.", node.Name) + return false + } + + // Check if MCN current config version in status matches node's current config version + framework.Logf("Node current config version: %v. MCN status current config version: %v.", nodeCurrentConfig, newMCN.Status.ConfigVersion.Current) + return newMCN.Status.ConfigVersion.Current == nodeCurrentConfig + }, 2*time.Minute, 5*time.Second).Should(o.BeTrue()) + + // Check desired config version matches for node and MCN status config version + framework.Logf("Waiting for node desired config version to match desired config version in MCN '%v' status.", node.Name) + o.Eventually(func() bool { + // Get the desired MCN + newMCN, newMCNErr := clientSet.MachineconfigurationV1alpha1().MachineConfigNodes().Get(context.TODO(), node.Name, metav1.GetOptions{}) + if newMCNErr != nil { + framework.Logf("Failed getting MCN '%v'.", node.Name) + return false + } + + // Check if MCN desired config version in status matches node's desired config version + framework.Logf("Node desired config version: %v. MCN status desired config version: %v.", nodeDesiredConfig, newMCN.Status.ConfigVersion.Desired) + return newMCN.Status.ConfigVersion.Desired == nodeDesiredConfig + }, 2*time.Minute, 5*time.Second).Should(o.BeTrue()) + return nil +} + +// `ScaleMachineSetDown` will determine whether a MachineSet needs to be scaled and, if so, will +// scale it. A MachineSet needs to be scaled if its desired replica value does not match its +// current replica value. +func ScaleMachineSetDown(oc *exutil.CLI, machineSet machinev1beta1.MachineSet, desiredReplicaValue int, cleanupCompleted bool) error { + // Skip when cleanup is not needed + if cleanupCompleted { + return nil + } + + // Check if MachineSet needs to be scaled + if int(*machineSet.Spec.Replicas) == desiredReplicaValue { + framework.Logf("MachineSet '%v' does not need to be scaled. Current replica value %v matches desired replica value of %v.", machineSet.Name, *machineSet.Spec.Replicas, desiredReplicaValue) + return nil + } + + // Scale MachineSet to desired replica value + framework.Logf("Scaling MachineSet '%s' to replica value %v.", machineSet.Name, desiredReplicaValue) + return ScaleMachineSet(oc, machineSet.Name, fmt.Sprintf("%d", desiredReplicaValue)) +} + +// `CleanupProvisionedMachine` scales down the replica count for a given MachineSet and checks whether the +// provisioned Machine provided is deleted. 
+func CleanupProvisionedMachine(oc *exutil.CLI, machineClient *machineclient.Clientset, machineSetName string, desiredReplicaValue int,
+	machineName string, cleanupCompleted bool) error {
+	// Skip when cleanup is not needed
+	if cleanupCompleted {
+		return nil
+	}
+
+	// Scale MachineSet to desired replica value
+	framework.Logf("Scaling MachineSet '%s' to replica value %v.", machineSetName, desiredReplicaValue)
+	scaleErr := ScaleMachineSet(oc, machineSetName, fmt.Sprintf("%d", desiredReplicaValue))
+	if scaleErr != nil {
+		return scaleErr
+	}
+
+	// Check that provisioned machine is deleted
+	return WaitForMachineToBeDeleted(machineClient, machineName)
+}
+
+// `CleanupCreatedNode` scales down the replica count for a given MachineSet and checks whether the
+// created Node provided is deleted.
+func CleanupCreatedNode(oc *exutil.CLI, machineSetName string, desiredReplicaValue int, nodeName string, cleanupCompleted bool) error {
+	// Skip when cleanup is not needed
+	if cleanupCompleted {
+		return nil
+	}
+
+	// Scale MachineSet to desired replica value
+	framework.Logf("Scaling MachineSet '%s' to replica value %v.", machineSetName, desiredReplicaValue)
+	scaleErr := ScaleMachineSet(oc, machineSetName, fmt.Sprintf("%d", desiredReplicaValue))
+	if scaleErr != nil {
+		return scaleErr
+	}
+
+	// Check that created node is deleted
+	return WaitForNodeToBeDeleted(oc, nodeName)
+}
+
+// `WaitForNodeToBeDeleted` waits up to 10 minutes for a node to be deleted (no longer exist)
+func WaitForNodeToBeDeleted(oc *exutil.CLI, nodeName string) error {
+	o.Eventually(func() bool {
+		framework.Logf("Checking if node '%v' is deleted.", nodeName)
+
+		// Check if node still exists
+		_, nodeErr := oc.AsAdmin().KubeClient().CoreV1().Nodes().Get(context.TODO(), nodeName, metav1.GetOptions{})
+		if apierrors.IsNotFound(nodeErr) {
+			framework.Logf("Node '%v' has been deleted.", nodeName)
+			return true
+		}
+		if nodeErr != nil {
+			framework.Logf("Error trying to get node: %v.", nodeErr)
+			return false
+		}
+
+		framework.Logf("Node '%v' still exists.", nodeName)
+		return false
+	}, 10*time.Minute, 5*time.Second).Should(o.BeTrue())
+	return nil
+}
+
+// `WaitForMCNToBeDeleted` waits up to 4 minutes for an MCN to be deleted (no longer exist)
+func WaitForMCNToBeDeleted(clientSet *machineconfigclient.Clientset, mcnName string) error {
+	o.Eventually(func() bool {
+		framework.Logf("Checking if MCN '%v' is deleted.", mcnName)
+
+		// Check if MCN still exists
+		_, mcnErr := clientSet.MachineconfigurationV1alpha1().MachineConfigNodes().Get(context.TODO(), mcnName, metav1.GetOptions{})
+		if apierrors.IsNotFound(mcnErr) {
+			framework.Logf("MCN '%v' has been deleted.", mcnName)
+			return true
+		}
+		if mcnErr != nil {
+			framework.Logf("Error trying to get MCN: '%v'.", mcnErr)
+			return false
+		}
+
+		framework.Logf("MCN '%v' still exists.", mcnName)
+		return false
+	}, 4*time.Minute, 3*time.Second).Should(o.BeTrue())
+	return nil
+}
+
+// `WaitForMachineToBeDeleted` waits up to 10 minutes for a machine to be deleted (no longer exist)
+func WaitForMachineToBeDeleted(machineClient *machineclient.Clientset, machineName string) error {
+	o.Eventually(func() bool {
+		framework.Logf("Checking if machine '%v' is deleted.", machineName)
+
+		// Check if machine still exists
+		_, machineErr := machineClient.MachineV1beta1().Machines(mapiNamespace).Get(context.TODO(), machineName, metav1.GetOptions{})
+		if apierrors.IsNotFound(machineErr) {
+			framework.Logf("Machine '%v' has been deleted.", machineName)
+			return true
+		}
+		if machineErr != 
nil { + framework.Logf("Error trying to get machine: %v.", machineErr) + return false + } + + framework.Logf("Machine '%v' still exists.", machineName) + return false + }, 10*time.Minute, 5*time.Second).Should(o.BeTrue()) + return nil } diff --git a/test/extended/machine_config/machine_config_node.go b/test/extended/machine_config/machine_config_node.go new file mode 100644 index 000000000000..fbcafb887a8e --- /dev/null +++ b/test/extended/machine_config/machine_config_node.go @@ -0,0 +1,388 @@ +package machine_config + +import ( + "context" + "fmt" + "path/filepath" + "time" + + mcfgv1alpha1 "github.com/openshift/api/machineconfiguration/v1alpha1" + machineclient "github.com/openshift/client-go/machine/clientset/versioned" + machineconfigclient "github.com/openshift/client-go/machineconfiguration/clientset/versioned" + exutil "github.com/openshift/origin/test/extended/util" + + g "github.com/onsi/ginkgo/v2" + o "github.com/onsi/gomega" + + corev1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/kubernetes/test/e2e/framework" +) + +const ( + worker = "worker" + master = "master" + custom = "infra" +) + +var _ = g.Describe("[sig-mco][OCPFeatureGate:MachineConfigNodes]", func() { + defer g.GinkgoRecover() + var ( + MCOMachineConfigPoolBaseDir = exutil.FixturePath("testdata", "machine_config", "machineconfigpool") + MCOMachineConfigBaseDir = exutil.FixturePath("testdata", "machine_config", "machineconfig") + infraMCPFixture = filepath.Join(MCOMachineConfigPoolBaseDir, "infra-mcp.yaml") + testFileMCFixture = filepath.Join(MCOMachineConfigBaseDir, "0-master-mc.yaml") + invalidWorkerMCFixture = filepath.Join(MCOMachineConfigBaseDir, "1-worker-invalid-mc.yaml") + invalidMasterMCFixture = filepath.Join(MCOMachineConfigBaseDir, "1-master-invalid-mc.yaml") + oc = exutil.NewCLIWithoutNamespace("machine-config") + ) + + g.It("[Serial]Should have MCN properties matching associated node properties [apigroup:machineconfiguration.openshift.io]", func() { + if IsSingleNode(oc) { //handle SNO clusters + ValidateMCNPropertiesSNO(oc, infraMCPFixture) + } else { //handle standard, non-SNO, clusters + ValidateMCNProperties(oc, infraMCPFixture) + } + }) + + g.It("[Serial]Should properly transition through MCN conditions on node update [apigroup:machineconfiguration.openshift.io]", func() { + ValidateMCNConditionTransitions(oc, testFileMCFixture) + }) + + g.It("[Serial][Slow]Should properly report MCN conditions on node degrade [apigroup:machineconfiguration.openshift.io]", func() { + if IsSingleNode(oc) { //handle SNO clusters + ValidateMCNConditionOnNodeDegrade(oc, invalidMasterMCFixture, true) + } else { //handle standard, non-SNO, clusters + ValidateMCNConditionOnNodeDegrade(oc, invalidWorkerMCFixture, false) + } + }) + + g.It("[Serial][Slow]Should properly create and remove MCN on node creation and deletion [apigroup:machineconfiguration.openshift.io]", func() { + skipOnSingleNodeTopology(oc) //skip this test for SNO + ValidateMCNOnNodeCreationAndDeletion(oc) + }) +}) + +// `ValidateMCNProperties` checks that MCN properties match the corresponding node properties +// Note: This test case does not work for SNO clusters due to the cluster's one node assuming +// both the worker and master role since `GetRandomNode` selects nodes using node roles. Role +// matching is not necessarily synonymous with MCP association in edge cases, such as in SNO. 
+func ValidateMCNProperties(oc *exutil.CLI, fixture string) {
+	// Create client set for test
+	clientSet, clientErr := machineconfigclient.NewForConfig(oc.KubeFramework().ClientConfig())
+	o.Expect(clientErr).NotTo(o.HaveOccurred(), "Error creating client set for test.")
+
+	// Grab a random node from each default pool
+	workerNode := GetRandomNode(oc, worker)
+	o.Expect(workerNode.Name).NotTo(o.Equal(""), "Could not get a worker node.")
+	masterNode := GetRandomNode(oc, master)
+	o.Expect(masterNode.Name).NotTo(o.Equal(""), "Could not get a master node.")
+
+	// Validate MCN for node in default `worker` pool
+	framework.Logf("Validating MCN properties for node in default '%v' pool.", worker)
+	mcnErr := ValidateMCNForNodeInPool(oc, clientSet, workerNode, worker)
+	o.Expect(mcnErr).NotTo(o.HaveOccurred(), fmt.Sprintf("Error validating MCN properties for node in default pool '%v'.", worker))
+
+	// Validate MCN for node in default `master` pool
+	framework.Logf("Validating MCN properties for node in default '%v' pool.", master)
+	mcnErr = ValidateMCNForNodeInPool(oc, clientSet, masterNode, master)
+	o.Expect(mcnErr).NotTo(o.HaveOccurred(), fmt.Sprintf("Error validating MCN properties for node in default pool '%v'.", master))
+
+	// Cleanup custom MCP on test completion or failure
+	defer func() {
+		// Get starting state of default worker MCP
+		workerMcp, err := clientSet.MachineconfigurationV1().MachineConfigPools().Get(context.TODO(), worker, metav1.GetOptions{})
+		o.Expect(err).NotTo(o.HaveOccurred(), "Could not get worker MCP.")
+		workerMcpReadyMachines := workerMcp.Status.ReadyMachineCount
+
+		// Unlabel node
+		framework.Logf("Removing label node-role.kubernetes.io/%v from node %v", custom, workerNode.Name)
+		unlabelErr := oc.Run("label").Args(fmt.Sprintf("node/%s", workerNode.Name), fmt.Sprintf("node-role.kubernetes.io/%s-", custom)).Execute()
+		o.Expect(unlabelErr).NotTo(o.HaveOccurred(), fmt.Sprintf("Could not remove label 'node-role.kubernetes.io/%s' from node '%v'.", custom, workerNode.Name))
+
+		// Wait for infra pool to report no nodes & for worker MCP to be ready
+		framework.Logf("Waiting for %v MCP to be updated with %v ready machines.", custom, 0)
+		WaitForMCPToBeReady(oc, clientSet, custom, 0)
+		framework.Logf("Waiting for %v MCP to be updated with %v ready machines.", worker, workerMcpReadyMachines+1)
+		WaitForMCPToBeReady(oc, clientSet, worker, workerMcpReadyMachines+1)
+
+		// Delete custom MCP
+		framework.Logf("Deleting MCP %v", custom)
+		deleteMCPErr := oc.Run("delete").Args("mcp", custom).Execute()
+		o.Expect(deleteMCPErr).NotTo(o.HaveOccurred(), fmt.Sprintf("Error deleting MCP '%v': %v", custom, deleteMCPErr))
+	}()
+
+	// Apply the fixture to create a custom MCP called "infra" & label the worker node accordingly
+	mcpErr := oc.Run("apply").Args("-f", fixture).Execute()
+	o.Expect(mcpErr).NotTo(o.HaveOccurred(), "Could not create custom MCP.")
+	labelErr := oc.Run("label").Args(fmt.Sprintf("node/%s", workerNode.Name), fmt.Sprintf("node-role.kubernetes.io/%s=", custom)).Execute()
+	o.Expect(labelErr).NotTo(o.HaveOccurred(), fmt.Sprintf("Could not add label 'node-role.kubernetes.io/%s' to node '%v'.", custom, workerNode.Name))
+
+	// Wait for the custom pool to be updated with the node ready
+	framework.Logf("Waiting for '%v' MCP to be updated with %v ready machines.", custom, 1)
+	WaitForMCPToBeReady(oc, clientSet, custom, 1)
+
+	// Get node in custom pool
+	customNodes, customNodeErr := GetNodesByRole(oc, custom)
+	o.Expect(customNodeErr).NotTo(o.HaveOccurred(), 
fmt.Sprintf("Could not get node in MCP '%v'.", custom))
+	customNode := customNodes[0]
+
+	// Validate MCN for node in custom pool
+	framework.Logf("Validating MCN properties for node in custom '%v' pool.", custom)
+	mcnErr = ValidateMCNForNodeInPool(oc, clientSet, customNode, custom)
+	o.Expect(mcnErr).NotTo(o.HaveOccurred(), fmt.Sprintf("Error validating MCN properties for node in custom pool '%v'.", custom))
+}
+
+// `ValidateMCNPropertiesSNO` checks that MCN properties match the corresponding node properties
+// specifically for SNO clusters. Note that this test does not include creating a custom MCP, as
+// the default SNO node remains part of the master pool.
+func ValidateMCNPropertiesSNO(oc *exutil.CLI, fixture string) {
+	// Create client set for test
+	clientSet, clientErr := machineconfigclient.NewForConfig(oc.KubeFramework().ClientConfig())
+	o.Expect(clientErr).NotTo(o.HaveOccurred(), "Error creating client set for test.")
+
+	// Grab the cluster's node
+	node := GetRandomNode(oc, master)
+	o.Expect(node.Name).NotTo(o.Equal(""), "Could not get a master node.")
+
+	// Validate MCN for the cluster's node
+	framework.Logf("Validating MCN properties for the node in pool '%v'.", master)
+	mcnErr := ValidateMCNForNodeInPool(oc, clientSet, node, master)
+	o.Expect(mcnErr).NotTo(o.HaveOccurred(), fmt.Sprintf("Error validating MCN properties for the node in pool '%v'.", master))
+}
+
+// `ValidateMCNConditionTransitions` checks that MCN conditions properly update on a node update
+func ValidateMCNConditionTransitions(oc *exutil.CLI, fixture string) {
+	// Create client set for test
+	clientSet, clientErr := machineconfigclient.NewForConfig(oc.KubeFramework().ClientConfig())
+	o.Expect(clientErr).NotTo(o.HaveOccurred(), "Error creating client set for test.")
+
+	// Delete MC on failure or test completion
+	defer func() {
+		deleteMCErr := oc.Run("delete").Args("machineconfig", "90-master-testfile").Execute()
+		o.Expect(deleteMCErr).NotTo(o.HaveOccurred(), "Could not delete MachineConfig '90-master-testfile'.")
+	}()
+
+	// Apply MC targeting master pool
+	mcErr := oc.Run("apply").Args("-f", fixture).Execute()
+	o.Expect(mcErr).NotTo(o.HaveOccurred(), "Could not apply MachineConfig.")
+
+	// Get an updating master node
+	updatingNodes := GetCordonedNodes(oc, master)
+	o.Expect(len(updatingNodes) > 0).To(o.BeTrue(), fmt.Sprintf("No updating nodes found for MCP '%v'.", master))
+	masterNode := updatingNodes[0]
+
+	// Validate transition through conditions for MCN
+	// Note that some conditions are passed through quickly in a node update, so the test can
+	// "miss" catching the phases. For test stability, if we fail to catch an "Unknown" status,
+	// a warning will be logged instead of erroring out the test.
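+	// Expected progression (mirrors the MCO update flow): UpdatePrepared -> Cordoned ->
+	// Drained -> AppliedFilesAndOS -> UpdateExecuted -> UpdatePostActionComplete ->
+	// RebootedNode -> Resumed -> UpdateComplete -> Uncordoned -> Updated.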
+ framework.Logf("Waiting for Updated=False") + err := WaitForMCNConditionStatus(clientSet, masterNode.Name, mcfgv1alpha1.MachineConfigNodeUpdated, metav1.ConditionFalse, 1*time.Minute, 1*time.Second) + o.Expect(err).NotTo(o.HaveOccurred(), "Error, could not detect Updated=False.") + framework.Logf("Waiting for UpdatePrepared=True") + err = WaitForMCNConditionStatus(clientSet, masterNode.Name, mcfgv1alpha1.MachineConfigNodeUpdatePrepared, metav1.ConditionTrue, 1*time.Minute, 1*time.Second) + o.Expect(err).NotTo(o.HaveOccurred(), "Error, could not detect UpdatePrepared=True.") + framework.Logf("Waiting for UpdateExecuted=Unknown") + err = WaitForMCNConditionStatus(clientSet, masterNode.Name, mcfgv1alpha1.MachineConfigNodeUpdateExecuted, metav1.ConditionUnknown, 30*time.Second, 1*time.Second) + if err != nil { + framework.Logf("Warning, could not detect UpdateExecuted=Unknown.") + } + framework.Logf("Waiting for Cordoned=True") + err = WaitForMCNConditionStatus(clientSet, masterNode.Name, mcfgv1alpha1.MachineConfigNodeUpdateCordoned, metav1.ConditionTrue, 30*time.Second, 1*time.Second) + o.Expect(err).NotTo(o.HaveOccurred(), "Error, could not detect Cordoned=True.") + framework.Logf("Waiting for Drained=Unknown") + err = WaitForMCNConditionStatus(clientSet, masterNode.Name, mcfgv1alpha1.MachineConfigNodeUpdateDrained, metav1.ConditionUnknown, 15*time.Second, 1*time.Second) + if err != nil { + framework.Logf("Warning, could not detect Drained=Unknown.") + } + framework.Logf("Waiting for Drained=True") + err = WaitForMCNConditionStatus(clientSet, masterNode.Name, mcfgv1alpha1.MachineConfigNodeUpdateDrained, metav1.ConditionTrue, 4*time.Minute, 1*time.Second) + o.Expect(err).NotTo(o.HaveOccurred(), "Error, could not detect Drained=True.") + framework.Logf("Waiting for AppliedFilesAndOS=Unknown") + err = WaitForMCNConditionStatus(clientSet, masterNode.Name, mcfgv1alpha1.MachineConfigNodeUpdateFilesAndOS, metav1.ConditionUnknown, 30*time.Second, 1*time.Second) + if err != nil { + framework.Logf("Warning, could not detect AppliedFilesAndOS=Unknown.") + } + framework.Logf("Waiting for AppliedFilesAndOS=True") + err = WaitForMCNConditionStatus(clientSet, masterNode.Name, mcfgv1alpha1.MachineConfigNodeUpdateFilesAndOS, metav1.ConditionTrue, 3*time.Minute, 1*time.Second) + o.Expect(err).NotTo(o.HaveOccurred(), "Error, could not detect AppliedFilesAndOS=True.") + framework.Logf("Waiting for UpdateExecuted=True") + err = WaitForMCNConditionStatus(clientSet, masterNode.Name, mcfgv1alpha1.MachineConfigNodeUpdateExecuted, metav1.ConditionTrue, 20*time.Second, 1*time.Second) + o.Expect(err).NotTo(o.HaveOccurred(), "Error, could not detect UpdateExecuted=True.") + framework.Logf("Waiting for UpdatePostActionComplete=Unknown") + err = WaitForMCNConditionStatus(clientSet, masterNode.Name, mcfgv1alpha1.MachineConfigNodeUpdatePostActionComplete, metav1.ConditionUnknown, 30*time.Second, 1*time.Second) + if err != nil { + framework.Logf("Warning, could not detect UpdatePostActionComplete=Unknown.") + } + framework.Logf("Waiting for RebootedNode=Unknown") + err = WaitForMCNConditionStatus(clientSet, masterNode.Name, mcfgv1alpha1.MachineConfigNodeUpdateRebooted, metav1.ConditionUnknown, 15*time.Second, 1*time.Second) + if err != nil { + framework.Logf("Warning, could not detect RebootedNode=Unknown.") + } + framework.Logf("Waiting for RebootedNode=True") + err = WaitForMCNConditionStatus(clientSet, masterNode.Name, mcfgv1alpha1.MachineConfigNodeUpdateRebooted, metav1.ConditionTrue, 5*time.Minute, 1*time.Second) + 
o.Expect(err).NotTo(o.HaveOccurred(), "Error, could not detect RebootedNode=True.") + framework.Logf("Waiting for Resumed=True") + err = WaitForMCNConditionStatus(clientSet, masterNode.Name, mcfgv1alpha1.MachineConfigNodeResumed, metav1.ConditionTrue, 15*time.Second, 1*time.Second) + o.Expect(err).NotTo(o.HaveOccurred(), "Error, could not detect Resumed=True.") + framework.Logf("Waiting for UpdateComplete=True") + err = WaitForMCNConditionStatus(clientSet, masterNode.Name, mcfgv1alpha1.MachineConfigNodeUpdateComplete, metav1.ConditionTrue, 10*time.Second, 1*time.Second) + o.Expect(err).NotTo(o.HaveOccurred(), "Error, could not detect UpdateComplete=True.") + framework.Logf("Waiting for Uncordoned=True") + err = WaitForMCNConditionStatus(clientSet, masterNode.Name, mcfgv1alpha1.MachineConfigNodeUpdateUncordoned, metav1.ConditionTrue, 10*time.Second, 1*time.Second) + o.Expect(err).NotTo(o.HaveOccurred(), "Error, could not detect Uncordoned=True.") + framework.Logf("Waiting for Updated=True") + err = WaitForMCNConditionStatus(clientSet, masterNode.Name, mcfgv1alpha1.MachineConfigNodeUpdated, metav1.ConditionTrue, 1*time.Minute, 1*time.Second) + o.Expect(err).NotTo(o.HaveOccurred(), "Error, could not detect Updated=True.") + + // When an update is complete, all conditions other than `Updated` must be false + framework.Logf("Checking all conditions other than 'Updated' are False.") + o.Expect(ConfirmUpdatedMCNStatus(clientSet, masterNode.Name)).Should(o.BeTrue(), "Error, all conditions must be 'False' when Updated=True.") +} + +// `ValidateMCNConditionOnNodeDegrade` checks that Conditions properly update on a node failure (MCP degrade) +func ValidateMCNConditionOnNodeDegrade(oc *exutil.CLI, fixture string, isSno bool) { + // Create client set for test + clientSet, clientErr := machineconfigclient.NewForConfig(oc.KubeFramework().ClientConfig()) + o.Expect(clientErr).NotTo(o.HaveOccurred(), "Error creating client set for test.") + + // In SNO, master pool will degrade + poolName := worker + mcName := "91-worker-testfile-invalid" + if isSno { + poolName = master + mcName = "91-master-testfile-invalid" + } + + // Cleanup MC and fix node degradation on failure or test completion + defer func() { + // Delete the applied MC + deleteMCErr := oc.Run("delete").Args("machineconfig", mcName).Execute() + o.Expect(deleteMCErr).NotTo(o.HaveOccurred(), fmt.Sprintf("Could not delete MachineConfig '%v'.", mcName)) + + // Recover the degraded MCP + recoverErr := RecoverFromDegraded(oc, poolName) + o.Expect(recoverErr).NotTo(o.HaveOccurred(), fmt.Sprintf("Could not recover MCP '%v' from degraded state.", poolName)) + }() + + // Apply invalid MC + mcErr := oc.Run("apply").Args("-f", fixture).Execute() + o.Expect(mcErr).NotTo(o.HaveOccurred(), "Could not apply MachineConfig.") + + // Wait for MCP to be in a degraded state with one degraded machine + degradedErr := WaitForMCPConditionStatus(oc, poolName, "Degraded", corev1.ConditionTrue, 8*time.Minute, 3*time.Second) + o.Expect(degradedErr).NotTo(o.HaveOccurred(), fmt.Sprintf("Error waiting for '%v' MCP to be in a degraded state.", poolName)) + mcp, err := clientSet.MachineconfigurationV1().MachineConfigPools().Get(context.TODO(), poolName, metav1.GetOptions{}) + o.Expect(err).NotTo(o.HaveOccurred(), fmt.Sprintf("Error getting '%v' MCP.", poolName)) + o.Expect(mcp.Status.DegradedMachineCount).To(o.BeNumerically("==", 1), fmt.Sprintf("Degraded machine count is not 1. 
It is %v.", mcp.Status.DegradedMachineCount))
+
+	// Get degraded node
+	degradedNode, degradedNodeErr := GetDegradedNode(oc, poolName)
+	o.Expect(degradedNodeErr).NotTo(o.HaveOccurred(), "Could not get degraded node.")
+
+	// Validate MCN of degraded node
+	degradedNodeMCN, degradedErr := clientSet.MachineconfigurationV1alpha1().MachineConfigNodes().Get(context.TODO(), degradedNode.Name, metav1.GetOptions{})
+	o.Expect(degradedErr).NotTo(o.HaveOccurred(), fmt.Sprintf("Error getting MCN of degraded node '%v'.", degradedNode.Name))
+	framework.Logf("Validating that `AppliedFilesAndOS` and `UpdateExecuted` conditions in '%v' MCN have a status of 'Unknown'.", degradedNodeMCN.Name)
+	o.Expect(CheckMCNConditionStatus(degradedNodeMCN, mcfgv1alpha1.MachineConfigNodeUpdateFilesAndOS, metav1.ConditionUnknown)).Should(o.BeTrue(), "Condition 'AppliedFilesAndOS' does not have the expected status of 'Unknown'.")
+	o.Expect(CheckMCNConditionStatus(degradedNodeMCN, mcfgv1alpha1.MachineConfigNodeUpdateExecuted, metav1.ConditionUnknown)).Should(o.BeTrue(), "Condition 'UpdateExecuted' does not have the expected status of 'Unknown'.")
+}
+
+// `ValidateMCNOnNodeCreationAndDeletion` checks that MCNs with correct properties are created on node creation
+// and deleted on node deletion
+func ValidateMCNOnNodeCreationAndDeletion(oc *exutil.CLI) {
+	cleanupCompleted := false
+	newNode := corev1.Node{}
+	newMachineName := ""
+
+	// Create machine client for test
+	machineClient, machineErr := machineclient.NewForConfig(oc.KubeFramework().ClientConfig())
+	o.Expect(machineErr).NotTo(o.HaveOccurred(), "Error creating machine client for test.")
+
+	// Create client set for test
+	clientSet, clientErr := machineconfigclient.NewForConfig(oc.KubeFramework().ClientConfig())
+	o.Expect(clientErr).NotTo(o.HaveOccurred(), "Error creating client set for test.")
+
+	// Skip test if worker nodes cannot be scaled
+	canBeScaled, canScaleErr := WorkersCanBeScaled(oc, machineClient)
+	o.Expect(canScaleErr).NotTo(o.HaveOccurred(), "Error occurred when determining whether worker nodes can be scaled.")
+	if !canBeScaled {
+		g.Skip("Worker nodes cannot be scaled using MachineSets. This test cannot be executed if workers cannot be scaled via MachineSets.")
+	}
+
+	// Get MachineSet for test
+	framework.Logf("Getting MachineSet for testing.")
+	machineSet := getRandomMachineSet(machineClient)
+	framework.Logf("MachineSet '%s' will be used for testing", machineSet.Name)
+	originalReplica := int(*machineSet.Spec.Replicas)
+
+	// Create node by scaling MachineSet
+	framework.Logf("Scaling up MachineSet to create node.")
+	updatedReplica := originalReplica + 1
+	scaleErr := ScaleMachineSet(oc, machineSet.Name, fmt.Sprintf("%d", updatedReplica))
+	o.Expect(scaleErr).NotTo(o.HaveOccurred(), fmt.Sprintf("Error scaling MachineSet %v to replica value %v.", machineSet.Name, updatedReplica))
+
+	// If we fail at this point, cleanup should include scaling the MachineSet replica back down to the
+	// original value, when needed (in the case where the replica value patch was successful).
+ defer func() { + cleanupErr := ScaleMachineSetDown(oc, machineSet, originalReplica, cleanupCompleted) + o.Expect(cleanupErr).NotTo(o.HaveOccurred(), fmt.Sprintf("Error cleaning up cluster by scaling down MachineSet '%v'.", machineSet.Name)) + cleanupCompleted = true + }() + + // Get the new machine + framework.Logf("Getting the new machine.") + provisioningMachine, provisioningMachineErr := GetMachinesByPhase(machineClient, machineSet.Name, "Provisioning") + o.Expect(provisioningMachineErr).NotTo(o.HaveOccurred(), fmt.Sprintf("Cannot find provisioning machine in MachineSet %v.", machineSet.Name)) + newMachineName = provisioningMachine.Name + + // If we fail past this point, cleanup should include scaling the MachineSet replica back down to the + // original value and ensuring that the newly provisioned Machine is deleted. + defer func() { + cleanupErr := CleanupProvisionedMachine(oc, machineClient, machineSet.Name, originalReplica, newMachineName, cleanupCompleted) + o.Expect(cleanupErr).NotTo(o.HaveOccurred(), fmt.Sprintf("Error removing provisioned Machine '%v' by scaling down MachineSet '%v'.", newMachineName, machineSet.Name)) + cleanupCompleted = true + }() + + // Annotate the machine so it is deleted on the MachineSet scale down + framework.Logf("Updating delete-machine annotation on Machine '%v' to be 'true'.", newMachineName) + deleteAnnotationErr := UpdateDeleteMachineAnnotation(oc, newMachineName) + o.Expect(deleteAnnotationErr).NotTo(o.HaveOccurred(), fmt.Sprintf("Error updating delete-machine annotation for machine '%v'.", newMachineName)) + + // Wait for new Machine to be ready + framework.Logf("Waiting for new machine %v to be ready.", newMachineName) + WaitForMachineInState(machineClient, newMachineName, "Running") + + // Get the new node + framework.Logf("Getting new node in machine %v.", newMachineName) + newNode, nodeErr := GetNewReadyNodeInMachine(oc, newMachineName) + o.Expect(nodeErr).NotTo(o.HaveOccurred(), fmt.Sprintf("Cannot find new ready node in Machine %v.", newMachineName)) + framework.Logf("Got new node: %v.", newNode.Name) + + // If we fail past this point, cleanup should include scaling the MachineSet replica back down to the + // original value and ensuring that the newly created Node is deleted.
+ defer func() { + cleanupErr := CleanupCreatedNode(oc, newMachineName, originalReplica, newNode.Name, cleanupCompleted) + o.Expect(cleanupErr).NotTo(o.HaveOccurred(), fmt.Sprintf("Error removing created Node '%v' by scaling down MachineSet '%v'.", newNode.Name, machineSet.Name)) + cleanupCompleted = true + }() + + // Validate new MCN + validMCNErr := WaitForValidMCNProperties(clientSet, newNode) + o.Expect(validMCNErr).NotTo(o.HaveOccurred(), fmt.Sprintf("MCN for node '%v' has invalid properties.", newNode.Name)) + + // Scale down the MachineSet to delete the created node + framework.Logf("Scaling down MachineSet to delete node.") + scaleErr = ScaleMachineSet(oc, machineSet.Name, fmt.Sprintf("%v", originalReplica)) + o.Expect(scaleErr).NotTo(o.HaveOccurred(), fmt.Sprintf("Error deleting node by scaling MachineSet %v to replica value %v.", machineSet.Name, originalReplica)) + + // Wait for created node to delete + framework.Logf("Waiting for node '%v' to be deleted.", newNode.Name) + o.Expect(WaitForNodeToBeDeleted(oc, newNode.Name)).NotTo(o.HaveOccurred(), fmt.Sprintf("Error deleting node '%v'.", newNode.Name)) + + // Check that corresponding MCN is removed alongside node + o.Expect(WaitForMCNToBeDeleted(clientSet, newNode.Name)).NotTo(o.HaveOccurred(), fmt.Sprintf("Error deleting MCN '%v'.", newNode.Name)) + + // If we successfully make it here, no cleanup is required + cleanupCompleted = true +} diff --git a/test/extended/testdata/bindata.go b/test/extended/testdata/bindata.go index ed0f5791d68b..4a61c3e7d9e5 100644 --- a/test/extended/testdata/bindata.go +++ b/test/extended/testdata/bindata.go @@ -419,6 +419,10 @@ // test/extended/testdata/ldap/ldapserver-service.yaml // test/extended/testdata/long_names/Dockerfile // test/extended/testdata/long_names/fixture.json +// test/extended/testdata/machine_config/machineconfig/0-master-mc.yaml +// test/extended/testdata/machine_config/machineconfig/1-master-invalid-mc.yaml +// test/extended/testdata/machine_config/machineconfig/1-worker-invalid-mc.yaml +// test/extended/testdata/machine_config/machineconfigpool/infra-mcp.yaml // test/extended/testdata/machine_config/machineconfigurations/managedbootimages-all.yaml // test/extended/testdata/machine_config/machineconfigurations/managedbootimages-empty.yaml // test/extended/testdata/machine_config/machineconfigurations/managedbootimages-none.yaml @@ -49020,6 +49024,133 @@ func testExtendedTestdataLong_namesFixtureJson() (*asset, error) { return a, nil } +var _testExtendedTestdataMachine_configMachineconfig0MasterMcYaml = []byte(`apiVersion: machineconfiguration.openshift.io/v1 +kind: MachineConfig +metadata: + labels: + machineconfiguration.openshift.io/role: master + name: 90-master-testfile +spec: + config: + ignition: + version: 3.2.0 + storage: + files: + - contents: + source: data:,hello%20world%0A + mode: 420 + path: /home/core/test +`) + +func testExtendedTestdataMachine_configMachineconfig0MasterMcYamlBytes() ([]byte, error) { + return _testExtendedTestdataMachine_configMachineconfig0MasterMcYaml, nil +} + +func testExtendedTestdataMachine_configMachineconfig0MasterMcYaml() (*asset, error) { + bytes, err := testExtendedTestdataMachine_configMachineconfig0MasterMcYamlBytes() + if err != nil { + return nil, err + } + + info := bindataFileInfo{name: "test/extended/testdata/machine_config/machineconfig/0-master-mc.yaml", size: 0, mode: os.FileMode(0), modTime: time.Unix(0, 0)} + a := &asset{bytes: bytes, info: info} + return a, nil +} + +var _testExtendedTestdataMachine_configMachineconfig1MasterInvalidMcYaml = []byte(`apiVersion: 
machineconfiguration.openshift.io/v1 +kind: MachineConfig +metadata: + labels: + machineconfiguration.openshift.io/role: master + name: 91-master-testfile-invalid +spec: + config: + ignition: + version: 3.2.0 + storage: + files: + - contents: + source: data:,hello%20world%0A + mode: 420 + path: /home/core +`) + +func testExtendedTestdataMachine_configMachineconfig1MasterInvalidMcYamlBytes() ([]byte, error) { + return _testExtendedTestdataMachine_configMachineconfig1MasterInvalidMcYaml, nil +} + +func testExtendedTestdataMachine_configMachineconfig1MasterInvalidMcYaml() (*asset, error) { + bytes, err := testExtendedTestdataMachine_configMachineconfig1MasterInvalidMcYamlBytes() + if err != nil { + return nil, err + } + + info := bindataFileInfo{name: "test/extended/testdata/machine_config/machineconfig/1-master-invalid-mc.yaml", size: 0, mode: os.FileMode(0), modTime: time.Unix(0, 0)} + a := &asset{bytes: bytes, info: info} + return a, nil +} + +var _testExtendedTestdataMachine_configMachineconfig1WorkerInvalidMcYaml = []byte(`apiVersion: machineconfiguration.openshift.io/v1 +kind: MachineConfig +metadata: + labels: + machineconfiguration.openshift.io/role: worker + name: 91-worker-testfile-invalid +spec: + config: + ignition: + version: 3.2.0 + storage: + files: + - contents: + source: data:,hello%20world%0A + mode: 420 + path: /home/core +`) + +func testExtendedTestdataMachine_configMachineconfig1WorkerInvalidMcYamlBytes() ([]byte, error) { + return _testExtendedTestdataMachine_configMachineconfig1WorkerInvalidMcYaml, nil +} + +func testExtendedTestdataMachine_configMachineconfig1WorkerInvalidMcYaml() (*asset, error) { + bytes, err := testExtendedTestdataMachine_configMachineconfig1WorkerInvalidMcYamlBytes() + if err != nil { + return nil, err + } + + info := bindataFileInfo{name: "test/extended/testdata/machine_config/machineconfig/1-worker-invalid-mc.yaml", size: 0, mode: os.FileMode(0), modTime: time.Unix(0, 0)} + a := &asset{bytes: bytes, info: info} + return a, nil +} + +var _testExtendedTestdataMachine_configMachineconfigpoolInfraMcpYaml = []byte(`apiVersion: machineconfiguration.openshift.io/v1 +kind: MachineConfigPool +metadata: + name: infra +spec: + machineConfigSelector: + matchExpressions: + - {key: machineconfiguration.openshift.io/role, operator: In, values: [worker,infra]} + nodeSelector: + matchLabels: + node-role.kubernetes.io/infra: "" +`) + +func testExtendedTestdataMachine_configMachineconfigpoolInfraMcpYamlBytes() ([]byte, error) { + return _testExtendedTestdataMachine_configMachineconfigpoolInfraMcpYaml, nil +} + +func testExtendedTestdataMachine_configMachineconfigpoolInfraMcpYaml() (*asset, error) { + bytes, err := testExtendedTestdataMachine_configMachineconfigpoolInfraMcpYamlBytes() + if err != nil { + return nil, err + } + + info := bindataFileInfo{name: "test/extended/testdata/machine_config/machineconfigpool/infra-mcp.yaml", size: 0, mode: os.FileMode(0), modTime: time.Unix(0, 0)} + a := &asset{bytes: bytes, info: info} + return a, nil +} + var _testExtendedTestdataMachine_configMachineconfigurationsManagedbootimagesAllYaml = []byte(`apiVersion: operator.openshift.io/v1 kind: MachineConfiguration metadata: @@ -55774,6 +55905,10 @@ var _bindata = map[string]func() (*asset, error){ "test/extended/testdata/ldap/ldapserver-service.yaml": testExtendedTestdataLdapLdapserverServiceYaml, "test/extended/testdata/long_names/Dockerfile": testExtendedTestdataLong_namesDockerfile, "test/extended/testdata/long_names/fixture.json": testExtendedTestdataLong_namesFixtureJson, + 
"test/extended/testdata/machine_config/machineconfig/0-master-mc.yaml": testExtendedTestdataMachine_configMachineconfig0MasterMcYaml, + "test/extended/testdata/machine_config/machineconfig/1-master-invalid-mc.yaml": testExtendedTestdataMachine_configMachineconfig1MasterInvalidMcYaml, + "test/extended/testdata/machine_config/machineconfig/1-worker-invalid-mc.yaml": testExtendedTestdataMachine_configMachineconfig1WorkerInvalidMcYaml, + "test/extended/testdata/machine_config/machineconfigpool/infra-mcp.yaml": testExtendedTestdataMachine_configMachineconfigpoolInfraMcpYaml, "test/extended/testdata/machine_config/machineconfigurations/managedbootimages-all.yaml": testExtendedTestdataMachine_configMachineconfigurationsManagedbootimagesAllYaml, "test/extended/testdata/machine_config/machineconfigurations/managedbootimages-empty.yaml": testExtendedTestdataMachine_configMachineconfigurationsManagedbootimagesEmptyYaml, "test/extended/testdata/machine_config/machineconfigurations/managedbootimages-none.yaml": testExtendedTestdataMachine_configMachineconfigurationsManagedbootimagesNoneYaml, @@ -56520,6 +56655,14 @@ var _bintree = &bintree{nil, map[string]*bintree{ "fixture.json": {testExtendedTestdataLong_namesFixtureJson, map[string]*bintree{}}, }}, "machine_config": {nil, map[string]*bintree{ + "machineconfig": {nil, map[string]*bintree{ + "0-master-mc.yaml": {testExtendedTestdataMachine_configMachineconfig0MasterMcYaml, map[string]*bintree{}}, + "1-master-invalid-mc.yaml": {testExtendedTestdataMachine_configMachineconfig1MasterInvalidMcYaml, map[string]*bintree{}}, + "1-worker-invalid-mc.yaml": {testExtendedTestdataMachine_configMachineconfig1WorkerInvalidMcYaml, map[string]*bintree{}}, + }}, + "machineconfigpool": {nil, map[string]*bintree{ + "infra-mcp.yaml": {testExtendedTestdataMachine_configMachineconfigpoolInfraMcpYaml, map[string]*bintree{}}, + }}, "machineconfigurations": {nil, map[string]*bintree{ "managedbootimages-all.yaml": {testExtendedTestdataMachine_configMachineconfigurationsManagedbootimagesAllYaml, map[string]*bintree{}}, "managedbootimages-empty.yaml": {testExtendedTestdataMachine_configMachineconfigurationsManagedbootimagesEmptyYaml, map[string]*bintree{}}, diff --git a/test/extended/testdata/machine_config/machineconfig/0-master-mc.yaml b/test/extended/testdata/machine_config/machineconfig/0-master-mc.yaml new file mode 100644 index 000000000000..b52938475f63 --- /dev/null +++ b/test/extended/testdata/machine_config/machineconfig/0-master-mc.yaml @@ -0,0 +1,16 @@ +apiVersion: machineconfiguration.openshift.io/v1 +kind: MachineConfig +metadata: + labels: + machineconfiguration.openshift.io/role: master + name: 90-master-testfile +spec: + config: + ignition: + version: 3.2.0 + storage: + files: + - contents: + source: data:,hello%20world%0A + mode: 420 + path: /home/core/test diff --git a/test/extended/testdata/machine_config/machineconfig/1-master-invalid-mc.yaml b/test/extended/testdata/machine_config/machineconfig/1-master-invalid-mc.yaml new file mode 100644 index 000000000000..9ca3c4b90859 --- /dev/null +++ b/test/extended/testdata/machine_config/machineconfig/1-master-invalid-mc.yaml @@ -0,0 +1,16 @@ +apiVersion: machineconfiguration.openshift.io/v1 +kind: MachineConfig +metadata: + labels: + machineconfiguration.openshift.io/role: master + name: 91-master-testfile-invalid +spec: + config: + ignition: + version: 3.2.0 + storage: + files: + - contents: + source: data:,hello%20world%0A + mode: 420 + path: /home/core diff --git 
a/test/extended/testdata/machine_config/machineconfig/1-worker-invalid-mc.yaml b/test/extended/testdata/machine_config/machineconfig/1-worker-invalid-mc.yaml new file mode 100644 index 000000000000..489717e42fed --- /dev/null +++ b/test/extended/testdata/machine_config/machineconfig/1-worker-invalid-mc.yaml @@ -0,0 +1,16 @@ +apiVersion: machineconfiguration.openshift.io/v1 +kind: MachineConfig +metadata: + labels: + machineconfiguration.openshift.io/role: worker + name: 91-worker-testfile-invalid +spec: + config: + ignition: + version: 3.2.0 + storage: + files: + - contents: + source: data:,hello%20world%0A + mode: 420 + path: /home/core diff --git a/test/extended/testdata/machine_config/machineconfigpool/infra-mcp.yaml b/test/extended/testdata/machine_config/machineconfigpool/infra-mcp.yaml new file mode 100644 index 000000000000..afc03a26c2e2 --- /dev/null +++ b/test/extended/testdata/machine_config/machineconfigpool/infra-mcp.yaml @@ -0,0 +1,11 @@ +apiVersion: machineconfiguration.openshift.io/v1 +kind: MachineConfigPool +metadata: + name: infra +spec: + machineConfigSelector: + matchExpressions: + - {key: machineconfiguration.openshift.io/role, operator: In, values: [worker,infra]} + nodeSelector: + matchLabels: + node-role.kubernetes.io/infra: "" diff --git a/test/extended/util/annotate/generated/zz_generated.annotations.go b/test/extended/util/annotate/generated/zz_generated.annotations.go index 49c3b948a50b..1a36cd9e4a32 100644 --- a/test/extended/util/annotate/generated/zz_generated.annotations.go +++ b/test/extended/util/annotate/generated/zz_generated.annotations.go @@ -1359,6 +1359,14 @@ var Annotations = map[string]string{ "[sig-kubevirt] services when running openshift cluster on KubeVirt virtual machines should allow direct connections to pods from guest cluster pod in pod network across different guest nodes": " [Suite:openshift/conformance/parallel]", + "[sig-mco][OCPFeatureGate:MachineConfigNodes] [Serial]Should have MCN properties matching associated node properties [apigroup:machineconfiguration.openshift.io]": " [Suite:openshift/conformance/serial]", + + "[sig-mco][OCPFeatureGate:MachineConfigNodes] [Serial]Should properly transition through MCN conditions on node update [apigroup:machineconfiguration.openshift.io]": " [Suite:openshift/conformance/serial]", + + "[sig-mco][OCPFeatureGate:MachineConfigNodes] [Serial][Slow]Should properly create and remove MCN on node creation and deletion [apigroup:machineconfiguration.openshift.io]": "", + + "[sig-mco][OCPFeatureGate:MachineConfigNodes] [Serial][Slow]Should properly report MCN conditions on node degrade [apigroup:machineconfiguration.openshift.io]": "", + "[sig-mco][OCPFeatureGate:ManagedBootImagesAWS][Serial] Should degrade on a MachineSet with an OwnerReference [apigroup:machineconfiguration.openshift.io]": " [Suite:openshift/conformance/serial]", "[sig-mco][OCPFeatureGate:ManagedBootImagesAWS][Serial] Should not update boot images on any MachineSet when not configured [apigroup:machineconfiguration.openshift.io]": " [Suite:openshift/conformance/serial]", diff --git a/zz_generated.manifests/test-reporting.yaml b/zz_generated.manifests/test-reporting.yaml index ecaae900dc10..14ac8cc2c2cb 100644 --- a/zz_generated.manifests/test-reporting.yaml +++ b/zz_generated.manifests/test-reporting.yaml @@ -98,6 +98,16 @@ spec: - testName: '[sig-imageregistry][OCPFeatureGate:ImageStreamImportMode][Serial] ImageStream API import mode should be PreserveOriginal or Legacy depending on desired.architecture field in the CV 
[apigroup:image.openshift.io]' + - featureGate: MachineConfigNodes + tests: + - testName: '[sig-mco][OCPFeatureGate:MachineConfigNodes] [Serial]Should have + MCN properties matching associated node properties [apigroup:machineconfiguration.openshift.io]' + - testName: '[sig-mco][OCPFeatureGate:MachineConfigNodes] [Serial]Should properly + transition through MCN conditions on node update [apigroup:machineconfiguration.openshift.io]' + - testName: '[sig-mco][OCPFeatureGate:MachineConfigNodes] [Serial][Slow]Should + properly create and remove MCN on node creation and deletion [apigroup:machineconfiguration.openshift.io]' + - testName: '[sig-mco][OCPFeatureGate:MachineConfigNodes] [Serial][Slow]Should + properly report MCN conditions on node degrade [apigroup:machineconfiguration.openshift.io]' - featureGate: ManagedBootImages tests: - testName: '[sig-mco][OCPFeatureGate:ManagedBootImages][Serial] Should degrade From 8bd781e5f7eb0f817df402e5b446ee1918224d1d Mon Sep 17 00:00:00 2001 From: Pablo Rodriguez Nava Date: Fri, 21 Mar 2025 17:10:20 +0100 Subject: [PATCH 3/3] MCO-1599: Add scope and impersonation e2e tests This commit ports from the MCO repo the scope and impersonation tests that already exist for the MCN feature. --- test/extended/machine_config/helpers.go | 83 +++++++++++++++++++ .../machine_config/machine_config_node.go | 52 ++++++++++++ 2 files changed, 135 insertions(+) diff --git a/test/extended/machine_config/helpers.go b/test/extended/machine_config/helpers.go index 2467ee856184..85720c47c369 100644 --- a/test/extended/machine_config/helpers.go +++ b/test/extended/machine_config/helpers.go @@ -5,7 +5,11 @@ import ( "encoding/json" "errors" "fmt" + "k8s.io/apimachinery/pkg/fields" + "k8s.io/client-go/kubernetes" "math/rand" + "os" + "os/exec" "strings" "time" @@ -1068,3 +1072,82 @@ func WaitForMachineToBeDeleted(machineClient *machineclient.Clientset, machineNa }, 10*time.Minute, 5*time.Second).Should(o.BeTrue()) return nil } + +// ExecCmdOnNodeWithError behaves like ExecCmdOnNode, with the exception that +// any errors are returned to the caller for inspection. This allows one to +// execute a command that is expected to fail; e.g., stat /nonexistent/file. +func ExecCmdOnNodeWithError(oc *exutil.CLI, node corev1.Node, subArgs ...string) (string, error) { + cmd, err := execCmdOnNode(oc, node, subArgs...) + if err != nil { + return "", err + } + + out, err := cmd.CombinedOutput() + return string(out), err +} + +// ExecCmdOnNode finds a node's mcd, and oc rsh's into it to execute a command on the node +// all commands should use /rootfs as root +func ExecCmdOnNode(oc *exutil.CLI, node corev1.Node, subArgs ...string) string { + cmd, err := execCmdOnNode(oc, node, subArgs...) + o.Expect(err).NotTo(o.HaveOccurred(), "could not prepare to exec cmd %v on node %s: %s", subArgs, node.Name, err) + cmd.Stderr = os.Stderr + + out, err := cmd.Output() + if err != nil { + // A common error is that the MCD went down mid-command. Retry for good measure + cmd, err = execCmdOnNode(oc, node, subArgs...) 
+ o.Expect(err).NotTo(o.HaveOccurred(), "could not prepare to exec cmd %v on node %s: %s", subArgs, node.Name, err) + out, err = cmd.Output() + + } + o.Expect(err).NotTo(o.HaveOccurred(), "failed to exec cmd %v on node %s: %s", subArgs, node.Name, string(out)) + return string(out) +} + +// execCmdOnNode finds a node's mcd and builds, but does not run, the oc rsh command to execute on the node +// all commands should use /rootfs as root +func execCmdOnNode(oc *exutil.CLI, node corev1.Node, subArgs ...string) (*exec.Cmd, error) { + // Check for an oc binary in $PATH. + path, err := exec.LookPath("oc") + if err != nil { + return nil, fmt.Errorf("could not locate oc command: %w", err) + } + + mcd, err := mcdForNode(oc.AsAdmin().KubeClient(), &node) + if err != nil { + return nil, fmt.Errorf("could not get MCD for node %s: %w", node.Name, err) + } + + mcdName := mcd.ObjectMeta.Name + + entryPoint := path + args := []string{"rsh", + "-n", "openshift-machine-config-operator", + "-c", "machine-config-daemon", + mcdName} + args = append(args, subArgs...) + + cmd := exec.Command(entryPoint, args...) + return cmd, nil +} + +func mcdForNode(client kubernetes.Interface, node *corev1.Node) (*corev1.Pod, error) { + // find the MCD pod that has spec.nodeName = node.Name and get its name: + listOptions := metav1.ListOptions{ + FieldSelector: fields.SelectorFromSet(fields.Set{"spec.nodeName": node.Name}).String(), + } + listOptions.LabelSelector = labels.SelectorFromSet(labels.Set{"k8s-app": "machine-config-daemon"}).String() + + mcdList, err := client.CoreV1().Pods("openshift-machine-config-operator").List(context.TODO(), listOptions) + if err != nil { + return nil, err + } + if len(mcdList.Items) != 1 { + if len(mcdList.Items) == 0 { + return nil, fmt.Errorf("failed to find MCD for node %s", node.Name) + } + return nil, fmt.Errorf("too many (%d) MCDs for node %s", len(mcdList.Items), node.Name) + } + return &mcdList.Items[0], nil +} diff --git a/test/extended/machine_config/machine_config_node.go b/test/extended/machine_config/machine_config_node.go index fbcafb887a8e..2dbcd8cb6c0f 100644 --- a/test/extended/machine_config/machine_config_node.go +++ b/test/extended/machine_config/machine_config_node.go @@ -1,8 +1,10 @@ package machine_config import ( + "bytes" "context" "fmt" + "os/exec" "path/filepath" "time" @@ -61,6 +63,18 @@ var _ = g.Describe("[sig-mco][OCPFeatureGate:MachineConfigNodes]", func() { skipOnSingleNodeTopology(oc) //skip this test for SNO ValidateMCNOnNodeCreationAndDeletion(oc) }) + + g.It("Should properly block MCN updates from a MCD that is not the associated one [apigroup:machineconfiguration.openshift.io]", func() { + ValidateMCNScopeSadPathTest(oc) + }) + + g.It("Should properly block MCN updates by impersonation of the MCD SA [apigroup:machineconfiguration.openshift.io]", func() { + ValidateMCNScopeImpersonationPathTest(oc) + }) + + g.It("Should properly update the MCN from the associated MCD [apigroup:machineconfiguration.openshift.io]", func() { + ValidateMCNScopeHappyPathTest(oc) + }) }) // `ValidateMCNProperties` checks that MCN properties match the corresponding node properties @@ -386,3 +400,41 @@ func ValidateMCNOnNodeCreationAndDeletion(oc *exutil.CLI) { // If we successfully make it here, no cleanup is required cleanupCompleted = true } + +func ValidateMCNScopeSadPathTest(oc *exutil.CLI) { + // Grab two random nodes from different pools, so we don't end up testing and targeting the same node. 
+ nodeUnderTest := GetRandomNode(oc, "worker") + targetNode := GetRandomNode(oc, "master") + + // Attempt to patch the MCN owned by targetNode from nodeUnderTest's MCD. This should fail. + // This oc command effectively uses the service account of the nodeUnderTest's MCD pod, which should only be able to edit nodeUnderTest's MCN. + cmdOutput, err := ExecCmdOnNodeWithError(oc, nodeUnderTest, "chroot", "/rootfs", "oc", "patch", "machineconfignodes", targetNode.Name, "--type=merge", "-p", "{\"spec\":{\"configVersion\":{\"desired\":\"rendered-worker-test\"}}}") + + o.Expect(err).To(o.HaveOccurred()) + o.Expect(cmdOutput).To(o.ContainSubstring("updates to MCN " + targetNode.Name + " can only be done from the MCN's owner node")) +} + +func ValidateMCNScopeImpersonationPathTest(oc *exutil.CLI) { + // Grab a random node from the worker pool + nodeUnderTest := GetRandomNode(oc, "worker") + + var errb bytes.Buffer + // Attempt to patch the MCN owned by nodeUnderTest by impersonating the MCD SA. This should fail. + cmd := exec.Command("oc", "patch", "machineconfignodes", nodeUnderTest.Name, "--type=merge", "-p", "{\"spec\":{\"configVersion\":{\"desired\":\"rendered-worker-test\"}}}", "--as=system:serviceaccount:openshift-machine-config-operator:machine-config-daemon") + cmd.Stderr = &errb + err := cmd.Run() + + o.Expect(err).To(o.HaveOccurred()) + o.Expect(errb.String()).To(o.ContainSubstring("this user must have a \"authentication.kubernetes.io/node-name\" claim")) + +} + +func ValidateMCNScopeHappyPathTest(oc *exutil.CLI) { + + // Grab a random node from the worker pool + nodeUnderTest := GetRandomNode(oc, "worker") + + // Attempt to patch the MCN owned by nodeUnderTest from nodeUnderTest's MCD. This should succeed. + // This oc command effectively uses the service account of the nodeUnderTest's MCD pod, which should only be able to edit nodeUnderTest's MCN. + ExecCmdOnNode(oc, nodeUnderTest, "chroot", "/rootfs", "oc", "patch", "machineconfignodes", nodeUnderTest.Name, "--type=merge", "-p", "{\"spec\":{\"configVersion\":{\"desired\":\"rendered-worker-test\"}}}")
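
For reference, the GetRandomNode helper used by the three scope tests above is defined elsewhere in helpers.go and is not part of this series. A minimal sketch of the behavior those tests assume (pick a random Ready node carrying the requested role label), reusing imports already present in helpers.go, might look like the following; the name getRandomNodeSketch and its exact logic are hypothetical:

func getRandomNodeSketch(oc *exutil.CLI, role string) corev1.Node {
	// List all nodes carrying the requested role label.
	nodes, err := oc.AsAdmin().KubeClient().CoreV1().Nodes().List(context.TODO(),
		metav1.ListOptions{LabelSelector: "node-role.kubernetes.io/" + role})
	o.Expect(err).NotTo(o.HaveOccurred(), "Error listing nodes with role '%v'.", role)
	o.Expect(nodes.Items).NotTo(o.BeEmpty(), "No nodes found with role '%v'.", role)

	// Shuffle so repeated runs do not always target the same node, then
	// return the first node that reports Ready.
	rand.Shuffle(len(nodes.Items), func(i, j int) {
		nodes.Items[i], nodes.Items[j] = nodes.Items[j], nodes.Items[i]
	})
	for _, node := range nodes.Items {
		for _, cond := range node.Status.Conditions {
			if cond.Type == corev1.NodeReady && cond.Status == corev1.ConditionTrue {
				return node
			}
		}
	}
	framework.Failf("No Ready node found with role '%v'", role)
	return corev1.Node{}
}

The real helper may differ in its selection strategy; the scope tests only depend on it returning a Ready node from the named pool so that the sad-path test can target a node in a different pool than the one it executes from.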