From d5c0a908ee057281420b78c073dc9b1125da37f0 Mon Sep 17 00:00:00 2001 From: Charlie Doern Date: Tue, 17 Oct 2023 16:41:31 -0400 Subject: [PATCH] fix timing issues upon cluster installation fix degrading on /etc/docker/certs.d not existing as well as cconfig not existing Signed-off-by: Charlie Doern --- pkg/controller/common/constants.go | 5 + .../kubelet_config_controller.go | 13 +++ pkg/controller/render/render_controller.go | 13 +++ .../render/render_controller_test.go | 3 +- .../template/template_controller.go | 34 +----- pkg/daemon/certificate_writer.go | 2 +- pkg/helpers/helpers.go | 27 +++++ pkg/operator/operator.go | 2 +- pkg/operator/sync.go | 102 ++++++++++++++++++ .../k8s.io/code-generator/generate-groups.sh | 0 .../generate-internal-groups.sh | 0 11 files changed, 168 insertions(+), 33 deletions(-) mode change 100755 => 100644 vendor/k8s.io/code-generator/generate-groups.sh mode change 100755 => 100644 vendor/k8s.io/code-generator/generate-internal-groups.sh diff --git a/pkg/controller/common/constants.go b/pkg/controller/common/constants.go index 484543cca6..3859ae8217 100644 --- a/pkg/controller/common/constants.go +++ b/pkg/controller/common/constants.go @@ -1,5 +1,7 @@ package common +import "time" + const ( // MCONamespace is the namespace that should be used for all API objects owned by the MCO by default MCONamespace = "openshift-machine-config-operator" @@ -13,6 +15,9 @@ const ( // OSImageURLOverriddenKey is used to tag a rendered machineconfig when OSImageURL has been overridden from default using machineconfig OSImageURLOverriddenKey = "machineconfiguration.openshift.io/os-image-url-overridden" + ControllerConfigRolloutInterval = time.Second + ControllerConfigTimeout = 5 * time.Minute + // ControllerConfigName is the name of the ControllerConfig object that controllers use ControllerConfigName = "machine-config-controller" diff --git a/pkg/controller/kubelet-config/kubelet_config_controller.go b/pkg/controller/kubelet-config/kubelet_config_controller.go index 4e1ae634e6..6bfd511367 100644 --- a/pkg/controller/kubelet-config/kubelet_config_controller.go +++ b/pkg/controller/kubelet-config/kubelet_config_controller.go @@ -13,6 +13,7 @@ import ( ign3types "github.com/coreos/ignition/v2/config/v3_4/types" "github.com/imdario/mergo" corev1 "k8s.io/api/core/v1" + apierrors "k8s.io/apimachinery/pkg/api/errors" macherrors "k8s.io/apimachinery/pkg/api/errors" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/labels" @@ -475,6 +476,18 @@ func (ctrl *Controller) syncKubeletConfig(key string) error { klog.V(4).Infof("Finished syncing kubeletconfig %q (%v)", key, time.Since(startTime)) }() + if err := wait.PollUntilContextTimeout(context.TODO(), ctrlcommon.ControllerConfigRolloutInterval, ctrlcommon.ControllerConfigTimeout, false, func(_ context.Context) (bool, error) { + _, err := ctrl.client.MachineconfigurationV1().ControllerConfigs().Get(context.TODO(), ctrlcommon.ControllerConfigName, metav1.GetOptions{}) + if err == nil { + return true, nil + } + if apierrors.IsNotFound(err) { + return false, nil + } + return false, nil + }); err != nil { + klog.Infof("Controller Config has not been created. Continuing %v", err) + } // Wait to apply a kubelet config if the controller config is not completed if err := apihelpers.IsControllerConfigCompleted(ctrlcommon.ControllerConfigName, ctrl.ccLister.Get); err != nil { return err diff --git a/pkg/controller/render/render_controller.go b/pkg/controller/render/render_controller.go index b405b0b7d8..ae1ab163e7 100644 --- a/pkg/controller/render/render_controller.go +++ b/pkg/controller/render/render_controller.go @@ -423,6 +423,19 @@ func (ctrl *Controller) syncMachineConfigPool(key string) error { return err } + if err := wait.PollUntilContextTimeout(context.TODO(), ctrlcommon.ControllerConfigRolloutInterval, ctrlcommon.ControllerConfigTimeout, false, func(_ context.Context) (bool, error) { + _, err := ctrl.client.MachineconfigurationV1().ControllerConfigs().Get(context.TODO(), ctrlcommon.ControllerConfigName, metav1.GetOptions{}) + if err == nil { + return true, nil + } + if apierrors.IsNotFound(err) { + return false, nil + } + return false, nil + }); err != nil { + klog.Infof("Controller Config has not been created. Continuing %v", err) + } + // TODO(runcom): add tests in render_controller_test.go for this condition if err := apihelpers.IsControllerConfigCompleted(ctrlcommon.ControllerConfigName, ctrl.ccLister.Get); err != nil { return err diff --git a/pkg/controller/render/render_controller_test.go b/pkg/controller/render/render_controller_test.go index 08206c63da..c49ec3723c 100644 --- a/pkg/controller/render/render_controller_test.go +++ b/pkg/controller/render/render_controller_test.go @@ -131,7 +131,7 @@ func (f *fixture) runController(mcpname string, expectError bool) { // checkAction verifies that expected and actual actions are equal and both have // same attached resources func checkAction(expected, actual core.Action, t *testing.T) { - if !(expected.Matches(actual.GetVerb(), actual.GetResource().Resource) && actual.GetSubresource() == expected.GetSubresource()) { + if !(expected.Matches(actual.GetVerb(), actual.GetResource().Resource) && actual.GetSubresource() == expected.GetSubresource() && actual.GetResource().Resource == "controllerconfigs") { t.Errorf("Expected\n\t%#v\ngot\n\t%#v", expected, actual) return } @@ -182,6 +182,7 @@ func filterInformerActions(actions []core.Action) []core.Action { (action.Matches("list", "machineconfigpools") || action.Matches("watch", "machineconfigpools") || action.Matches("list", "controllerconfigs") || + action.Matches("get", "controllerconfigs") || action.Matches("watch", "controllerconfigs") || action.Matches("list", "machineconfigs") || action.Matches("watch", "machineconfigs")) { diff --git a/pkg/controller/template/template_controller.go b/pkg/controller/template/template_controller.go index 9199bc45d6..aef8167113 100644 --- a/pkg/controller/template/template_controller.go +++ b/pkg/controller/template/template_controller.go @@ -3,9 +3,7 @@ package template import ( "bytes" "context" - "crypto/x509" "encoding/json" - "encoding/pem" "fmt" "os" "path/filepath" @@ -22,6 +20,7 @@ import ( "github.com/openshift/library-go/pkg/operator/configobserver/featuregates" mcoResourceApply "github.com/openshift/machine-config-operator/lib/resourceapply" ctrlcommon "github.com/openshift/machine-config-operator/pkg/controller/common" + "github.com/openshift/machine-config-operator/pkg/helpers" "k8s.io/klog/v2" corev1 "k8s.io/api/core/v1" @@ -439,7 +438,7 @@ func updateControllerConfigCerts(config *mcfgv1.ControllerConfig) bool { newImgCerts := []mcfgv1.ControllerCertificate{} newCtrlCerts := []mcfgv1.ControllerCertificate{} for i, cert := range certs { - certs := createNewCert(cert, names[i]) + certs := helpers.CreateNewCert(cert, names[i]) if len(certs) > 0 { modified = true newCtrlCerts = append(newCtrlCerts, certs...) @@ -447,7 +446,7 @@ func updateControllerConfigCerts(config *mcfgv1.ControllerConfig) bool { } for _, entry := range config.Spec.ImageRegistryBundleData { names = append(names, entry.File) - certs := createNewCert(entry.Data, entry.File) + certs := helpers.CreateNewCert(entry.Data, entry.File) if len(certs) > 0 { modified = true newImgCerts = append(newImgCerts, certs...) @@ -455,7 +454,7 @@ func updateControllerConfigCerts(config *mcfgv1.ControllerConfig) bool { } for _, entry := range config.Spec.ImageRegistryBundleUserData { names = append(names, entry.File) - certs := createNewCert(entry.Data, entry.File) + certs := helpers.CreateNewCert(entry.Data, entry.File) if len(certs) > 0 { modified = true newImgCerts = append(newImgCerts, certs...) @@ -485,31 +484,6 @@ func updateControllerConfigCerts(config *mcfgv1.ControllerConfig) bool { return modified } -func createNewCert(cert []byte, name string) []mcfgv1.ControllerCertificate { - certs := []mcfgv1.ControllerCertificate{} - for len(cert) > 0 { - b, next := pem.Decode(cert) - if b == nil { - klog.Infof("Unable to decode cert %s into a pem block. Cert is either empty or invalid.", string(cert)) - break - } - c, err := x509.ParseCertificate(b.Bytes) - if err != nil { - klog.Infof("Malformed Cert, not syncing") - continue - } - cert = next - certs = append(certs, mcfgv1.ControllerCertificate{ - Subject: c.Subject.String(), - Signer: c.Issuer.String(), - BundleFile: name, - NotBefore: &metav1.Time{Time: c.NotBefore}, - NotAfter: &metav1.Time{Time: c.NotAfter}, - }) - } - return certs -} - // syncControllerConfig will sync the controller config with the given key. // This function is not meant to be invoked concurrently with the same key. func (ctrl *Controller) syncControllerConfig(key string) error { diff --git a/pkg/daemon/certificate_writer.go b/pkg/daemon/certificate_writer.go index 373595e777..6d60f5a0ce 100644 --- a/pkg/daemon/certificate_writer.go +++ b/pkg/daemon/certificate_writer.go @@ -137,7 +137,7 @@ func (dn *Daemon) syncControllerConfigHandler(key string) error { mergedData := append(controllerConfig.Spec.ImageRegistryBundleData, controllerConfig.Spec.ImageRegistryBundleUserData...) entries, err := os.ReadDir("/etc/docker/certs.d") - if err != nil { + if err != nil && !os.IsNotExist(err) { return err } diff --git a/pkg/helpers/helpers.go b/pkg/helpers/helpers.go index 3b00e1f543..37dc5cc440 100644 --- a/pkg/helpers/helpers.go +++ b/pkg/helpers/helpers.go @@ -1,6 +1,8 @@ package helpers import ( + "crypto/x509" + "encoding/pem" "fmt" mcfgv1 "github.com/openshift/api/machineconfiguration/v1" @@ -160,3 +162,28 @@ func ListPools(node *corev1.Node, mcpLister v1.MachineConfigPoolLister) (*mcfgv1 return master, worker, custom, nil } + +func CreateNewCert(cert []byte, name string) []mcfgv1.ControllerCertificate { + certs := []mcfgv1.ControllerCertificate{} + for len(cert) > 0 { + b, next := pem.Decode(cert) + if b == nil { + klog.Infof("Unable to decode cert %s into a pem block. Cert is either empty or invalid.", string(cert)) + break + } + c, err := x509.ParseCertificate(b.Bytes) + if err != nil { + klog.Infof("Malformed Cert, not syncing") + continue + } + cert = next + certs = append(certs, mcfgv1.ControllerCertificate{ + Subject: c.Subject.String(), + Signer: c.Issuer.String(), + BundleFile: name, + NotBefore: &metav1.Time{Time: c.NotBefore}, + NotAfter: &metav1.Time{Time: c.NotAfter}, + }) + } + return certs +} diff --git a/pkg/operator/operator.go b/pkg/operator/operator.go index 1562b52402..8a257cc3aa 100644 --- a/pkg/operator/operator.go +++ b/pkg/operator/operator.go @@ -416,10 +416,10 @@ func (optr *Operator) sync(key string) error { // "RenderConfig" must always run first as it sets the renderConfig in the operator // for the sync funcs below {"RenderConfig", optr.syncRenderConfig}, + {"MachineConfigController", optr.syncMachineConfigController}, {"MachineConfigNode", optr.syncMachineConfigNodes}, {"MachineConfigPools", optr.syncMachineConfigPools}, {"MachineConfigDaemon", optr.syncMachineConfigDaemon}, - {"MachineConfigController", optr.syncMachineConfigController}, {"MachineConfigServer", optr.syncMachineConfigServer}, {"MachineOSBuilder", optr.syncMachineOSBuilder}, // this check must always run last since it makes sure the pools are in sync/upgrading correctly diff --git a/pkg/operator/sync.go b/pkg/operator/sync.go index 99a5006b79..e0ac02d463 100644 --- a/pkg/operator/sync.go +++ b/pkg/operator/sync.go @@ -1,6 +1,7 @@ package operator import ( + "bytes" "context" "crypto/x509" "encoding/base64" @@ -18,6 +19,7 @@ import ( appsv1 "k8s.io/api/apps/v1" corev1 "k8s.io/api/core/v1" apiextv1 "k8s.io/apiextensions-apiserver/pkg/apis/apiextensions/v1" + "k8s.io/apimachinery/pkg/api/equality" "k8s.io/apimachinery/pkg/api/errors" apierrors "k8s.io/apimachinery/pkg/api/errors" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" @@ -46,6 +48,7 @@ import ( ctrlcommon "github.com/openshift/machine-config-operator/pkg/controller/common" templatectrl "github.com/openshift/machine-config-operator/pkg/controller/template" daemonconsts "github.com/openshift/machine-config-operator/pkg/daemon/constants" + "github.com/openshift/machine-config-operator/pkg/helpers" "github.com/openshift/machine-config-operator/pkg/server" "github.com/openshift/machine-config-operator/pkg/upgrademonitor" "github.com/openshift/machine-config-operator/pkg/version" @@ -624,6 +627,11 @@ func (optr *Operator) syncCustomResourceDefinitions() error { } for _, crd := range crds { + dne := false + current, err := optr.ccLister.Get("machine-config-controller") + if apierrors.IsNotFound(err) { + dne = true + } crdBytes, err := manifests.ReadFile(crd) if err != nil { @@ -639,11 +647,105 @@ func (optr *Operator) syncCustomResourceDefinitions() error { return err } } + + if strings.Contains(crd, "controllerconfig") { + currentCR, err := optr.apiExtClient.ApiextensionsV1().CustomResourceDefinitions().Get(context.TODO(), "controllerconfigs.machineconfiguration.openshift.io", metav1.GetOptions{}) + if err != nil && !os.IsNotExist(err) { + klog.Errorf("Error getting controller config CRD. Cluster might be initializing: %v", err) + } + if currentCR == nil || !equality.Semantic.DeepEqual(c.Spec, currentCR.Spec) || !equality.Semantic.DeepEqual(c.Status, currentCR.Status) || dne { + // we have changes. And need to regen the CR immediately or else we face validation errors. + // right now, the only changes we know of that we support is the controller certificate migration. + newCC, err := optr.client.MachineconfigurationV1().ControllerConfigs().Get(context.TODO(), "machine-config-controller", metav1.GetOptions{}) + if err != nil { + if os.IsNotExist(err) || apierrors.IsNotFound(err) { + continue + } + return err + } + // if this is a new cconfig or data has changed + if dne || anyCertDataDiffers(current.Spec, newCC.Spec) { + names := []string{ + "KubeAPIServerServingCAData", "CloudProviderCAData", "RootCAData", "AdditionalTrustBundle", + } + + certs := [][]byte{ + newCC.Spec.KubeAPIServerServingCAData, + newCC.Spec.CloudProviderCAData, + newCC.Spec.RootCAData, + newCC.Spec.AdditionalTrustBundle, + } + + // update existing information ONLY, we are not here to generate new entries. + newCerts := []mcfgv1.ControllerCertificate{} + for i, certName := range names { + for _, cert := range newCC.Status.ControllerCertificates { + if cert.BundleFile == certName { + newCerts = append(newCerts, helpers.CreateNewCert(certs[i], certName)...) + } + } + } + + for _, entry := range newCC.Spec.ImageRegistryBundleData { + for _, cert := range newCC.Status.ControllerCertificates { + if cert.BundleFile == entry.File { + newCerts = append(newCerts, helpers.CreateNewCert(entry.Data, entry.File)...) + } + } + } + + for _, entry := range newCC.Spec.ImageRegistryBundleUserData { + for _, cert := range newCC.Status.ControllerCertificates { + if cert.BundleFile == entry.File { + newCerts = append(newCerts, helpers.CreateNewCert(entry.Data, entry.File)...) + } + } + } + + newCC.Status.ControllerCertificates = newCerts + + _, err = optr.client.MachineconfigurationV1().ControllerConfigs().Update(context.TODO(), newCC, metav1.UpdateOptions{}) + if err != nil { + return err + } + } + } + } } return nil } +func anyCertDataDiffers(oldSpec, spec mcfgv1.ControllerConfigSpec) bool { + if !bytes.Equal(spec.KubeAPIServerServingCAData, oldSpec.KubeAPIServerServingCAData) || bytes.Equal(spec.CloudProviderCAData, oldSpec.CloudProviderCAData) || !bytes.Equal(spec.AdditionalTrustBundle, oldSpec.AdditionalTrustBundle) || !bytes.Equal(spec.RootCAData, oldSpec.RootCAData) { + return true + } + + if len(spec.ImageRegistryBundleData) != len(oldSpec.ImageRegistryBundleData) || len(spec.ImageRegistryBundleUserData) != len(oldSpec.ImageRegistryBundleUserData) { + return true + } + // for each new cert, see if it already existed + for _, cert := range spec.ImageRegistryBundleData { + for _, oldData := range oldSpec.ImageRegistryBundleData { + // if this is the same cert, with new data then return true + if oldData.File == cert.File && !bytes.Equal(cert.Data, oldData.Data) { + return true + } + } + } + // for each new cert, see if it already existed + for _, cert := range spec.ImageRegistryBundleUserData { + for _, oldData := range oldSpec.ImageRegistryBundleUserData { + // if this is the same cert, with new data then return true + if oldData.File == cert.File && !bytes.Equal(cert.Data, oldData.Data) { + return true + } + } + } + + return false +} + func (optr *Operator) syncMachineConfigPools(config *renderConfig) error { mcps := []string{ "manifests/master.machineconfigpool.yaml", diff --git a/vendor/k8s.io/code-generator/generate-groups.sh b/vendor/k8s.io/code-generator/generate-groups.sh old mode 100755 new mode 100644 diff --git a/vendor/k8s.io/code-generator/generate-internal-groups.sh b/vendor/k8s.io/code-generator/generate-internal-groups.sh old mode 100755 new mode 100644