diff --git a/internal/central/pkg/services/addon.go b/internal/central/pkg/services/addon.go index 36fb408353..03c2f5c5eb 100644 --- a/internal/central/pkg/services/addon.go +++ b/internal/central/pkg/services/addon.go @@ -98,7 +98,7 @@ func (p *AddonProvisioner) Provision(cluster api.Cluster, dataplaneClusterConfig for _, installedAddon := range installedAddons { // addon is installed on the cluster but not present in gitops config - uninstall it - errs = append(errs, p.uninstallAddon(cluster.ClusterID, installedAddon.ID)) + errs = append(errs, p.uninstallAddon(cluster.ClusterID, dataplaneClusterConfig.ClusterName, installedAddon.ID)) p.updateAddonStatus(installedAddon.ID, dataplaneClusterConfig.ClusterName, cluster.ClusterID, metrics.AddonHealthy) } @@ -127,7 +127,7 @@ func (p *AddonProvisioner) provisionAddon(dataplaneClusterConfig gitops.DataPlan if addonErr != nil { if addonErr.Is404() { // addon does not exist, install it - errs = append(errs, p.installAddon(clusterID, expectedConfig)) + errs = append(errs, p.installAddon(clusterID, dataplaneClusterConfig.ClusterName, expectedConfig)) status = metrics.AddonUpgrade } else { errs = append(errs, fmt.Errorf("failed to get addon %s: %w", expectedConfig.ID, addonErr)) @@ -140,7 +140,7 @@ func (p *AddonProvisioner) provisionAddon(dataplaneClusterConfig gitops.DataPlan return } if gitOpsConfigDifferent(expectedConfig, installedInOCM) { - errs = append(errs, p.updateAddon(clusterID, expectedConfig)) + errs = append(errs, p.updateAddon(clusterID, dataplaneClusterConfig.ClusterName, expectedConfig)) status = metrics.AddonUpgrade return } @@ -156,7 +156,7 @@ func (p *AddonProvisioner) provisionAddon(dataplaneClusterConfig gitops.DataPlan return } if clusterInstallationDifferent(installedOnCluster, versionInstalledInOCM) { - errs = append(errs, p.updateAddon(clusterID, expectedConfig)) + errs = append(errs, p.updateAddon(clusterID, dataplaneClusterConfig.ClusterName, expectedConfig)) status = metrics.AddonUpgrade } else { glog.V(10).Infof("Addon %s is already up-to-date", installedOnCluster.ID) @@ -234,7 +234,7 @@ func (p *AddonProvisioner) getInstalledAddons(cluster api.Cluster) (map[string]d return result, nil } -func (p *AddonProvisioner) installAddon(clusterID string, config gitops.AddonConfig) error { +func (p *AddonProvisioner) installAddon(clusterID string, clusterName string, config gitops.AddonConfig) error { addonInstallation, err := p.newInstallation(config) if err != nil { return err @@ -242,7 +242,7 @@ func (p *AddonProvisioner) installAddon(clusterID string, config gitops.AddonCon if err = p.ocmClient.CreateAddonInstallation(clusterID, addonInstallation); err != nil { return fmt.Errorf("create addon %s in ocm: %w", config.ID, err) } - glog.V(5).Infof("Addon %s has been installed on the cluster %s", config.ID, clusterID) + glog.V(5).Infof("Addon %s has been installed on the cluster %s (%s)", config.ID, clusterID, clusterName) return nil } @@ -261,9 +261,9 @@ func (p *AddonProvisioner) newInstallation(config gitops.AddonConfig) (*clusters return installation, nil } -func (p *AddonProvisioner) updateAddon(clusterID string, config gitops.AddonConfig) error { +func (p *AddonProvisioner) updateAddon(clusterID string, clusterName string, config gitops.AddonConfig) error { if p.backoffUpgradeRequest(config.ID, clusterID) { - glog.V(5).Infof("update addon request backoff for cluster: %s", clusterID) + glog.V(5).Infof("update addon request backoff for cluster: %s (%s)", clusterID, clusterName) return nil } @@ -275,7 +275,7 @@ func (p *AddonProvisioner) updateAddon(clusterID string, config gitops.AddonConf if err := p.ocmClient.UpdateAddonInstallation(clusterID, update); err != nil { return fmt.Errorf("update addon %s: %w", update.ID(), err) } - glog.V(5).Infof("Addon %s has been updated on the cluster %s", config.ID, clusterID) + glog.V(5).Infof("Addon %s has been updated on the cluster %s (%s)", config.ID, clusterID, clusterName) return nil } @@ -284,11 +284,11 @@ func (p *AddonProvisioner) backoffUpgradeRequest(addonID string, clusterID strin return p.lastStatusPerInstall[id] != metrics.AddonHealthy && time.Since(p.lastUpgradeRequestTime) < addonUpgradeBackoff } -func (p *AddonProvisioner) uninstallAddon(clusterID string, addonID string) error { +func (p *AddonProvisioner) uninstallAddon(clusterID string, clusterName string, addonID string) error { if err := p.ocmClient.DeleteAddonInstallation(clusterID, addonID); err != nil { return fmt.Errorf("uninstall addon %s: %w", addonID, err) } - glog.V(5).Infof("Addon %s has been uninstalled from the cluster %s", addonID, clusterID) + glog.V(5).Infof("Addon %s has been uninstalled from the cluster %s (%s)", addonID, clusterID, clusterName) return nil } diff --git a/internal/central/pkg/services/clusters.go b/internal/central/pkg/services/clusters.go index c71092928d..734d819e41 100644 --- a/internal/central/pkg/services/clusters.go +++ b/internal/central/pkg/services/clusters.go @@ -8,6 +8,7 @@ import ( "github.com/stackrox/acs-fleet-manager/internal/central/pkg/api/dbapi" "github.com/stackrox/acs-fleet-manager/internal/central/pkg/clusters" "github.com/stackrox/acs-fleet-manager/internal/central/pkg/clusters/types" + "github.com/stackrox/acs-fleet-manager/internal/central/pkg/config" "github.com/golang/glog" "github.com/stackrox/acs-fleet-manager/pkg/metrics" @@ -63,15 +64,17 @@ type ClusterService interface { } type clusterService struct { - connectionFactory *db.ConnectionFactory - providerFactory clusters.ProviderFactory + connectionFactory *db.ConnectionFactory + providerFactory clusters.ProviderFactory + dataplaneClusterConfig *config.DataplaneClusterConfig } // NewClusterService creates a new client for the OSD Cluster Service -func NewClusterService(connectionFactory *db.ConnectionFactory, providerFactory clusters.ProviderFactory) ClusterService { +func NewClusterService(connectionFactory *db.ConnectionFactory, providerFactory clusters.ProviderFactory, dataplaneClusterConfig *config.DataplaneClusterConfig) ClusterService { return &clusterService{ - connectionFactory: connectionFactory, - providerFactory: providerFactory, + connectionFactory: connectionFactory, + providerFactory: providerFactory, + dataplaneClusterConfig: dataplaneClusterConfig, } } @@ -331,7 +334,11 @@ func (c clusterService) DeleteByClusterID(clusterID string) *apiErrors.ServiceEr return apiErrors.NewWithCause(apiErrors.ErrorGeneral, err, "Unable to delete cluster with cluster_id %s", clusterID) } - glog.Infof("Cluster %s deleted successful", clusterID) + clusterName := "" + if c.dataplaneClusterConfig != nil { + clusterName = c.dataplaneClusterConfig.FindClusterNameByClusterID(clusterID) + } + glog.Infof("Cluster %s (%s) deleted successful", clusterID, clusterName) metrics.IncreaseClusterSuccessOperationsCountMetric(constants.ClusterOperationDelete) return nil } diff --git a/internal/central/pkg/services/data_plane_central.go b/internal/central/pkg/services/data_plane_central.go index 8ea5001e0e..208faa7dd9 100644 --- a/internal/central/pkg/services/data_plane_central.go +++ b/internal/central/pkg/services/data_plane_central.go @@ -10,6 +10,7 @@ import ( "github.com/pkg/errors" "github.com/stackrox/acs-fleet-manager/internal/central/constants" "github.com/stackrox/acs-fleet-manager/internal/central/pkg/api/dbapi" + "github.com/stackrox/acs-fleet-manager/internal/central/pkg/config" "github.com/stackrox/acs-fleet-manager/pkg/api" "github.com/stackrox/acs-fleet-manager/pkg/db" serviceError "github.com/stackrox/acs-fleet-manager/pkg/errors" @@ -35,9 +36,10 @@ type DataPlaneCentralService interface { } type dataPlaneCentralService struct { - centralService CentralService - clusterService ClusterService - connectionFactory *db.ConnectionFactory + centralService CentralService + clusterService ClusterService + connectionFactory *db.ConnectionFactory + dataplaneClusterConfig *config.DataplaneClusterConfig } // NewDataPlaneCentralService ... @@ -45,11 +47,13 @@ func NewDataPlaneCentralService( centralSrv CentralService, clusterSrv ClusterService, connectionFactory *db.ConnectionFactory, + dataplaneClusterConfig *config.DataplaneClusterConfig, ) DataPlaneCentralService { return &dataPlaneCentralService{ - centralService: centralSrv, - clusterService: clusterSrv, - connectionFactory: connectionFactory, + centralService: centralSrv, + clusterService: clusterSrv, + connectionFactory: connectionFactory, + dataplaneClusterConfig: dataplaneClusterConfig, } } @@ -167,7 +171,11 @@ func (s *dataPlaneCentralService) setCentralClusterFailed(centralRequest *dbapi. metrics.UpdateCentralRequestsStatusSinceCreatedMetric(constants.CentralRequestStatusFailed, centralRequest.ID, centralRequest.ClusterID, time.Since(centralRequest.CreatedAt)) metrics.IncreaseCentralTotalOperationsCountMetric(constants.CentralOperationCreate) } - logger.Logger.Errorf("Central status for Central ID '%s' in ClusterID '%s' reported as failed by Fleet Shard Operator: '%s'", centralRequest.ID, centralRequest.ClusterID, errMessage) + clusterName := "" + if s.dataplaneClusterConfig != nil { + clusterName = s.dataplaneClusterConfig.FindClusterNameByClusterID(centralRequest.ClusterID) + } + logger.Logger.Errorf("Central status for Central ID '%s' in ClusterID '%s' (%s) reported as failed by Fleet Shard Operator: '%s'", centralRequest.ID, centralRequest.ClusterID, clusterName, errMessage) return nil } diff --git a/internal/central/pkg/workers/clusters_mgr.go b/internal/central/pkg/workers/clusters_mgr.go index a7aa0b9327..2f571c36f4 100644 --- a/internal/central/pkg/workers/clusters_mgr.go +++ b/internal/central/pkg/workers/clusters_mgr.go @@ -189,7 +189,8 @@ func (c *ClusterManager) processDeprovisioningClusters() []error { for i := range deprovisioningClusters { cluster := deprovisioningClusters[i] - glog.V(10).Infof("deprovision cluster ClusterID = %s", cluster.ClusterID) + clusterName := c.DataplaneClusterConfig.FindClusterNameByClusterID(cluster.ClusterID) + glog.V(10).Infof("deprovision cluster ClusterID = %s (%s)", cluster.ClusterID, clusterName) metrics.UpdateClusterStatusSinceCreatedMetric(cluster, api.ClusterDeprovisioning) if err := c.reconcileDeprovisioningCluster(&cluster); err != nil { errs = append(errs, errors.Wrapf(err, "failed to reconcile deprovisioning cluster %s", cluster.ID)) @@ -211,7 +212,8 @@ func (c *ClusterManager) processCleanupClusters() []error { } for _, cluster := range cleanupClusters { - glog.V(10).Infof("cleanup cluster ClusterID = %s", cluster.ClusterID) + clusterName := c.DataplaneClusterConfig.FindClusterNameByClusterID(cluster.ClusterID) + glog.V(10).Infof("cleanup cluster ClusterID = %s (%s)", cluster.ClusterID, clusterName) metrics.UpdateClusterStatusSinceCreatedMetric(cluster, api.ClusterCleanup) if err := c.reconcileCleanupCluster(cluster); err != nil { errs = append(errs, errors.Wrapf(err, "failed to reconcile cleanup cluster %s", cluster.ID)) @@ -234,7 +236,8 @@ func (c *ClusterManager) processAcceptedClusters() []error { for i := range acceptedClusters { cluster := acceptedClusters[i] - glog.V(10).Infof("accepted cluster ClusterID = %s", cluster.ClusterID) + clusterName := c.DataplaneClusterConfig.FindClusterNameByClusterID(cluster.ClusterID) + glog.V(10).Infof("accepted cluster ClusterID = %s (%s)", cluster.ClusterID, clusterName) metrics.UpdateClusterStatusSinceCreatedMetric(cluster, api.ClusterAccepted) if err := c.reconcileAcceptedCluster(&cluster); err != nil { errs = append(errs, errors.Wrapf(err, "failed to reconcile accepted cluster %s", cluster.ID)) @@ -258,7 +261,8 @@ func (c *ClusterManager) processProvisioningClusters() []error { // process each local pending cluster and compare to the underlying ocm cluster for i := range provisioningClusters { provisioningCluster := provisioningClusters[i] - glog.V(10).Infof("provisioning cluster ClusterID = %s", provisioningCluster.ClusterID) + clusterName := c.DataplaneClusterConfig.FindClusterNameByClusterID(provisioningCluster.ClusterID) + glog.V(10).Infof("provisioning cluster ClusterID = %s (%s)", provisioningCluster.ClusterID, clusterName) metrics.UpdateClusterStatusSinceCreatedMetric(provisioningCluster, api.ClusterProvisioning) _, err := c.reconcileClusterStatus(&provisioningCluster) if err != nil { @@ -285,7 +289,8 @@ func (c *ClusterManager) processProvisionedClusters() []error { // process each local provisioned cluster and apply necessary terraforming for _, provisionedCluster := range provisionedClusters { - glog.V(10).Infof("provisioned cluster ClusterID = %s", provisionedCluster.ClusterID) + clusterName := c.DataplaneClusterConfig.FindClusterNameByClusterID(provisionedCluster.ClusterID) + glog.V(10).Infof("provisioned cluster ClusterID = %s (%s)", provisionedCluster.ClusterID, clusterName) metrics.UpdateClusterStatusSinceCreatedMetric(provisionedCluster, api.ClusterProvisioned) err := c.reconcileProvisionedCluster(provisionedCluster) if err != nil { @@ -371,7 +376,8 @@ func (c *ClusterManager) reconcileDeprovisioningCluster(cluster *api.Cluster) er } // cluster has been removed from cluster service. Mark it for cleanup - glog.Infof("Cluster %s has been removed from cluster service.", cluster.ClusterID) + clusterName := c.DataplaneClusterConfig.FindClusterNameByClusterID(cluster.ClusterID) + glog.Infof("Cluster %s (%s) has been removed from cluster service.", cluster.ClusterID, clusterName) updateStatusErr := c.ClusterService.UpdateStatus(*cluster, api.ClusterCleanup) if updateStatusErr != nil { return errors.Wrapf(updateStatusErr, "Failed to update deprovisioning cluster %s status to 'cleanup'", cluster.ClusterID) @@ -381,9 +387,10 @@ func (c *ClusterManager) reconcileDeprovisioningCluster(cluster *api.Cluster) er } func (c *ClusterManager) reconcileCleanupCluster(cluster api.Cluster) error { - glog.Infof("Removing Dataplane cluster %s fleetshard service account", cluster.ClusterID) + clusterName := c.DataplaneClusterConfig.FindClusterNameByClusterID(cluster.ClusterID) + glog.Infof("Removing Dataplane cluster %s (%s) fleetshard service account", cluster.ClusterID, clusterName) - glog.Infof("Soft deleting the Dataplane cluster %s from the database", cluster.ClusterID) + glog.Infof("Soft deleting the Dataplane cluster %s (%s) from the database", cluster.ClusterID, clusterName) deleteError := c.ClusterService.DeleteByClusterID(cluster.ClusterID) if deleteError != nil { return errors.Wrapf(deleteError, "Failed to soft delete Dataplane cluster %s from the database", cluster.ClusterID) @@ -393,7 +400,8 @@ func (c *ClusterManager) reconcileCleanupCluster(cluster api.Cluster) error { func (c *ClusterManager) reconcileReadyCluster(cluster api.Cluster) error { if !c.DataplaneClusterConfig.IsReadyDataPlaneClustersReconcileEnabled() { - glog.Infof("Reconcile of dataplane ready clusters is disabled. Skipped reconcile of ready ClusterID '%s'", cluster.ClusterID) + clusterName := c.DataplaneClusterConfig.FindClusterNameByClusterID(cluster.ClusterID) + glog.Infof("Reconcile of dataplane ready clusters is disabled. Skipped reconcile of ready ClusterID '%s' (%s)", cluster.ClusterID, clusterName) return nil } @@ -452,13 +460,14 @@ func (c *ClusterManager) reconcileClusterInstanceType(cluster api.Cluster) error // reconcileEmptyCluster checks wether a cluster is empty and mark it for deletion func (c *ClusterManager) reconcileEmptyCluster(cluster api.Cluster) (bool, error) { - glog.V(10).Infof("check if cluster is empty, ClusterID = %s", cluster.ClusterID) + clusterName := c.DataplaneClusterConfig.FindClusterNameByClusterID(cluster.ClusterID) + glog.V(10).Infof("check if cluster is empty, ClusterID = %s (%s)", cluster.ClusterID, clusterName) clusterFromDb, err := c.ClusterService.FindNonEmptyClusterByID(cluster.ClusterID) if err != nil { return false, err } if clusterFromDb != nil { - glog.V(10).Infof("cluster is not empty, ClusterID = %s", cluster.ClusterID) + glog.V(10).Infof("cluster is not empty, ClusterID = %s (%s)", cluster.ClusterID, clusterName) return false, nil } @@ -473,7 +482,7 @@ func (c *ClusterManager) reconcileEmptyCluster(cluster api.Cluster) (bool, error siblingClusterCount := clustersByRegionAndCloudProvider[0] if siblingClusterCount.Count <= 1 { // sibling cluster not found - glog.V(10).Infof("no valid sibling found for cluster ClusterID = %s", cluster.ClusterID) + glog.V(10).Infof("no valid sibling found for cluster ClusterID = %s (%s)", cluster.ClusterID, clusterName) return false, nil } @@ -564,7 +573,8 @@ func (c *ClusterManager) reconcileClusterWithManualConfig() []error { if err := c.ClusterService.RegisterClusterJob(&clusterRequest); err != nil { return []error{errors.Wrapf(err, "Failed to register new cluster %s with config file", p.ClusterID)} } - glog.Infof("Registered a new cluster with config file: %s ", p.ClusterID) + clusterName := c.DataplaneClusterConfig.FindClusterNameByClusterID(p.ClusterID) + glog.Infof("Registered a new cluster with config file: %s (%s)", p.ClusterID, clusterName) } // Update existing clusters. @@ -589,7 +599,8 @@ func (c *ClusterManager) reconcileClusterWithManualConfig() []error { continue } diff := cmp.Diff(*cluster, newCluster) - glog.Infof("Updating data-plane cluster %s. Changes in cluster configuration:\n", manualCluster.ClusterID) + clusterName := c.DataplaneClusterConfig.FindClusterNameByClusterID(manualCluster.ClusterID) + glog.Infof("Updating data-plane cluster %s (%s). Changes in cluster configuration:\n", manualCluster.ClusterID, clusterName) for _, diffLine := range strings.Split(diff, "\n") { glog.Infoln(diffLine) } @@ -624,12 +635,14 @@ func (c *ClusterManager) reconcileClusterWithManualConfig() []error { } var idsOfClustersToDeprovision []string - for _, c := range centralInstanceCount { - if c.Count > 0 { - glog.Infof("Excess cluster %s is not going to be deleted because it has %d centrals.", c.Clusterid, c.Count) + for _, centralCount := range centralInstanceCount { + if centralCount.Count > 0 { + clusterName := c.DataplaneClusterConfig.FindClusterNameByClusterID(centralCount.Clusterid) + glog.Infof("Excess cluster %s (%s) is not going to be deleted because it has %d centrals.", centralCount.Clusterid, clusterName, centralCount.Count) } else { - glog.Infof("Excess cluster is going to be deleted %s", c.Clusterid) - idsOfClustersToDeprovision = append(idsOfClustersToDeprovision, c.Clusterid) + clusterName := c.DataplaneClusterConfig.FindClusterNameByClusterID(centralCount.Clusterid) + glog.Infof("Excess cluster is going to be deleted %s (%s)", centralCount.Clusterid, clusterName) + idsOfClustersToDeprovision = append(idsOfClustersToDeprovision, centralCount.Clusterid) } } diff --git a/internal/central/pkg/workers/clusters_mgr_test.go b/internal/central/pkg/workers/clusters_mgr_test.go index 2a8e984633..495772f1e7 100644 --- a/internal/central/pkg/workers/clusters_mgr_test.go +++ b/internal/central/pkg/workers/clusters_mgr_test.go @@ -28,9 +28,11 @@ func TestClusterManager_processReadyClusters_emptyConfig(t *testing.T) { } c := &ClusterManager{ ClusterManagerOptions: ClusterManagerOptions{ - ClusterService: clusterService, - GitOpsConfigProvider: provider, - DataplaneClusterConfig: &config.DataplaneClusterConfig{}, + ClusterService: clusterService, + GitOpsConfigProvider: provider, + DataplaneClusterConfig: &config.DataplaneClusterConfig{ + ClusterConfig: &config.ClusterConfig{}, + }, }, } errs := c.processReadyClusters()