Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 11 additions & 11 deletions internal/central/pkg/services/addon.go
Original file line number Diff line number Diff line change
Expand Up @@ -98,7 +98,7 @@ func (p *AddonProvisioner) Provision(cluster api.Cluster, dataplaneClusterConfig

for _, installedAddon := range installedAddons {
// addon is installed on the cluster but not present in gitops config - uninstall it
errs = append(errs, p.uninstallAddon(cluster.ClusterID, installedAddon.ID))
errs = append(errs, p.uninstallAddon(cluster.ClusterID, dataplaneClusterConfig.ClusterName, installedAddon.ID))
p.updateAddonStatus(installedAddon.ID, dataplaneClusterConfig.ClusterName, cluster.ClusterID, metrics.AddonHealthy)
}

Expand Down Expand Up @@ -127,7 +127,7 @@ func (p *AddonProvisioner) provisionAddon(dataplaneClusterConfig gitops.DataPlan
if addonErr != nil {
if addonErr.Is404() {
// addon does not exist, install it
errs = append(errs, p.installAddon(clusterID, expectedConfig))
errs = append(errs, p.installAddon(clusterID, dataplaneClusterConfig.ClusterName, expectedConfig))
status = metrics.AddonUpgrade
} else {
errs = append(errs, fmt.Errorf("failed to get addon %s: %w", expectedConfig.ID, addonErr))
Expand All @@ -140,7 +140,7 @@ func (p *AddonProvisioner) provisionAddon(dataplaneClusterConfig gitops.DataPlan
return
}
if gitOpsConfigDifferent(expectedConfig, installedInOCM) {
errs = append(errs, p.updateAddon(clusterID, expectedConfig))
errs = append(errs, p.updateAddon(clusterID, dataplaneClusterConfig.ClusterName, expectedConfig))
status = metrics.AddonUpgrade
return
}
Expand All @@ -156,7 +156,7 @@ func (p *AddonProvisioner) provisionAddon(dataplaneClusterConfig gitops.DataPlan
return
}
if clusterInstallationDifferent(installedOnCluster, versionInstalledInOCM) {
errs = append(errs, p.updateAddon(clusterID, expectedConfig))
errs = append(errs, p.updateAddon(clusterID, dataplaneClusterConfig.ClusterName, expectedConfig))
status = metrics.AddonUpgrade
} else {
glog.V(10).Infof("Addon %s is already up-to-date", installedOnCluster.ID)
Expand Down Expand Up @@ -234,15 +234,15 @@ func (p *AddonProvisioner) getInstalledAddons(cluster api.Cluster) (map[string]d
return result, nil
}

// installAddon builds an addon installation from config via newInstallation
// and creates it for the cluster identified by clusterID through the OCM
// client. clusterName is used only to enrich the success log line.
//
// It returns the newInstallation error unchanged, the OCM creation error
// wrapped with the addon ID, or nil on success.
//
// NOTE(review): this diff hunk lists the pre-change signature and log line
// directly before their post-change replacements.
func (p *AddonProvisioner) installAddon(clusterID string, config gitops.AddonConfig) error {
func (p *AddonProvisioner) installAddon(clusterID string, clusterName string, config gitops.AddonConfig) error {
addonInstallation, err := p.newInstallation(config)
if err != nil {
return err
}
if err = p.ocmClient.CreateAddonInstallation(clusterID, addonInstallation); err != nil {
return fmt.Errorf("create addon %s in ocm: %w", config.ID, err)
}
// Logged only at glog verbosity 5 or higher.
glog.V(5).Infof("Addon %s has been installed on the cluster %s", config.ID, clusterID)
glog.V(5).Infof("Addon %s has been installed on the cluster %s (%s)", config.ID, clusterID, clusterName)
return nil
}

Expand All @@ -261,9 +261,9 @@ func (p *AddonProvisioner) newInstallation(config gitops.AddonConfig) (*clusters
return installation, nil
}

func (p *AddonProvisioner) updateAddon(clusterID string, config gitops.AddonConfig) error {
func (p *AddonProvisioner) updateAddon(clusterID string, clusterName string, config gitops.AddonConfig) error {
if p.backoffUpgradeRequest(config.ID, clusterID) {
glog.V(5).Infof("update addon request backoff for cluster: %s", clusterID)
glog.V(5).Infof("update addon request backoff for cluster: %s (%s)", clusterID, clusterName)
return nil
}

Expand All @@ -275,7 +275,7 @@ func (p *AddonProvisioner) updateAddon(clusterID string, config gitops.AddonConf
if err := p.ocmClient.UpdateAddonInstallation(clusterID, update); err != nil {
return fmt.Errorf("update addon %s: %w", update.ID(), err)
}
glog.V(5).Infof("Addon %s has been updated on the cluster %s", config.ID, clusterID)
glog.V(5).Infof("Addon %s has been updated on the cluster %s (%s)", config.ID, clusterID, clusterName)
return nil
}

Expand All @@ -284,11 +284,11 @@ func (p *AddonProvisioner) backoffUpgradeRequest(addonID string, clusterID strin
return p.lastStatusPerInstall[id] != metrics.AddonHealthy && time.Since(p.lastUpgradeRequestTime) < addonUpgradeBackoff
}

// uninstallAddon deletes the installation of addonID from the cluster
// identified by clusterID through the OCM client. clusterName is used only to
// enrich the success log line.
//
// It returns the OCM deletion error wrapped with the addon ID, or nil on
// success.
//
// NOTE(review): this diff hunk lists the pre-change signature and log line
// directly before their post-change replacements.
func (p *AddonProvisioner) uninstallAddon(clusterID string, addonID string) error {
func (p *AddonProvisioner) uninstallAddon(clusterID string, clusterName string, addonID string) error {
if err := p.ocmClient.DeleteAddonInstallation(clusterID, addonID); err != nil {
return fmt.Errorf("uninstall addon %s: %w", addonID, err)
}
// Logged only at glog verbosity 5 or higher.
glog.V(5).Infof("Addon %s has been uninstalled from the cluster %s", addonID, clusterID)
glog.V(5).Infof("Addon %s has been uninstalled from the cluster %s (%s)", addonID, clusterID, clusterName)
return nil
}

Expand Down
19 changes: 13 additions & 6 deletions internal/central/pkg/services/clusters.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ import (
"github.com/stackrox/acs-fleet-manager/internal/central/pkg/api/dbapi"
"github.com/stackrox/acs-fleet-manager/internal/central/pkg/clusters"
"github.com/stackrox/acs-fleet-manager/internal/central/pkg/clusters/types"
"github.com/stackrox/acs-fleet-manager/internal/central/pkg/config"

"github.com/golang/glog"
"github.com/stackrox/acs-fleet-manager/pkg/metrics"
Expand Down Expand Up @@ -63,15 +64,17 @@ type ClusterService interface {
}

// clusterService is the concrete type that NewClusterService returns as a
// ClusterService.
//
// NOTE(review): this diff hunk lists the two pre-change fields first and then
// the re-aligned post-change field list.
type clusterService struct {
connectionFactory *db.ConnectionFactory
providerFactory clusters.ProviderFactory
connectionFactory *db.ConnectionFactory
providerFactory clusters.ProviderFactory
// dataplaneClusterConfig is used to look up a cluster name by cluster ID
// for log messages; callers nil-check it before use, so it may be nil.
dataplaneClusterConfig *config.DataplaneClusterConfig
}

// NewClusterService creates a new client for the OSD Cluster Service.
//
// The returned ClusterService is a *clusterService holding the given
// connection factory, provider factory, and dataplane cluster config. The
// arguments are stored as passed, without validation.
//
// NOTE(review): this diff hunk lists the pre-change signature and field
// initializers directly before their post-change replacements.
func NewClusterService(connectionFactory *db.ConnectionFactory, providerFactory clusters.ProviderFactory) ClusterService {
func NewClusterService(connectionFactory *db.ConnectionFactory, providerFactory clusters.ProviderFactory, dataplaneClusterConfig *config.DataplaneClusterConfig) ClusterService {
return &clusterService{
connectionFactory: connectionFactory,
providerFactory: providerFactory,
connectionFactory: connectionFactory,
providerFactory: providerFactory,
dataplaneClusterConfig: dataplaneClusterConfig,
}
}

Expand Down Expand Up @@ -331,7 +334,11 @@ func (c clusterService) DeleteByClusterID(clusterID string) *apiErrors.ServiceEr
return apiErrors.NewWithCause(apiErrors.ErrorGeneral, err, "Unable to delete cluster with cluster_id %s", clusterID)
}

glog.Infof("Cluster %s deleted successful", clusterID)
clusterName := ""
if c.dataplaneClusterConfig != nil {
clusterName = c.dataplaneClusterConfig.FindClusterNameByClusterID(clusterID)
}
glog.Infof("Cluster %s (%s) deleted successful", clusterID, clusterName)
metrics.IncreaseClusterSuccessOperationsCountMetric(constants.ClusterOperationDelete)
return nil
}
Expand Down
22 changes: 15 additions & 7 deletions internal/central/pkg/services/data_plane_central.go
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ import (
"github.com/pkg/errors"
"github.com/stackrox/acs-fleet-manager/internal/central/constants"
"github.com/stackrox/acs-fleet-manager/internal/central/pkg/api/dbapi"
"github.com/stackrox/acs-fleet-manager/internal/central/pkg/config"
"github.com/stackrox/acs-fleet-manager/pkg/api"
"github.com/stackrox/acs-fleet-manager/pkg/db"
serviceError "github.com/stackrox/acs-fleet-manager/pkg/errors"
Expand All @@ -35,21 +36,24 @@ type DataPlaneCentralService interface {
}

// dataPlaneCentralService is the concrete type that NewDataPlaneCentralService
// returns as a DataPlaneCentralService.
//
// NOTE(review): this diff hunk lists the three pre-change fields first and
// then the re-aligned post-change field list.
type dataPlaneCentralService struct {
centralService CentralService
clusterService ClusterService
connectionFactory *db.ConnectionFactory
centralService CentralService
clusterService ClusterService
connectionFactory *db.ConnectionFactory
// dataplaneClusterConfig is used to look up a cluster name by cluster ID
// for log messages; callers nil-check it before use, so it may be nil.
dataplaneClusterConfig *config.DataplaneClusterConfig
}

// NewDataPlaneCentralService creates a DataPlaneCentralService from the given
// central service, cluster service, database connection factory, and
// dataplane cluster config. The arguments are stored as passed, without
// validation.
//
// NOTE(review): this diff hunk lists the pre-change field initializers
// directly before their post-change replacements.
func NewDataPlaneCentralService(
centralSrv CentralService,
clusterSrv ClusterService,
connectionFactory *db.ConnectionFactory,
dataplaneClusterConfig *config.DataplaneClusterConfig,
) DataPlaneCentralService {
return &dataPlaneCentralService{
centralService: centralSrv,
clusterService: clusterSrv,
connectionFactory: connectionFactory,
centralService: centralSrv,
clusterService: clusterSrv,
connectionFactory: connectionFactory,
dataplaneClusterConfig: dataplaneClusterConfig,
}
}

Expand Down Expand Up @@ -167,7 +171,11 @@ func (s *dataPlaneCentralService) setCentralClusterFailed(centralRequest *dbapi.
metrics.UpdateCentralRequestsStatusSinceCreatedMetric(constants.CentralRequestStatusFailed, centralRequest.ID, centralRequest.ClusterID, time.Since(centralRequest.CreatedAt))
metrics.IncreaseCentralTotalOperationsCountMetric(constants.CentralOperationCreate)
}
logger.Logger.Errorf("Central status for Central ID '%s' in ClusterID '%s' reported as failed by Fleet Shard Operator: '%s'", centralRequest.ID, centralRequest.ClusterID, errMessage)
clusterName := ""
if s.dataplaneClusterConfig != nil {
clusterName = s.dataplaneClusterConfig.FindClusterNameByClusterID(centralRequest.ClusterID)
}
logger.Logger.Errorf("Central status for Central ID '%s' in ClusterID '%s' (%s) reported as failed by Fleet Shard Operator: '%s'", centralRequest.ID, centralRequest.ClusterID, clusterName, errMessage)

return nil
}
Expand Down
51 changes: 32 additions & 19 deletions internal/central/pkg/workers/clusters_mgr.go
Original file line number Diff line number Diff line change
Expand Up @@ -189,7 +189,8 @@ func (c *ClusterManager) processDeprovisioningClusters() []error {

for i := range deprovisioningClusters {
cluster := deprovisioningClusters[i]
glog.V(10).Infof("deprovision cluster ClusterID = %s", cluster.ClusterID)
clusterName := c.DataplaneClusterConfig.FindClusterNameByClusterID(cluster.ClusterID)
glog.V(10).Infof("deprovision cluster ClusterID = %s (%s)", cluster.ClusterID, clusterName)
metrics.UpdateClusterStatusSinceCreatedMetric(cluster, api.ClusterDeprovisioning)
if err := c.reconcileDeprovisioningCluster(&cluster); err != nil {
errs = append(errs, errors.Wrapf(err, "failed to reconcile deprovisioning cluster %s", cluster.ID))
Expand All @@ -211,7 +212,8 @@ func (c *ClusterManager) processCleanupClusters() []error {
}

for _, cluster := range cleanupClusters {
glog.V(10).Infof("cleanup cluster ClusterID = %s", cluster.ClusterID)
clusterName := c.DataplaneClusterConfig.FindClusterNameByClusterID(cluster.ClusterID)
glog.V(10).Infof("cleanup cluster ClusterID = %s (%s)", cluster.ClusterID, clusterName)
metrics.UpdateClusterStatusSinceCreatedMetric(cluster, api.ClusterCleanup)
if err := c.reconcileCleanupCluster(cluster); err != nil {
errs = append(errs, errors.Wrapf(err, "failed to reconcile cleanup cluster %s", cluster.ID))
Expand All @@ -234,7 +236,8 @@ func (c *ClusterManager) processAcceptedClusters() []error {

for i := range acceptedClusters {
cluster := acceptedClusters[i]
glog.V(10).Infof("accepted cluster ClusterID = %s", cluster.ClusterID)
clusterName := c.DataplaneClusterConfig.FindClusterNameByClusterID(cluster.ClusterID)
glog.V(10).Infof("accepted cluster ClusterID = %s (%s)", cluster.ClusterID, clusterName)
metrics.UpdateClusterStatusSinceCreatedMetric(cluster, api.ClusterAccepted)
if err := c.reconcileAcceptedCluster(&cluster); err != nil {
errs = append(errs, errors.Wrapf(err, "failed to reconcile accepted cluster %s", cluster.ID))
Expand All @@ -258,7 +261,8 @@ func (c *ClusterManager) processProvisioningClusters() []error {
// process each local pending cluster and compare to the underlying ocm cluster
for i := range provisioningClusters {
provisioningCluster := provisioningClusters[i]
glog.V(10).Infof("provisioning cluster ClusterID = %s", provisioningCluster.ClusterID)
clusterName := c.DataplaneClusterConfig.FindClusterNameByClusterID(provisioningCluster.ClusterID)
glog.V(10).Infof("provisioning cluster ClusterID = %s (%s)", provisioningCluster.ClusterID, clusterName)
metrics.UpdateClusterStatusSinceCreatedMetric(provisioningCluster, api.ClusterProvisioning)
_, err := c.reconcileClusterStatus(&provisioningCluster)
if err != nil {
Expand All @@ -285,7 +289,8 @@ func (c *ClusterManager) processProvisionedClusters() []error {

// process each local provisioned cluster and apply necessary terraforming
for _, provisionedCluster := range provisionedClusters {
glog.V(10).Infof("provisioned cluster ClusterID = %s", provisionedCluster.ClusterID)
clusterName := c.DataplaneClusterConfig.FindClusterNameByClusterID(provisionedCluster.ClusterID)
glog.V(10).Infof("provisioned cluster ClusterID = %s (%s)", provisionedCluster.ClusterID, clusterName)
metrics.UpdateClusterStatusSinceCreatedMetric(provisionedCluster, api.ClusterProvisioned)
err := c.reconcileProvisionedCluster(provisionedCluster)
if err != nil {
Expand Down Expand Up @@ -371,7 +376,8 @@ func (c *ClusterManager) reconcileDeprovisioningCluster(cluster *api.Cluster) er
}

// cluster has been removed from cluster service. Mark it for cleanup
glog.Infof("Cluster %s has been removed from cluster service.", cluster.ClusterID)
clusterName := c.DataplaneClusterConfig.FindClusterNameByClusterID(cluster.ClusterID)
glog.Infof("Cluster %s (%s) has been removed from cluster service.", cluster.ClusterID, clusterName)
updateStatusErr := c.ClusterService.UpdateStatus(*cluster, api.ClusterCleanup)
if updateStatusErr != nil {
return errors.Wrapf(updateStatusErr, "Failed to update deprovisioning cluster %s status to 'cleanup'", cluster.ClusterID)
Expand All @@ -381,9 +387,10 @@ func (c *ClusterManager) reconcileDeprovisioningCluster(cluster *api.Cluster) er
}

func (c *ClusterManager) reconcileCleanupCluster(cluster api.Cluster) error {
glog.Infof("Removing Dataplane cluster %s fleetshard service account", cluster.ClusterID)
clusterName := c.DataplaneClusterConfig.FindClusterNameByClusterID(cluster.ClusterID)
glog.Infof("Removing Dataplane cluster %s (%s) fleetshard service account", cluster.ClusterID, clusterName)

glog.Infof("Soft deleting the Dataplane cluster %s from the database", cluster.ClusterID)
glog.Infof("Soft deleting the Dataplane cluster %s (%s) from the database", cluster.ClusterID, clusterName)
deleteError := c.ClusterService.DeleteByClusterID(cluster.ClusterID)
if deleteError != nil {
return errors.Wrapf(deleteError, "Failed to soft delete Dataplane cluster %s from the database", cluster.ClusterID)
Expand All @@ -393,7 +400,8 @@ func (c *ClusterManager) reconcileCleanupCluster(cluster api.Cluster) error {

func (c *ClusterManager) reconcileReadyCluster(cluster api.Cluster) error {
if !c.DataplaneClusterConfig.IsReadyDataPlaneClustersReconcileEnabled() {
glog.Infof("Reconcile of dataplane ready clusters is disabled. Skipped reconcile of ready ClusterID '%s'", cluster.ClusterID)
clusterName := c.DataplaneClusterConfig.FindClusterNameByClusterID(cluster.ClusterID)
glog.Infof("Reconcile of dataplane ready clusters is disabled. Skipped reconcile of ready ClusterID '%s' (%s)", cluster.ClusterID, clusterName)
return nil
}

Expand Down Expand Up @@ -452,13 +460,14 @@ func (c *ClusterManager) reconcileClusterInstanceType(cluster api.Cluster) error

// reconcileEmptyCluster checks whether a cluster is empty and marks it for deletion
func (c *ClusterManager) reconcileEmptyCluster(cluster api.Cluster) (bool, error) {
glog.V(10).Infof("check if cluster is empty, ClusterID = %s", cluster.ClusterID)
clusterName := c.DataplaneClusterConfig.FindClusterNameByClusterID(cluster.ClusterID)
glog.V(10).Infof("check if cluster is empty, ClusterID = %s (%s)", cluster.ClusterID, clusterName)
clusterFromDb, err := c.ClusterService.FindNonEmptyClusterByID(cluster.ClusterID)
if err != nil {
return false, err
}
if clusterFromDb != nil {
glog.V(10).Infof("cluster is not empty, ClusterID = %s", cluster.ClusterID)
glog.V(10).Infof("cluster is not empty, ClusterID = %s (%s)", cluster.ClusterID, clusterName)
return false, nil
}

Expand All @@ -473,7 +482,7 @@ func (c *ClusterManager) reconcileEmptyCluster(cluster api.Cluster) (bool, error

siblingClusterCount := clustersByRegionAndCloudProvider[0]
if siblingClusterCount.Count <= 1 { // sibling cluster not found
glog.V(10).Infof("no valid sibling found for cluster ClusterID = %s", cluster.ClusterID)
glog.V(10).Infof("no valid sibling found for cluster ClusterID = %s (%s)", cluster.ClusterID, clusterName)
return false, nil
}

Expand Down Expand Up @@ -564,7 +573,8 @@ func (c *ClusterManager) reconcileClusterWithManualConfig() []error {
if err := c.ClusterService.RegisterClusterJob(&clusterRequest); err != nil {
return []error{errors.Wrapf(err, "Failed to register new cluster %s with config file", p.ClusterID)}
}
glog.Infof("Registered a new cluster with config file: %s ", p.ClusterID)
clusterName := c.DataplaneClusterConfig.FindClusterNameByClusterID(p.ClusterID)
glog.Infof("Registered a new cluster with config file: %s (%s)", p.ClusterID, clusterName)
}

// Update existing clusters.
Expand All @@ -589,7 +599,8 @@ func (c *ClusterManager) reconcileClusterWithManualConfig() []error {
continue
}
diff := cmp.Diff(*cluster, newCluster)
glog.Infof("Updating data-plane cluster %s. Changes in cluster configuration:\n", manualCluster.ClusterID)
clusterName := c.DataplaneClusterConfig.FindClusterNameByClusterID(manualCluster.ClusterID)
glog.Infof("Updating data-plane cluster %s (%s). Changes in cluster configuration:\n", manualCluster.ClusterID, clusterName)
for _, diffLine := range strings.Split(diff, "\n") {
glog.Infoln(diffLine)
}
Expand Down Expand Up @@ -624,12 +635,14 @@ func (c *ClusterManager) reconcileClusterWithManualConfig() []error {
}

var idsOfClustersToDeprovision []string
for _, c := range centralInstanceCount {
if c.Count > 0 {
glog.Infof("Excess cluster %s is not going to be deleted because it has %d centrals.", c.Clusterid, c.Count)
for _, centralCount := range centralInstanceCount {
if centralCount.Count > 0 {
clusterName := c.DataplaneClusterConfig.FindClusterNameByClusterID(centralCount.Clusterid)
glog.Infof("Excess cluster %s (%s) is not going to be deleted because it has %d centrals.", centralCount.Clusterid, clusterName, centralCount.Count)
} else {
glog.Infof("Excess cluster is going to be deleted %s", c.Clusterid)
idsOfClustersToDeprovision = append(idsOfClustersToDeprovision, c.Clusterid)
clusterName := c.DataplaneClusterConfig.FindClusterNameByClusterID(centralCount.Clusterid)
glog.Infof("Excess cluster is going to be deleted %s (%s)", centralCount.Clusterid, clusterName)
idsOfClustersToDeprovision = append(idsOfClustersToDeprovision, centralCount.Clusterid)
}
}

Expand Down
8 changes: 5 additions & 3 deletions internal/central/pkg/workers/clusters_mgr_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -28,9 +28,11 @@ func TestClusterManager_processReadyClusters_emptyConfig(t *testing.T) {
}
c := &ClusterManager{
ClusterManagerOptions: ClusterManagerOptions{
ClusterService: clusterService,
GitOpsConfigProvider: provider,
DataplaneClusterConfig: &config.DataplaneClusterConfig{},
ClusterService: clusterService,
GitOpsConfigProvider: provider,
DataplaneClusterConfig: &config.DataplaneClusterConfig{
ClusterConfig: &config.ClusterConfig{},
},
},
}
errs := c.processReadyClusters()
Expand Down
Loading