From 417206e946cc2245dc4c8260d2b953f0b54e2dc7 Mon Sep 17 00:00:00 2001 From: michaelawyu Date: Mon, 12 Jun 2023 23:37:58 +0800 Subject: [PATCH 01/15] :Added initial parts of scheduler framework --- pkg/scheduler/framework/framework.go | 148 +++++++++++++++++++++++++++ pkg/scheduler/framework/profile.go | 5 + 2 files changed, 153 insertions(+) create mode 100644 pkg/scheduler/framework/framework.go diff --git a/pkg/scheduler/framework/framework.go b/pkg/scheduler/framework/framework.go new file mode 100644 index 000000000..3d9b5899b --- /dev/null +++ b/pkg/scheduler/framework/framework.go @@ -0,0 +1,148 @@ +/* +Copyright (c) Microsoft Corporation. +Licensed under the MIT license. +*/ + +// package framework features the scheduler framework, which the scheduler runs to schedule +// a placement to most appropriate clusters. +package framework + +import ( + "context" + "fmt" + + "k8s.io/client-go/tools/record" + ctrl "sigs.k8s.io/controller-runtime" + "sigs.k8s.io/controller-runtime/pkg/client" + + fleetv1 "go.goms.io/fleet/apis/v1" + "go.goms.io/fleet/pkg/scheduler/framework/parallelizer" +) + +const ( + eventRecorderNameTemplate = "scheduler-framework-%s" +) + +// Handle is an interface which allows plugins to set themselves up with the scheduler framework. +type Handle interface { + // Client returns a cached client. + Client() client.Client + // Manager returns a controller manager; this is mostly used for setting up a new informer + // (indirectly) via a reconciler. + Manager() ctrl.Manager + // APIReader returns an uncached read-only client, which allows direct (uncached) access to the API server. + APIReader() client.Reader + // EventRecorder returns an event recorder. + EventRecorder() record.EventRecorder +} + +// Framework is an interface which scheduler framework should implement. +type Framework interface { + Handle + + // RunSchedulerCycleFor performs scheduling for a policy snapshot. + RunSchedulingCycleFor(ctx context.Context, policy *fleetv1.ClusterPolicySnapshot, resources *fleetv1.ClusterResourceSnapshot) (result ctrl.Result, err error) +} + +// framework implements the Framework interface. +type framework struct { + // profile is the scheduling profile in use by the scheduler framework; it includes + // the plugins to run at each extension point. + profile *Profile + + // client is the (cached) client in use by the scheduler framework for accessing Kubernetes API server. + client client.Client + // apiReader is the uncached read-only client in use by the scheduler framework for accessing + // Kubernetes API server; in most cases client should be used instead, unless consistency becomes + // a serious concern. + // TO-DO (chenyu1): explore the possbilities of using a mutation cache for better performance. + apiReader client.Reader + // manager is the controller manager in use by the scheduler framework. + manager ctrl.Manager + // eventRecorder is the event recorder in use by the scheduler framework. + eventRecorder record.EventRecorder + + // parallelizer is a utility which helps run tasks in parallel. + parallelizer *parallelizer.Parallerlizer +} + +var ( + // Verify that framework implements Framework (and consequently, Handle). + _ Framework = &framework{} +) + +// frameworkOptions is the options for a scheduler framework. +type frameworkOptions struct { + // numOfWorkers is the number of workers the scheduler framework will use to parallelize tasks, + // e.g., calling plugins. + numOfWorkers int +} + +// Option is the function for configuring a scheduler framework. 
+type Option func(*frameworkOptions) + +// defaultFrameworkOptions is the default options for a scheduler framework. +var defaultFrameworkOptions = frameworkOptions{numOfWorkers: parallelizer.DefaultNumOfWorkers} + +// WithNumOfWorkers sets the number of workers to use for a scheduler framework. +func WithNumOfWorkers(numOfWorkers int) Option { + return func(fo *frameworkOptions) { + fo.numOfWorkers = numOfWorkers + } +} + +// NewFramework returns a new scheduler framework. +func NewFramework(profile *Profile, manager ctrl.Manager, opts ...Option) Framework { + options := defaultFrameworkOptions + for _, opt := range opts { + opt(&options) + } + + // In principle, the scheduler needs to set up informers for resources it is interested in, + // primarily clusters, snapshots, and bindings. In our current architecture, however, + // some (if not all) of the informers may have already been set up by other controllers + // sharing the same controller manager, e.g. cluster watcher. Therefore, here no additional + // informers are explicitly set up. + // + // Note that setting up an informer is achieved by setting up an no-op (at this moment) + // reconciler, as it does not seem to possible to directly manipulate the informers (cache) in + // use by a controller runtime manager via public API. In the long run, the reconciles might + // be useful for setting up some common states for the scheduler, e.g., a resource model. + // + // Also note that an indexer might need to be set up for improved performance. + + return &framework{ + profile: profile, + client: manager.GetClient(), + apiReader: manager.GetAPIReader(), + manager: manager, + eventRecorder: manager.GetEventRecorderFor(fmt.Sprintf(eventRecorderNameTemplate, profile.Name())), + parallelizer: parallelizer.NewParallelizer(options.numOfWorkers), + } +} + +// Client returns the (cached) client in use by the scheduler framework. +func (f *framework) Client() client.Client { + return f.client +} + +// Manager returns the controller manager in use by the scheduler framework. +func (f *framework) Manager() ctrl.Manager { + return f.manager +} + +// APIReader returns the (uncached) read-only client in use by the scheduler framework. +func (f *framework) APIReader() client.Reader { + return f.apiReader +} + +// EventRecorder returns the event recorder in use by the scheduler framework. +func (f *framework) EventRecorder() record.EventRecorder { + return f.eventRecorder +} + +// RunSchedulingCycleFor performs scheduling for a policy snapshot. +func (f *framework) RunSchedulingCycleFor(ctx context.Context, policy *fleetv1.ClusterPolicySnapshot, resources *fleetv1.ClusterResourceSnapshot) (result ctrl.Result, err error) { //nolint:revive + // Not yet implemented. + return ctrl.Result{}, nil +} diff --git a/pkg/scheduler/framework/profile.go b/pkg/scheduler/framework/profile.go index 6283e32df..38ff63a94 100644 --- a/pkg/scheduler/framework/profile.go +++ b/pkg/scheduler/framework/profile.go @@ -61,6 +61,11 @@ func (profile *Profile) WithScorePlugin(plugin ScorePlugin) *Profile { return profile } +// Name returns the name of the profile. +func (profile *Profile) Name() string { + return profile.name +} + // NewProfile creates scheduling profile. 
func NewProfile(name string) *Profile { return &Profile{ From 7ccf01c51b8dd22c056ffdcce85f3325020c5d95 Mon Sep 17 00:00:00 2001 From: michaelawyu Date: Mon, 12 Jun 2023 23:46:54 +0800 Subject: [PATCH 02/15] Minor fixes --- pkg/scheduler/framework/framework.go | 3 ++- pkg/scheduler/framework/interface.go | 5 ++++- pkg/scheduler/framework/profile_test.go | 3 +++ 3 files changed, 9 insertions(+), 2 deletions(-) diff --git a/pkg/scheduler/framework/framework.go b/pkg/scheduler/framework/framework.go index 3d9b5899b..4b48ee89a 100644 --- a/pkg/scheduler/framework/framework.go +++ b/pkg/scheduler/framework/framework.go @@ -3,7 +3,7 @@ Copyright (c) Microsoft Corporation. Licensed under the MIT license. */ -// package framework features the scheduler framework, which the scheduler runs to schedule +// Package framework features the scheduler framework, which the scheduler runs to schedule // a placement to most appropriate clusters. package framework @@ -20,6 +20,7 @@ import ( ) const ( + // eventRecorderNameTemplate is the template used to format event recorder name for a scheduler framework. eventRecorderNameTemplate = "scheduler-framework-%s" ) diff --git a/pkg/scheduler/framework/interface.go b/pkg/scheduler/framework/interface.go index 882906a0c..12173f463 100644 --- a/pkg/scheduler/framework/interface.go +++ b/pkg/scheduler/framework/interface.go @@ -14,7 +14,10 @@ import ( // Plugin is the interface which all scheduler plugins should implement. type Plugin interface { Name() string - // TO-DO (chenyu1): add a method to help plugin set up the framework as needed. + + // SetUpWithFramework helps a plugin to set it up with a scheduler framework, such as + // spinning up an informer. + SetUpWithFramework(handle Handle) } // PostBatchPlugin is the interface which all plugins that would like to run at the PostBatch diff --git a/pkg/scheduler/framework/profile_test.go b/pkg/scheduler/framework/profile_test.go index 2e39c69ce..55a1b24bb 100644 --- a/pkg/scheduler/framework/profile_test.go +++ b/pkg/scheduler/framework/profile_test.go @@ -51,6 +51,9 @@ func (p *DummyAllPurposePlugin) Score(ctx context.Context, state CycleStatePlugi return &ClusterScore{}, nil } +// SetUpWithFramework is a no-op to satisfy the Plugin interface. +func (p *DummyAllPurposePlugin) SetUpWithFramework(handle Handle) {} // nolint:revive + // TestProfile tests the basic ops of a Profile. func TestProfile(t *testing.T) { profile := NewProfile(dummyProfileName) From 07fd71fe744742c11b15611f40e31ccb2ce92954 Mon Sep 17 00:00:00 2001 From: michaelawyu Date: Wed, 14 Jun 2023 21:40:15 +0800 Subject: [PATCH 03/15] Minor fixes --- pkg/scheduler/framework/framework.go | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pkg/scheduler/framework/framework.go b/pkg/scheduler/framework/framework.go index 4b48ee89a..bed3b433f 100644 --- a/pkg/scheduler/framework/framework.go +++ b/pkg/scheduler/framework/framework.go @@ -24,7 +24,8 @@ const ( eventRecorderNameTemplate = "scheduler-framework-%s" ) -// Handle is an interface which allows plugins to set themselves up with the scheduler framework. +// Handle is an interface which allows plugins to access some shared structs (e.g., client, manager) +// and set themselves up with the scheduler framework (e.g., sign up for an informer). type Handle interface { // Client returns a cached client. 
Client() client.Client From 15a0f88abde7b93dde8c9bcb146819598a7631c6 Mon Sep 17 00:00:00 2001 From: michaelawyu Date: Thu, 15 Jun 2023 14:09:58 +0800 Subject: [PATCH 04/15] Rebased --- pkg/scheduler/framework/framework.go | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pkg/scheduler/framework/framework.go b/pkg/scheduler/framework/framework.go index bed3b433f..66c81010e 100644 --- a/pkg/scheduler/framework/framework.go +++ b/pkg/scheduler/framework/framework.go @@ -15,7 +15,7 @@ import ( ctrl "sigs.k8s.io/controller-runtime" "sigs.k8s.io/controller-runtime/pkg/client" - fleetv1 "go.goms.io/fleet/apis/v1" + fleetv1beta1 "go.goms.io/fleet/apis/placement/v1beta1" "go.goms.io/fleet/pkg/scheduler/framework/parallelizer" ) @@ -43,7 +43,7 @@ type Framework interface { Handle // RunSchedulerCycleFor performs scheduling for a policy snapshot. - RunSchedulingCycleFor(ctx context.Context, policy *fleetv1.ClusterPolicySnapshot, resources *fleetv1.ClusterResourceSnapshot) (result ctrl.Result, err error) + RunSchedulingCycleFor(ctx context.Context, policy *fleetv1beta1.ClusterPolicySnapshot, resources *fleetv1beta1.ClusterResourceSnapshot) (result ctrl.Result, err error) } // framework implements the Framework interface. @@ -144,7 +144,7 @@ func (f *framework) EventRecorder() record.EventRecorder { } // RunSchedulingCycleFor performs scheduling for a policy snapshot. -func (f *framework) RunSchedulingCycleFor(ctx context.Context, policy *fleetv1.ClusterPolicySnapshot, resources *fleetv1.ClusterResourceSnapshot) (result ctrl.Result, err error) { //nolint:revive +func (f *framework) RunSchedulingCycleFor(ctx context.Context, policy *fleetv1beta1.ClusterPolicySnapshot, resources *fleetv1beta1.ClusterResourceSnapshot) (result ctrl.Result, err error) { //nolint:revive // Not yet implemented. return ctrl.Result{}, nil } From 26ca9c701d4bcec5aa7aca579ce67dfe1756c878 Mon Sep 17 00:00:00 2001 From: michaelawyu Date: Thu, 15 Jun 2023 14:12:42 +0800 Subject: [PATCH 05/15] Minor fixes --- .../v1beta1/zz_generated.deepcopy.go | 2 +- pkg/scheduler/framework/framework.go | 24 +++++++++---------- pkg/scheduler/framework/profile_test.go | 1 + 3 files changed, 14 insertions(+), 13 deletions(-) diff --git a/apis/placement/v1beta1/zz_generated.deepcopy.go b/apis/placement/v1beta1/zz_generated.deepcopy.go index eb12e1242..d78c05e29 100644 --- a/apis/placement/v1beta1/zz_generated.deepcopy.go +++ b/apis/placement/v1beta1/zz_generated.deepcopy.go @@ -12,7 +12,7 @@ package v1beta1 import ( corev1 "k8s.io/api/core/v1" - "k8s.io/apimachinery/pkg/apis/meta/v1" + v1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/runtime" ) diff --git a/pkg/scheduler/framework/framework.go b/pkg/scheduler/framework/framework.go index 66c81010e..19bb5e5e6 100644 --- a/pkg/scheduler/framework/framework.go +++ b/pkg/scheduler/framework/framework.go @@ -32,8 +32,8 @@ type Handle interface { // Manager returns a controller manager; this is mostly used for setting up a new informer // (indirectly) via a reconciler. Manager() ctrl.Manager - // APIReader returns an uncached read-only client, which allows direct (uncached) access to the API server. - APIReader() client.Reader + // UncachedReader returns an uncached read-only client, which allows direct (uncached) access to the API server. + UncachedReader() client.Reader // EventRecorder returns an event recorder. 
EventRecorder() record.EventRecorder } @@ -54,11 +54,11 @@ type framework struct { // client is the (cached) client in use by the scheduler framework for accessing Kubernetes API server. client client.Client - // apiReader is the uncached read-only client in use by the scheduler framework for accessing + // uncachedReader is the uncached read-only client in use by the scheduler framework for accessing // Kubernetes API server; in most cases client should be used instead, unless consistency becomes // a serious concern. // TO-DO (chenyu1): explore the possbilities of using a mutation cache for better performance. - apiReader client.Reader + uncachedReader client.Reader // manager is the controller manager in use by the scheduler framework. manager ctrl.Manager // eventRecorder is the event recorder in use by the scheduler framework. @@ -114,12 +114,12 @@ func NewFramework(profile *Profile, manager ctrl.Manager, opts ...Option) Framew // Also note that an indexer might need to be set up for improved performance. return &framework{ - profile: profile, - client: manager.GetClient(), - apiReader: manager.GetAPIReader(), - manager: manager, - eventRecorder: manager.GetEventRecorderFor(fmt.Sprintf(eventRecorderNameTemplate, profile.Name())), - parallelizer: parallelizer.NewParallelizer(options.numOfWorkers), + profile: profile, + client: manager.GetClient(), + uncachedReader: manager.GetAPIReader(), + manager: manager, + eventRecorder: manager.GetEventRecorderFor(fmt.Sprintf(eventRecorderNameTemplate, profile.Name())), + parallelizer: parallelizer.NewParallelizer(options.numOfWorkers), } } @@ -134,8 +134,8 @@ func (f *framework) Manager() ctrl.Manager { } // APIReader returns the (uncached) read-only client in use by the scheduler framework. -func (f *framework) APIReader() client.Reader { - return f.apiReader +func (f *framework) UncachedReader() client.Reader { + return f.uncachedReader } // EventRecorder returns the event recorder in use by the scheduler framework. 
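As an illustrative sketch (not taken from the patches themselves, so the plugin type, its name, and its field are hypothetical), a minimal plugin consuming this Handle could look as follows; only the Plugin interface from patch 02 (Name, SetUpWithFramework) and the Handle methods above (Client, Manager, UncachedReader, EventRecorder) are assumed from the series, and the type is assumed to live in the framework package alongside the interfaces it implements:

// dummyHandleAwarePlugin illustrates how a plugin keeps the Handle it receives at setup time.
type dummyHandleAwarePlugin struct {
	handle Handle
}

// Name returns the name of the plugin.
func (p *dummyHandleAwarePlugin) Name() string { return "DummyHandleAware" }

// SetUpWithFramework stores the handle; later extension points can then read objects through
// handle.Client() (cached) or handle.UncachedReader() (direct API server reads) and emit
// events through handle.EventRecorder().
func (p *dummyHandleAwarePlugin) SetUpWithFramework(handle Handle) {
	p.handle = handle
}
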
diff --git a/pkg/scheduler/framework/profile_test.go b/pkg/scheduler/framework/profile_test.go index 55a1b24bb..3d82b681e 100644 --- a/pkg/scheduler/framework/profile_test.go +++ b/pkg/scheduler/framework/profile_test.go @@ -10,6 +10,7 @@ import ( "testing" "github.com/google/go-cmp/cmp" + fleetv1beta1 "go.goms.io/fleet/apis/placement/v1beta1" ) From 825f9b47b2c58d6d853c0212c44ec04accf730c9 Mon Sep 17 00:00:00 2001 From: michaelawyu Date: Wed, 14 Jun 2023 19:02:49 +0800 Subject: [PATCH 06/15] Added more scheduler framework logic --- pkg/scheduler/framework/framework.go | 102 +++++++- pkg/scheduler/framework/framework_test.go | 286 ++++++++++++++++++++++ pkg/scheduler/framework/utils.go | 56 +++++ pkg/utils/common.go | 15 ++ 4 files changed, 458 insertions(+), 1 deletion(-) create mode 100644 pkg/scheduler/framework/framework_test.go create mode 100644 pkg/scheduler/framework/utils.go diff --git a/pkg/scheduler/framework/framework.go b/pkg/scheduler/framework/framework.go index 19bb5e5e6..b7b189d84 100644 --- a/pkg/scheduler/framework/framework.go +++ b/pkg/scheduler/framework/framework.go @@ -11,12 +11,15 @@ import ( "context" "fmt" + "k8s.io/apimachinery/pkg/labels" "k8s.io/client-go/tools/record" ctrl "sigs.k8s.io/controller-runtime" "sigs.k8s.io/controller-runtime/pkg/client" + "sigs.k8s.io/controller-runtime/pkg/controller/controllerutil" fleetv1beta1 "go.goms.io/fleet/apis/placement/v1beta1" "go.goms.io/fleet/pkg/scheduler/framework/parallelizer" + "go.goms.io/fleet/pkg/utils" ) const ( @@ -145,6 +148,103 @@ func (f *framework) EventRecorder() record.EventRecorder { // RunSchedulingCycleFor performs scheduling for a policy snapshot. func (f *framework) RunSchedulingCycleFor(ctx context.Context, policy *fleetv1beta1.ClusterPolicySnapshot, resources *fleetv1beta1.ClusterResourceSnapshot) (result ctrl.Result, err error) { //nolint:revive - // Not yet implemented. + errorFormat := "failed to run scheduling cycle for policy %s: %w" + + // Collect all clusters. + // + // Note that clusters here are listed from the cached client for improved performance. This is + // safe in consistency as it is guaranteed that the scheduler will receive all events for cluster + // changes eventually. + // + // TO-DO (chenyu1): assign variable(s) when more logic is added. + _, err = f.collectClusters(ctx) + if err != nil { + return ctrl.Result{}, fmt.Errorf(errorFormat, policy.Name, err) + } + + // Collect all bindings. + // + // Note that for consistency reasons, bindings are listed directly from the API server; this helps + // avoid a classic read-after-write consistency issue, which, though should only happen when there + // are connectivity issues and/or API server is overloaded, can lead to over-scheduling in adverse + // scenarios. It is true that even when bindings are over-scheduled, the scheduler can still correct + // the situation in the next cycle; however, considering that placing resources to clusters, unlike + // pods to nodes, is more expensive, it is better to avoid over-scheduling in the first place. + // + // This, of course, has additional performance overhead (and may further exacerbate API server + // overloading). In the long run we might still want to resort to a cached situtation. + // + // TO-DO (chenyu1): explore the possbilities of using a mutation cache for better performance. 
+ bindings, err := f.collectBindings(ctx, policy) + if err != nil { + return ctrl.Result{}, fmt.Errorf(errorFormat, policy.Name, err) + } + + // Parse the bindings, find out + // * active bindings, i.e., bindings that are not marked for deletion; and + // * bindings that are already marked for deletion, but still have the dispatcher finalizer + // present; + // * bindings that are already marked for deletion, and no longer have the dispatcher finalizer + // + // Note that the scheduler only considers a binding to be deleted if it is marked for deletion + // and it no longer has the dispatcher finalizer. This helps avoid a rare racing condition + // where the scheduler binds a cluster to a resource placement, even though the dispatcher has + // not started, or is still in the process of, removing the same set of resources from + // the cluster, triggered by a recently deleted binding. + // + // TO-DO (chenyu1): assign variable(s) when more logic is added. + _, _, deletedWithoutDispatcherFinalizer := classifyBindings(bindings) + + // If a binding has been marked for deletion and no longer has the dispatcher finalizer, the scheduler + // removes its own finalizer from it, to clear it for eventual deletion. + if err := f.removeSchedulerFinalizerFromBindings(ctx, deletedWithoutDispatcherFinalizer); err != nil { + return ctrl.Result{}, fmt.Errorf(errorFormat, policy.Name, err) + } + + // Not yet fully implemented. return ctrl.Result{}, nil } + +// collectClusters lists all clusters in the cache. +func (f *framework) collectClusters(ctx context.Context) ([]fleetv1beta1.MemberCluster, error) { + errorFormat := "failed to collect clusters: %w" + + clusterList := &fleetv1beta1.MemberClusterList{} + if err := f.client.List(ctx, clusterList, &client.ListOptions{}); err != nil { + return nil, fmt.Errorf(errorFormat, err) + } + return clusterList.Items, nil +} + +// collectBindings lists all bindings associated with a CRP **using the uncached client**. +func (f *framework) collectBindings(ctx context.Context, policy *fleetv1beta1.ClusterPolicySnapshot) ([]fleetv1beta1.ClusterResourceBinding, error) { + errorFormat := "failed to collect bindings: %w" + + bindingOwner, err := extractOwnerCRPNameFromPolicySnapshot(policy) + if err != nil { + // This branch should never run in most cases, as the a policy snapshot is expected to be + // owned by a CRP. + return nil, fmt.Errorf(errorFormat, err) + } + + bindingList := &fleetv1beta1.ClusterResourceBindingList{} + labelSelector := labels.SelectorFromSet(labels.Set{fleetv1beta1.CRPTrackingLabel: bindingOwner}) + // List bindings directly from the API server. + if err := f.uncachedReader.List(ctx, bindingList, &client.ListOptions{LabelSelector: labelSelector}); err != nil { + return nil, fmt.Errorf(errorFormat, err) + } + return bindingList.Items, nil +} + +// removeSchedulerFinalizerFromBindings removes the scheduler finalizer from a list of bindings. 
+func (f *framework) removeSchedulerFinalizerFromBindings(ctx context.Context, bindings []*fleetv1beta1.ClusterResourceBinding) error { + errorFormat := "failed to remove scheduler finalizer from binding %s: %w" + + for _, binding := range bindings { + controllerutil.RemoveFinalizer(binding, utils.SchedulerFinalizer) + if err := f.client.Update(ctx, binding, &client.UpdateOptions{}); err != nil { + return fmt.Errorf(errorFormat, binding.Name, err) + } + } + return nil +} diff --git a/pkg/scheduler/framework/framework_test.go b/pkg/scheduler/framework/framework_test.go new file mode 100644 index 000000000..7281da170 --- /dev/null +++ b/pkg/scheduler/framework/framework_test.go @@ -0,0 +1,286 @@ +/* +Copyright (c) Microsoft Corporation. +Licensed under the MIT license. +*/ + +package framework + +import ( + "context" + "log" + "os" + "testing" + + "github.com/google/go-cmp/cmp" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/types" + "k8s.io/client-go/kubernetes/scheme" + "sigs.k8s.io/controller-runtime/pkg/client/fake" + "sigs.k8s.io/controller-runtime/pkg/controller/controllerutil" + + fleetv1 "go.goms.io/fleet/apis/v1" + "go.goms.io/fleet/pkg/utils" +) + +const ( + CRPName = "test-placement" + policyName = "test-policy" + bindingName = "test-binding" +) + +// TO-DO (chenyu1): expand the test cases as development stablizes. + +// TestMain sets up the test environment. +func TestMain(m *testing.M) { + // Add custom APIs to the runtime scheme. + if err := fleetv1.AddToScheme(scheme.Scheme); err != nil { + log.Fatalf("failed to add custom APIs to the runtime scheme: %v", err) + } + + os.Exit(m.Run()) +} + +// TestCollectClusters tests the collectClusters method. +func TestCollectClusters(t *testing.T) { + cluster := fleetv1.MemberCluster{ + ObjectMeta: metav1.ObjectMeta{ + Name: "cluster-1", + }, + } + + fakeClient := fake.NewClientBuilder(). + WithScheme(scheme.Scheme). + WithObjects(&cluster). + Build() + // Construct framework manually instead of using NewFramework() to avoid mocking the controller manager. + f := &framework{ + client: fakeClient, + } + + ctx := context.Background() + clusters, err := f.collectClusters(ctx) + if err != nil { + t.Fatalf("collectClusters() = %v, %v, want no error", clusters, err) + } + + wantClusters := []fleetv1.MemberCluster{cluster} + if !cmp.Equal(clusters, wantClusters) { + t.Fatalf("collectClusters() = %v, %v, want %v, nil", clusters, err, wantClusters) + } +} + +// TestExtractOwnerCRPNameFromPolicySnapshot tests the extractOwnerCRPNameFromPolicySnapshot method. 
+func TestExtractOwnerCRPNameFromPolicySnapshot(t *testing.T) { + testCases := []struct { + name string + policy *fleetv1.ClusterPolicySnapshot + expectToFail bool + }{ + { + name: "policy with CRP owner reference", + policy: &fleetv1.ClusterPolicySnapshot{ + ObjectMeta: metav1.ObjectMeta{ + Name: policyName, + OwnerReferences: []metav1.OwnerReference{ + { + Kind: utils.CRPV1GVK.Kind, + Name: CRPName, + }, + }, + }, + }, + }, + { + name: "policy without CRP owner reference", + policy: &fleetv1.ClusterPolicySnapshot{ + ObjectMeta: metav1.ObjectMeta{ + Name: policyName, + OwnerReferences: []metav1.OwnerReference{}, + }, + }, + expectToFail: true, + }, + } + + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + owner, err := extractOwnerCRPNameFromPolicySnapshot(tc.policy) + if tc.expectToFail { + if err == nil { + t.Fatalf("extractOwnerCRPNameFromPolicySnapshot() = %v, %v, want cannot find owner ref error", owner, err) + } + return + } + + if err != nil || owner != CRPName { + t.Fatalf("extractOwnerCRPNameFromPolicySnapshot() = %v, %v, want %v, no error", owner, err, CRPName) + } + }) + } +} + +// TestCollectBindings tests the collectBindings method. +func TestCollectBindings(t *testing.T) { + binding := &fleetv1.ClusterResourceBinding{ + ObjectMeta: metav1.ObjectMeta{ + Name: bindingName, + Labels: map[string]string{ + fleetv1.CRPTrackingLabel: CRPName, + }, + }, + } + altCRPName := "another-test-placement" + + testCases := []struct { + name string + binding *fleetv1.ClusterResourceBinding + policy *fleetv1.ClusterPolicySnapshot + expectToFail bool + expectToFindNoBindings bool + }{ + { + name: "found matching bindings", + binding: binding, + policy: &fleetv1.ClusterPolicySnapshot{ + ObjectMeta: metav1.ObjectMeta{ + Name: policyName, + OwnerReferences: []metav1.OwnerReference{ + { + Kind: utils.CRPV1GVK.Kind, + Name: CRPName, + }, + }, + }, + }, + }, + { + name: "no owner reference in policy", + policy: &fleetv1.ClusterPolicySnapshot{ + ObjectMeta: metav1.ObjectMeta{ + Name: policyName, + }, + }, + expectToFail: true, + }, + { + name: "no matching bindings", + binding: binding, + policy: &fleetv1.ClusterPolicySnapshot{ + ObjectMeta: metav1.ObjectMeta{ + Name: policyName, + OwnerReferences: []metav1.OwnerReference{ + { + Kind: utils.CRPV1GVK.Kind, + Name: altCRPName, + }, + }, + }, + }, + expectToFindNoBindings: true, + }, + } + + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + fakeClientBuilder := fake.NewClientBuilder().WithScheme(scheme.Scheme) + if tc.binding != nil { + fakeClientBuilder.WithObjects(tc.binding) + } + fakeClient := fakeClientBuilder.Build() + // Construct framework manually instead of using NewFramework() to avoid mocking the controller manager. + f := &framework{ + apiReader: fakeClient, + } + + ctx := context.Background() + bindings, err := f.collectBindings(ctx, tc.policy) + if tc.expectToFail { + if err == nil { + t.Fatalf("collectBindings() = %v, %v, want failed to collect bindings error", bindings, err) + } + return + } + + if err != nil { + t.Fatalf("collectBindings() = %v, %v, want %v, no error", bindings, err, binding) + } + wantBindings := []fleetv1.ClusterResourceBinding{} + if !tc.expectToFindNoBindings { + wantBindings = append(wantBindings, *binding) + } + if !cmp.Equal(bindings, wantBindings) { + t.Fatalf("collectBindings() = %v, %v, want %v, no error", bindings, err, wantBindings) + } + }) + } +} + +// TestClassifyBindings tests the classifyBindings function. 
+func TestClassifyBindings(t *testing.T) { + timestamp := metav1.Now() + activeBinding := &fleetv1.ClusterResourceBinding{ + ObjectMeta: metav1.ObjectMeta{ + Name: "active-binding", + }, + } + deletedBindingWithDispatcherFinalizer := &fleetv1.ClusterResourceBinding{ + ObjectMeta: metav1.ObjectMeta{ + Name: "deleted-binding-with-dispatcher-finalizer", + DeletionTimestamp: ×tamp, + Finalizers: []string{ + utils.DispatcherFinalizer, + }, + }, + } + deletedBindingWithoutDispatcherFinalizer := &fleetv1.ClusterResourceBinding{ + ObjectMeta: metav1.ObjectMeta{ + Name: "deleted-binding-without-dispatcher-finalizer", + DeletionTimestamp: ×tamp, + }, + } + + active, deletedWith, deletedWithout := classifyBindings([]fleetv1.ClusterResourceBinding{*activeBinding, *deletedBindingWithDispatcherFinalizer, *deletedBindingWithoutDispatcherFinalizer}) + if diff := cmp.Diff(active, []*fleetv1.ClusterResourceBinding{activeBinding}); diff != "" { + t.Errorf("classifyBindings() active = %v, want %v", active, []*fleetv1.ClusterResourceBinding{activeBinding}) + } + if !cmp.Equal(deletedWith, []*fleetv1.ClusterResourceBinding{deletedBindingWithDispatcherFinalizer}) { + t.Errorf("classifyBindings() deletedWithDispatcherFinalizer = %v, want %v", deletedWith, []*fleetv1.ClusterResourceBinding{deletedBindingWithDispatcherFinalizer}) + } + if !cmp.Equal(deletedWithout, []*fleetv1.ClusterResourceBinding{deletedBindingWithoutDispatcherFinalizer}) { + t.Errorf("classifyBindings() deletedWithoutDispatcherFinalizer = %v, want %v", deletedWithout, []*fleetv1.ClusterResourceBinding{deletedBindingWithoutDispatcherFinalizer}) + } +} + +// TestRemoveSchedulerFinalizerFromBindings tests the removeSchedulerFinalizerFromBindings method. +func TestRemoveSchedulerFinalizerFromBindings(t *testing.T) { + binding := &fleetv1.ClusterResourceBinding{ + ObjectMeta: metav1.ObjectMeta{ + Name: bindingName, + Finalizers: []string{utils.SchedulerFinalizer}, + }, + } + + fakeClient := fake.NewClientBuilder(). + WithScheme(scheme.Scheme). + WithObjects(binding). + Build() + // Construct framework manually instead of using NewFramework() to avoid mocking the controller manager. + f := &framework{ + client: fakeClient, + } + + ctx := context.Background() + if err := f.removeSchedulerFinalizerFromBindings(ctx, []*fleetv1.ClusterResourceBinding{binding}); err != nil { + t.Fatalf("removeSchedulerFinalizerFromBindings() = %v, want no error", err) + } + + // Verify that the finalizer has been removed. + updatedBinding := &fleetv1.ClusterResourceBinding{} + if err := fakeClient.Get(ctx, types.NamespacedName{Name: bindingName}, updatedBinding); err != nil { + t.Fatalf("Binding Get(%v) = %v, want no error", bindingName, err) + } + + if controllerutil.ContainsFinalizer(updatedBinding, utils.SchedulerFinalizer) { + t.Fatalf("Binding %s finalizers = %v, want no scheduler finalizer", bindingName, updatedBinding.Finalizers) + } +} diff --git a/pkg/scheduler/framework/utils.go b/pkg/scheduler/framework/utils.go new file mode 100644 index 000000000..eb7c02027 --- /dev/null +++ b/pkg/scheduler/framework/utils.go @@ -0,0 +1,56 @@ +/* +Copyright (c) Microsoft Corporation. +Licensed under the MIT license. +*/ + +package framework + +import ( + "fmt" + + "sigs.k8s.io/controller-runtime/pkg/controller/controllerutil" + + fleetv1 "go.goms.io/fleet/apis/v1" + "go.goms.io/fleet/pkg/utils" +) + +// extractOwnerCRPNameFromPolicySnapshot extracts the name of the owner CRP from the policy snapshot. 
+func extractOwnerCRPNameFromPolicySnapshot(policy *fleetv1.ClusterPolicySnapshot) (string, error) { + var owner string + for _, ownerRef := range policy.OwnerReferences { + if ownerRef.Kind == utils.CRPV1GVK.Kind { + owner = ownerRef.Name + break + } + } + if len(owner) == 0 { + return "", fmt.Errorf("cannot find owner reference for policy snapshot %v", policy.Name) + } + return owner, nil +} + +// classifyBindings categorizes bindings into three groups: +// * active: active bindings, that is, bindings that are not marked for deletion; and +// * deletedWithDispatcherFinalizer: bindings that are marked for deletion, but still has the dispatcher finalizer present; and +// * deletedWithoutDispatcherFinalizer: bindings that are marked for deletion, and the dispatcher finalizer is already removed. +func classifyBindings(bindings []fleetv1.ClusterResourceBinding) (active, deletedWithDispatcherFinalizer, deletedWithoutDispatcherFinalizer []*fleetv1.ClusterResourceBinding) { + // Pre-allocate arrays. + active = make([]*fleetv1.ClusterResourceBinding, 0, len(bindings)) + deletedWithDispatcherFinalizer = make([]*fleetv1.ClusterResourceBinding, 0, len(bindings)) + deletedWithoutDispatcherFinalizer = make([]*fleetv1.ClusterResourceBinding, 0, len(bindings)) + + for idx := range bindings { + binding := bindings[idx] + if binding.DeletionTimestamp != nil { + if controllerutil.ContainsFinalizer(&binding, utils.DispatcherFinalizer) { + deletedWithDispatcherFinalizer = append(deletedWithDispatcherFinalizer, &binding) + } else { + deletedWithoutDispatcherFinalizer = append(deletedWithoutDispatcherFinalizer, &binding) + } + } else { + active = append(active, &binding) + } + } + + return active, deletedWithDispatcherFinalizer, deletedWithoutDispatcherFinalizer +} diff --git a/pkg/utils/common.go b/pkg/utils/common.go index 2a8d915c5..54ec9a4fe 100644 --- a/pkg/utils/common.go +++ b/pkg/utils/common.go @@ -24,6 +24,7 @@ import ( "k8s.io/klog/v2" workv1alpha1 "sigs.k8s.io/work-api/pkg/apis/v1alpha1" + fleetv1 "go.goms.io/fleet/apis/v1" fleetv1alpha1 "go.goms.io/fleet/apis/v1alpha1" "go.goms.io/fleet/pkg/utils/informer" ) @@ -59,6 +60,14 @@ const ( // MemberClusterFinalizer is used to make sure that we handle gc of all the member cluster resources on the hub cluster. MemberClusterFinalizer = "work.fleet.azure.com/membercluster-finalizer" + // DispatcherFinalizer is added by the dispatcher to make sure that a binding can only be deleted if the dispatcher + // has removed all selected resources from the bound cluster. + DispatcherFinalizer = "fleet.io/dispatcher-cleanup" + + // SchedulerFinalizer is added by the scheduler to make sure that a binding can only be deleted if the scheduler + // has relieved it from scheduling consideration. + SchedulerFinalizer = "fleet.io/scheduler-cleanup" + // LastWorkUpdateTimeAnnotationKey is used to mark the last update time on a work object. LastWorkUpdateTimeAnnotationKey = "work.fleet.azure.com/last-update-time" ) @@ -141,6 +150,12 @@ var ( Version: corev1.SchemeGroupVersion.Version, Resource: "services", } + + CRPV1GVK = schema.GroupVersionKind{ + Group: fleetv1.GroupVersion.Group, + Version: fleetv1.GroupVersion.Version, + Kind: "ClusterResourcePlacement", + } ) // RandSecureInt returns a uniform random value in [1, max] or panic. 
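To show how the pieces added so far fit together, the sketch below assembles a profile and a framework and runs one scheduling cycle. The package name, the buildAndRun helper, the profile name "default", and the worker count are assumptions made only for illustration; NewProfile, NewFramework, WithNumOfWorkers, and RunSchedulingCycleFor are the APIs shown earlier in this series.

package scheduler

import (
	"context"

	ctrl "sigs.k8s.io/controller-runtime"

	fleetv1beta1 "go.goms.io/fleet/apis/placement/v1beta1"
	"go.goms.io/fleet/pkg/scheduler/framework"
)

// buildAndRun is a hypothetical helper that wires a scheduling profile and a framework on top
// of a shared controller manager, then runs a single scheduling cycle for the given snapshots.
func buildAndRun(ctx context.Context, mgr ctrl.Manager, policy *fleetv1beta1.ClusterPolicySnapshot, resources *fleetv1beta1.ClusterResourceSnapshot) (ctrl.Result, error) {
	// Assemble a profile; plugins would be registered via the With*Plugin builders on Profile.
	profile := framework.NewProfile("default")

	// Build the framework; WithNumOfWorkers tunes how many workers parallelize plugin calls.
	fw := framework.NewFramework(profile, mgr, framework.WithNumOfWorkers(4))

	// Run one scheduling cycle for the active policy snapshot and its resource snapshot.
	return fw.RunSchedulingCycleFor(ctx, policy, resources)
}
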
From 9e199862d2c7db05d4032cba4bf3c5c5eb6d20d7 Mon Sep 17 00:00:00 2001 From: michaelawyu Date: Wed, 14 Jun 2023 20:23:00 +0800 Subject: [PATCH 07/15] Minor fixes --- .../placement/v1beta1/policysnapshot_types.go | 5 ++ pkg/scheduler/framework/framework.go | 8 +++ pkg/scheduler/framework/framework_test.go | 72 +++++++++++++++++++ pkg/scheduler/framework/utils.go | 17 +++++ 4 files changed, 102 insertions(+) diff --git a/apis/placement/v1beta1/policysnapshot_types.go b/apis/placement/v1beta1/policysnapshot_types.go index 07eb2e597..6046555cf 100644 --- a/apis/placement/v1beta1/policysnapshot_types.go +++ b/apis/placement/v1beta1/policysnapshot_types.go @@ -16,6 +16,11 @@ const ( // PolicySnapshotNameFmt is clusterPolicySnapshot name format: {CRPName}-{PolicySnapshotIndex}. PolicySnapshotNameFmt = "%s-%d" + + // NumOfClustersAnnotation is an annotation that indicates the desired number of clusters where + // the selected resources should be placed. It is annotated on policy snapshots and is sync'd + // from the CRP to the currently active policy snapshot. + NumOfClustersAnnotation = fleetPrefix + "numOfClusters" ) // +genclient diff --git a/pkg/scheduler/framework/framework.go b/pkg/scheduler/framework/framework.go index b7b189d84..4764de1f4 100644 --- a/pkg/scheduler/framework/framework.go +++ b/pkg/scheduler/framework/framework.go @@ -150,6 +150,14 @@ func (f *framework) EventRecorder() record.EventRecorder { func (f *framework) RunSchedulingCycleFor(ctx context.Context, policy *fleetv1beta1.ClusterPolicySnapshot, resources *fleetv1beta1.ClusterResourceSnapshot) (result ctrl.Result, err error) { //nolint:revive errorFormat := "failed to run scheduling cycle for policy %s: %w" + // Retrieve the desired number of clusters from the policy. + // + // TO-DO (chenyu1): assign variable(s) when more logic is added. + _, err = extractNumOfClustersFromPolicySnapshot(policy) + if err != nil { + return ctrl.Result{}, fmt.Errorf(errorFormat, policy.Name, err) + } + // Collect all clusters. // // Note that clusters here are listed from the cached client for improved performance. This is diff --git a/pkg/scheduler/framework/framework_test.go b/pkg/scheduler/framework/framework_test.go index 7281da170..b8c625f3e 100644 --- a/pkg/scheduler/framework/framework_test.go +++ b/pkg/scheduler/framework/framework_test.go @@ -40,6 +40,78 @@ func TestMain(m *testing.M) { os.Exit(m.Run()) } +// TestExtractNumOfClustersFromPolicySnapshot tests the extractNumOfClustersFromPolicySnapshot function. 
+func TestExtractNumOfClustersFromPolicySnapshot(t *testing.T) { + testCases := []struct { + name string + policy *fleetv1.ClusterPolicySnapshot + wantNumOfClusters int + expectedToFail bool + }{ + { + name: "valid annotation", + policy: &fleetv1.ClusterPolicySnapshot{ + ObjectMeta: metav1.ObjectMeta{ + Name: policyName, + Annotations: map[string]string{ + fleetv1.NumOfClustersAnnotation: "1", + }, + }, + }, + wantNumOfClusters: 1, + }, + { + name: "no annotation", + policy: &fleetv1.ClusterPolicySnapshot{ + ObjectMeta: metav1.ObjectMeta{ + Name: policyName, + }, + }, + expectedToFail: true, + }, + { + name: "invalid annotation: not an integer", + policy: &fleetv1.ClusterPolicySnapshot{ + ObjectMeta: metav1.ObjectMeta{ + Name: policyName, + Annotations: map[string]string{ + fleetv1.NumOfClustersAnnotation: "abc", + }, + }, + }, + expectedToFail: true, + }, + { + name: "invalid annotation: negative integer", + policy: &fleetv1.ClusterPolicySnapshot{ + ObjectMeta: metav1.ObjectMeta{ + Name: policyName, + Annotations: map[string]string{ + fleetv1.NumOfClustersAnnotation: "-1", + }, + }, + }, + expectedToFail: true, + }, + } + + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + numOfClusters, err := extractNumOfClustersFromPolicySnapshot(tc.policy) + if tc.expectedToFail { + if err == nil { + t.Fatalf("extractNumOfClustersFromPolicySnapshot() = %v, %v, want error", numOfClusters, err) + } + return + } + + if numOfClusters != tc.wantNumOfClusters { + t.Fatalf("extractNumOfClustersFromPolicySnapshot() = %v, %v, want %v, nil", numOfClusters, err, tc.wantNumOfClusters) + } + }) + } +} + // TestCollectClusters tests the collectClusters method. func TestCollectClusters(t *testing.T) { cluster := fleetv1.MemberCluster{ diff --git a/pkg/scheduler/framework/utils.go b/pkg/scheduler/framework/utils.go index eb7c02027..80a6e180e 100644 --- a/pkg/scheduler/framework/utils.go +++ b/pkg/scheduler/framework/utils.go @@ -7,6 +7,7 @@ package framework import ( "fmt" + "strconv" "sigs.k8s.io/controller-runtime/pkg/controller/controllerutil" @@ -14,6 +15,22 @@ import ( "go.goms.io/fleet/pkg/utils" ) +// extractNumOfClustersFromPolicySnapshot extracts the numOfClusters from the policy snapshot. +func extractNumOfClustersFromPolicySnapshot(policy *fleetv1.ClusterPolicySnapshot) (int, error) { + numOfClustersStr, ok := policy.Annotations[fleetv1.NumOfClustersAnnotation] + if !ok { + return 0, fmt.Errorf("cannot find annotation %s", fleetv1.NumOfClustersAnnotation) + } + + // Cast the annotation to an integer; throw an error if the cast cannot be completed or the value is negative. + numOfClusters, err := strconv.Atoi(numOfClustersStr) + if err != nil || numOfClusters < 0 { + return 0, fmt.Errorf("invalid annotation %s: Atoi(%s) = %v, %v", fleetv1.NumOfClustersAnnotation, numOfClustersStr, numOfClusters, err) + } + + return numOfClusters, nil +} + // extractOwnerCRPNameFromPolicySnapshot extracts the name of the owner CRP from the policy snapshot. 
func extractOwnerCRPNameFromPolicySnapshot(policy *fleetv1.ClusterPolicySnapshot) (string, error) { var owner string From a1877a0c82943ab09395f81fb7de7873c35ae21b Mon Sep 17 00:00:00 2001 From: michaelawyu Date: Thu, 15 Jun 2023 14:36:27 +0800 Subject: [PATCH 08/15] Rebased --- pkg/scheduler/framework/framework.go | 24 ++++--- pkg/scheduler/framework/framework_test.go | 80 +++++++++++------------ pkg/scheduler/framework/utils.go | 23 +++---- pkg/utils/common.go | 9 +-- 4 files changed, 72 insertions(+), 64 deletions(-) diff --git a/pkg/scheduler/framework/framework.go b/pkg/scheduler/framework/framework.go index 4764de1f4..890bca4e3 100644 --- a/pkg/scheduler/framework/framework.go +++ b/pkg/scheduler/framework/framework.go @@ -13,6 +13,7 @@ import ( "k8s.io/apimachinery/pkg/labels" "k8s.io/client-go/tools/record" + "k8s.io/klog/v2" ctrl "sigs.k8s.io/controller-runtime" "sigs.k8s.io/controller-runtime/pkg/client" "sigs.k8s.io/controller-runtime/pkg/controller/controllerutil" @@ -20,6 +21,7 @@ import ( fleetv1beta1 "go.goms.io/fleet/apis/placement/v1beta1" "go.goms.io/fleet/pkg/scheduler/framework/parallelizer" "go.goms.io/fleet/pkg/utils" + "go.goms.io/fleet/pkg/utils/controller" ) const ( @@ -148,14 +150,15 @@ func (f *framework) EventRecorder() record.EventRecorder { // RunSchedulingCycleFor performs scheduling for a policy snapshot. func (f *framework) RunSchedulingCycleFor(ctx context.Context, policy *fleetv1beta1.ClusterPolicySnapshot, resources *fleetv1beta1.ClusterResourceSnapshot) (result ctrl.Result, err error) { //nolint:revive - errorFormat := "failed to run scheduling cycle for policy %s: %w" + errorMessage := "failed to run scheduling cycle" // Retrieve the desired number of clusters from the policy. // // TO-DO (chenyu1): assign variable(s) when more logic is added. _, err = extractNumOfClustersFromPolicySnapshot(policy) if err != nil { - return ctrl.Result{}, fmt.Errorf(errorFormat, policy.Name, err) + klog.ErrorS(err, errorMessage, klog.KObj(policy)) + return ctrl.Result{}, err } // Collect all clusters. @@ -167,7 +170,8 @@ func (f *framework) RunSchedulingCycleFor(ctx context.Context, policy *fleetv1be // TO-DO (chenyu1): assign variable(s) when more logic is added. _, err = f.collectClusters(ctx) if err != nil { - return ctrl.Result{}, fmt.Errorf(errorFormat, policy.Name, err) + klog.ErrorS(err, errorMessage, klog.KObj(policy)) + return ctrl.Result{}, err } // Collect all bindings. @@ -185,7 +189,8 @@ func (f *framework) RunSchedulingCycleFor(ctx context.Context, policy *fleetv1be // TO-DO (chenyu1): explore the possbilities of using a mutation cache for better performance. bindings, err := f.collectBindings(ctx, policy) if err != nil { - return ctrl.Result{}, fmt.Errorf(errorFormat, policy.Name, err) + klog.ErrorS(err, errorMessage, klog.KObj(policy)) + return ctrl.Result{}, err } // Parse the bindings, find out @@ -206,7 +211,8 @@ func (f *framework) RunSchedulingCycleFor(ctx context.Context, policy *fleetv1be // If a binding has been marked for deletion and no longer has the dispatcher finalizer, the scheduler // removes its own finalizer from it, to clear it for eventual deletion. if err := f.removeSchedulerFinalizerFromBindings(ctx, deletedWithoutDispatcherFinalizer); err != nil { - return ctrl.Result{}, fmt.Errorf(errorFormat, policy.Name, err) + klog.ErrorS(err, errorMessage, klog.KObj(policy)) + return ctrl.Result{}, err } // Not yet fully implemented. 
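To make the annotation contract from patch 07 concrete, a policy snapshot that satisfies extractNumOfClustersFromPolicySnapshot is annotated as in the sketch below. This is a hypothetical in-package snippet using the post-rebase v1beta1 types and assuming the usual metav1 import; the function name, snapshot name, and the value "3" are arbitrary. A missing annotation, a non-integer value such as "abc", or a negative value such as "-1" is rejected instead.

// illustrateNumOfClustersAnnotation shows the annotation contract enforced by
// extractNumOfClustersFromPolicySnapshot.
func illustrateNumOfClustersAnnotation() (int, error) {
	policy := &fleetv1beta1.ClusterPolicySnapshot{
		ObjectMeta: metav1.ObjectMeta{
			Name: "test-placement-1",
			Annotations: map[string]string{
				// The desired cluster count is carried as a string-encoded, non-negative integer.
				fleetv1beta1.NumOfClustersAnnotation: "3",
			},
		},
	}
	// Returns 3 and a nil error here; malformed values surface as unexpected-behavior errors.
	return extractNumOfClustersFromPolicySnapshot(policy)
}
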
@@ -219,7 +225,7 @@ func (f *framework) collectClusters(ctx context.Context) ([]fleetv1beta1.MemberC clusterList := &fleetv1beta1.MemberClusterList{} if err := f.client.List(ctx, clusterList, &client.ListOptions{}); err != nil { - return nil, fmt.Errorf(errorFormat, err) + return nil, controller.NewAPIServerError(fmt.Errorf(errorFormat, err)) } return clusterList.Items, nil } @@ -232,14 +238,14 @@ func (f *framework) collectBindings(ctx context.Context, policy *fleetv1beta1.Cl if err != nil { // This branch should never run in most cases, as the a policy snapshot is expected to be // owned by a CRP. - return nil, fmt.Errorf(errorFormat, err) + return nil, controller.NewUnexpectedBehaviorError(fmt.Errorf(errorFormat, err)) } bindingList := &fleetv1beta1.ClusterResourceBindingList{} labelSelector := labels.SelectorFromSet(labels.Set{fleetv1beta1.CRPTrackingLabel: bindingOwner}) // List bindings directly from the API server. if err := f.uncachedReader.List(ctx, bindingList, &client.ListOptions{LabelSelector: labelSelector}); err != nil { - return nil, fmt.Errorf(errorFormat, err) + return nil, controller.NewAPIServerError(fmt.Errorf(errorFormat, err)) } return bindingList.Items, nil } @@ -251,7 +257,7 @@ func (f *framework) removeSchedulerFinalizerFromBindings(ctx context.Context, bi for _, binding := range bindings { controllerutil.RemoveFinalizer(binding, utils.SchedulerFinalizer) if err := f.client.Update(ctx, binding, &client.UpdateOptions{}); err != nil { - return fmt.Errorf(errorFormat, binding.Name, err) + return controller.NewAPIServerError(fmt.Errorf(errorFormat, binding.Name, err)) } } return nil diff --git a/pkg/scheduler/framework/framework_test.go b/pkg/scheduler/framework/framework_test.go index b8c625f3e..a1d8e61ea 100644 --- a/pkg/scheduler/framework/framework_test.go +++ b/pkg/scheduler/framework/framework_test.go @@ -18,7 +18,7 @@ import ( "sigs.k8s.io/controller-runtime/pkg/client/fake" "sigs.k8s.io/controller-runtime/pkg/controller/controllerutil" - fleetv1 "go.goms.io/fleet/apis/v1" + fleetv1beta1 "go.goms.io/fleet/apis/placement/v1beta1" "go.goms.io/fleet/pkg/utils" ) @@ -33,7 +33,7 @@ const ( // TestMain sets up the test environment. func TestMain(m *testing.M) { // Add custom APIs to the runtime scheme. 
- if err := fleetv1.AddToScheme(scheme.Scheme); err != nil { + if err := fleetv1beta1.AddToScheme(scheme.Scheme); err != nil { log.Fatalf("failed to add custom APIs to the runtime scheme: %v", err) } @@ -44,17 +44,17 @@ func TestMain(m *testing.M) { func TestExtractNumOfClustersFromPolicySnapshot(t *testing.T) { testCases := []struct { name string - policy *fleetv1.ClusterPolicySnapshot + policy *fleetv1beta1.ClusterPolicySnapshot wantNumOfClusters int expectedToFail bool }{ { name: "valid annotation", - policy: &fleetv1.ClusterPolicySnapshot{ + policy: &fleetv1beta1.ClusterPolicySnapshot{ ObjectMeta: metav1.ObjectMeta{ Name: policyName, Annotations: map[string]string{ - fleetv1.NumOfClustersAnnotation: "1", + fleetv1beta1.NumOfClustersAnnotation: "1", }, }, }, @@ -62,7 +62,7 @@ func TestExtractNumOfClustersFromPolicySnapshot(t *testing.T) { }, { name: "no annotation", - policy: &fleetv1.ClusterPolicySnapshot{ + policy: &fleetv1beta1.ClusterPolicySnapshot{ ObjectMeta: metav1.ObjectMeta{ Name: policyName, }, @@ -71,11 +71,11 @@ func TestExtractNumOfClustersFromPolicySnapshot(t *testing.T) { }, { name: "invalid annotation: not an integer", - policy: &fleetv1.ClusterPolicySnapshot{ + policy: &fleetv1beta1.ClusterPolicySnapshot{ ObjectMeta: metav1.ObjectMeta{ Name: policyName, Annotations: map[string]string{ - fleetv1.NumOfClustersAnnotation: "abc", + fleetv1beta1.NumOfClustersAnnotation: "abc", }, }, }, @@ -83,11 +83,11 @@ func TestExtractNumOfClustersFromPolicySnapshot(t *testing.T) { }, { name: "invalid annotation: negative integer", - policy: &fleetv1.ClusterPolicySnapshot{ + policy: &fleetv1beta1.ClusterPolicySnapshot{ ObjectMeta: metav1.ObjectMeta{ Name: policyName, Annotations: map[string]string{ - fleetv1.NumOfClustersAnnotation: "-1", + fleetv1beta1.NumOfClustersAnnotation: "-1", }, }, }, @@ -114,7 +114,7 @@ func TestExtractNumOfClustersFromPolicySnapshot(t *testing.T) { // TestCollectClusters tests the collectClusters method. func TestCollectClusters(t *testing.T) { - cluster := fleetv1.MemberCluster{ + cluster := fleetv1beta1.MemberCluster{ ObjectMeta: metav1.ObjectMeta{ Name: "cluster-1", }, @@ -135,7 +135,7 @@ func TestCollectClusters(t *testing.T) { t.Fatalf("collectClusters() = %v, %v, want no error", clusters, err) } - wantClusters := []fleetv1.MemberCluster{cluster} + wantClusters := []fleetv1beta1.MemberCluster{cluster} if !cmp.Equal(clusters, wantClusters) { t.Fatalf("collectClusters() = %v, %v, want %v, nil", clusters, err, wantClusters) } @@ -145,17 +145,17 @@ func TestCollectClusters(t *testing.T) { func TestExtractOwnerCRPNameFromPolicySnapshot(t *testing.T) { testCases := []struct { name string - policy *fleetv1.ClusterPolicySnapshot + policy *fleetv1beta1.ClusterPolicySnapshot expectToFail bool }{ { name: "policy with CRP owner reference", - policy: &fleetv1.ClusterPolicySnapshot{ + policy: &fleetv1beta1.ClusterPolicySnapshot{ ObjectMeta: metav1.ObjectMeta{ Name: policyName, OwnerReferences: []metav1.OwnerReference{ { - Kind: utils.CRPV1GVK.Kind, + Kind: utils.CRPV1Beta1GVK.Kind, Name: CRPName, }, }, @@ -164,7 +164,7 @@ func TestExtractOwnerCRPNameFromPolicySnapshot(t *testing.T) { }, { name: "policy without CRP owner reference", - policy: &fleetv1.ClusterPolicySnapshot{ + policy: &fleetv1beta1.ClusterPolicySnapshot{ ObjectMeta: metav1.ObjectMeta{ Name: policyName, OwnerReferences: []metav1.OwnerReference{}, @@ -193,11 +193,11 @@ func TestExtractOwnerCRPNameFromPolicySnapshot(t *testing.T) { // TestCollectBindings tests the collectBindings method. 
func TestCollectBindings(t *testing.T) { - binding := &fleetv1.ClusterResourceBinding{ + binding := &fleetv1beta1.ClusterResourceBinding{ ObjectMeta: metav1.ObjectMeta{ Name: bindingName, Labels: map[string]string{ - fleetv1.CRPTrackingLabel: CRPName, + fleetv1beta1.CRPTrackingLabel: CRPName, }, }, } @@ -205,20 +205,20 @@ func TestCollectBindings(t *testing.T) { testCases := []struct { name string - binding *fleetv1.ClusterResourceBinding - policy *fleetv1.ClusterPolicySnapshot + binding *fleetv1beta1.ClusterResourceBinding + policy *fleetv1beta1.ClusterPolicySnapshot expectToFail bool expectToFindNoBindings bool }{ { name: "found matching bindings", binding: binding, - policy: &fleetv1.ClusterPolicySnapshot{ + policy: &fleetv1beta1.ClusterPolicySnapshot{ ObjectMeta: metav1.ObjectMeta{ Name: policyName, OwnerReferences: []metav1.OwnerReference{ { - Kind: utils.CRPV1GVK.Kind, + Kind: utils.CRPV1Beta1GVK.Kind, Name: CRPName, }, }, @@ -227,7 +227,7 @@ func TestCollectBindings(t *testing.T) { }, { name: "no owner reference in policy", - policy: &fleetv1.ClusterPolicySnapshot{ + policy: &fleetv1beta1.ClusterPolicySnapshot{ ObjectMeta: metav1.ObjectMeta{ Name: policyName, }, @@ -237,12 +237,12 @@ func TestCollectBindings(t *testing.T) { { name: "no matching bindings", binding: binding, - policy: &fleetv1.ClusterPolicySnapshot{ + policy: &fleetv1beta1.ClusterPolicySnapshot{ ObjectMeta: metav1.ObjectMeta{ Name: policyName, OwnerReferences: []metav1.OwnerReference{ { - Kind: utils.CRPV1GVK.Kind, + Kind: utils.CRPV1Beta1GVK.Kind, Name: altCRPName, }, }, @@ -261,7 +261,7 @@ func TestCollectBindings(t *testing.T) { fakeClient := fakeClientBuilder.Build() // Construct framework manually instead of using NewFramework() to avoid mocking the controller manager. f := &framework{ - apiReader: fakeClient, + uncachedReader: fakeClient, } ctx := context.Background() @@ -276,7 +276,7 @@ func TestCollectBindings(t *testing.T) { if err != nil { t.Fatalf("collectBindings() = %v, %v, want %v, no error", bindings, err, binding) } - wantBindings := []fleetv1.ClusterResourceBinding{} + wantBindings := []fleetv1beta1.ClusterResourceBinding{} if !tc.expectToFindNoBindings { wantBindings = append(wantBindings, *binding) } @@ -290,12 +290,12 @@ func TestCollectBindings(t *testing.T) { // TestClassifyBindings tests the classifyBindings function. 
func TestClassifyBindings(t *testing.T) { timestamp := metav1.Now() - activeBinding := &fleetv1.ClusterResourceBinding{ + activeBinding := &fleetv1beta1.ClusterResourceBinding{ ObjectMeta: metav1.ObjectMeta{ Name: "active-binding", }, } - deletedBindingWithDispatcherFinalizer := &fleetv1.ClusterResourceBinding{ + deletedBindingWithDispatcherFinalizer := &fleetv1beta1.ClusterResourceBinding{ ObjectMeta: metav1.ObjectMeta{ Name: "deleted-binding-with-dispatcher-finalizer", DeletionTimestamp: ×tamp, @@ -304,28 +304,28 @@ func TestClassifyBindings(t *testing.T) { }, }, } - deletedBindingWithoutDispatcherFinalizer := &fleetv1.ClusterResourceBinding{ + deletedBindingWithoutDispatcherFinalizer := &fleetv1beta1.ClusterResourceBinding{ ObjectMeta: metav1.ObjectMeta{ Name: "deleted-binding-without-dispatcher-finalizer", DeletionTimestamp: ×tamp, }, } - active, deletedWith, deletedWithout := classifyBindings([]fleetv1.ClusterResourceBinding{*activeBinding, *deletedBindingWithDispatcherFinalizer, *deletedBindingWithoutDispatcherFinalizer}) - if diff := cmp.Diff(active, []*fleetv1.ClusterResourceBinding{activeBinding}); diff != "" { - t.Errorf("classifyBindings() active = %v, want %v", active, []*fleetv1.ClusterResourceBinding{activeBinding}) + active, deletedWith, deletedWithout := classifyBindings([]fleetv1beta1.ClusterResourceBinding{*activeBinding, *deletedBindingWithDispatcherFinalizer, *deletedBindingWithoutDispatcherFinalizer}) + if diff := cmp.Diff(active, []*fleetv1beta1.ClusterResourceBinding{activeBinding}); diff != "" { + t.Errorf("classifyBindings() active = %v, want %v", active, []*fleetv1beta1.ClusterResourceBinding{activeBinding}) } - if !cmp.Equal(deletedWith, []*fleetv1.ClusterResourceBinding{deletedBindingWithDispatcherFinalizer}) { - t.Errorf("classifyBindings() deletedWithDispatcherFinalizer = %v, want %v", deletedWith, []*fleetv1.ClusterResourceBinding{deletedBindingWithDispatcherFinalizer}) + if !cmp.Equal(deletedWith, []*fleetv1beta1.ClusterResourceBinding{deletedBindingWithDispatcherFinalizer}) { + t.Errorf("classifyBindings() deletedWithDispatcherFinalizer = %v, want %v", deletedWith, []*fleetv1beta1.ClusterResourceBinding{deletedBindingWithDispatcherFinalizer}) } - if !cmp.Equal(deletedWithout, []*fleetv1.ClusterResourceBinding{deletedBindingWithoutDispatcherFinalizer}) { - t.Errorf("classifyBindings() deletedWithoutDispatcherFinalizer = %v, want %v", deletedWithout, []*fleetv1.ClusterResourceBinding{deletedBindingWithoutDispatcherFinalizer}) + if !cmp.Equal(deletedWithout, []*fleetv1beta1.ClusterResourceBinding{deletedBindingWithoutDispatcherFinalizer}) { + t.Errorf("classifyBindings() deletedWithoutDispatcherFinalizer = %v, want %v", deletedWithout, []*fleetv1beta1.ClusterResourceBinding{deletedBindingWithoutDispatcherFinalizer}) } } // TestRemoveSchedulerFinalizerFromBindings tests the removeSchedulerFinalizerFromBindings method. 
func TestRemoveSchedulerFinalizerFromBindings(t *testing.T) { - binding := &fleetv1.ClusterResourceBinding{ + binding := &fleetv1beta1.ClusterResourceBinding{ ObjectMeta: metav1.ObjectMeta{ Name: bindingName, Finalizers: []string{utils.SchedulerFinalizer}, @@ -342,12 +342,12 @@ func TestRemoveSchedulerFinalizerFromBindings(t *testing.T) { } ctx := context.Background() - if err := f.removeSchedulerFinalizerFromBindings(ctx, []*fleetv1.ClusterResourceBinding{binding}); err != nil { + if err := f.removeSchedulerFinalizerFromBindings(ctx, []*fleetv1beta1.ClusterResourceBinding{binding}); err != nil { t.Fatalf("removeSchedulerFinalizerFromBindings() = %v, want no error", err) } // Verify that the finalizer has been removed. - updatedBinding := &fleetv1.ClusterResourceBinding{} + updatedBinding := &fleetv1beta1.ClusterResourceBinding{} if err := fakeClient.Get(ctx, types.NamespacedName{Name: bindingName}, updatedBinding); err != nil { t.Fatalf("Binding Get(%v) = %v, want no error", bindingName, err) } diff --git a/pkg/scheduler/framework/utils.go b/pkg/scheduler/framework/utils.go index 80a6e180e..f13959a94 100644 --- a/pkg/scheduler/framework/utils.go +++ b/pkg/scheduler/framework/utils.go @@ -11,31 +11,32 @@ import ( "sigs.k8s.io/controller-runtime/pkg/controller/controllerutil" - fleetv1 "go.goms.io/fleet/apis/v1" + fleetv1beta1 "go.goms.io/fleet/apis/placement/v1beta1" "go.goms.io/fleet/pkg/utils" + "go.goms.io/fleet/pkg/utils/controller" ) // extractNumOfClustersFromPolicySnapshot extracts the numOfClusters from the policy snapshot. -func extractNumOfClustersFromPolicySnapshot(policy *fleetv1.ClusterPolicySnapshot) (int, error) { - numOfClustersStr, ok := policy.Annotations[fleetv1.NumOfClustersAnnotation] +func extractNumOfClustersFromPolicySnapshot(policy *fleetv1beta1.ClusterPolicySnapshot) (int, error) { + numOfClustersStr, ok := policy.Annotations[fleetv1beta1.NumOfClustersAnnotation] if !ok { - return 0, fmt.Errorf("cannot find annotation %s", fleetv1.NumOfClustersAnnotation) + return 0, controller.NewUnexpectedBehaviorError(fmt.Errorf("cannot find annotation %s", fleetv1beta1.NumOfClustersAnnotation)) } // Cast the annotation to an integer; throw an error if the cast cannot be completed or the value is negative. numOfClusters, err := strconv.Atoi(numOfClustersStr) if err != nil || numOfClusters < 0 { - return 0, fmt.Errorf("invalid annotation %s: Atoi(%s) = %v, %v", fleetv1.NumOfClustersAnnotation, numOfClustersStr, numOfClusters, err) + return 0, controller.NewUnexpectedBehaviorError(fmt.Errorf("invalid annotation %s: Atoi(%s) = %v, %v", fleetv1beta1.NumOfClustersAnnotation, numOfClustersStr, numOfClusters, err)) } return numOfClusters, nil } // extractOwnerCRPNameFromPolicySnapshot extracts the name of the owner CRP from the policy snapshot. 
-func extractOwnerCRPNameFromPolicySnapshot(policy *fleetv1.ClusterPolicySnapshot) (string, error) { +func extractOwnerCRPNameFromPolicySnapshot(policy *fleetv1beta1.ClusterPolicySnapshot) (string, error) { var owner string for _, ownerRef := range policy.OwnerReferences { - if ownerRef.Kind == utils.CRPV1GVK.Kind { + if ownerRef.Kind == utils.CRPV1Beta1GVK.Kind { owner = ownerRef.Name break } @@ -50,11 +51,11 @@ func extractOwnerCRPNameFromPolicySnapshot(policy *fleetv1.ClusterPolicySnapshot // * active: active bindings, that is, bindings that are not marked for deletion; and // * deletedWithDispatcherFinalizer: bindings that are marked for deletion, but still has the dispatcher finalizer present; and // * deletedWithoutDispatcherFinalizer: bindings that are marked for deletion, and the dispatcher finalizer is already removed. -func classifyBindings(bindings []fleetv1.ClusterResourceBinding) (active, deletedWithDispatcherFinalizer, deletedWithoutDispatcherFinalizer []*fleetv1.ClusterResourceBinding) { +func classifyBindings(bindings []fleetv1beta1.ClusterResourceBinding) (active, deletedWithDispatcherFinalizer, deletedWithoutDispatcherFinalizer []*fleetv1beta1.ClusterResourceBinding) { // Pre-allocate arrays. - active = make([]*fleetv1.ClusterResourceBinding, 0, len(bindings)) - deletedWithDispatcherFinalizer = make([]*fleetv1.ClusterResourceBinding, 0, len(bindings)) - deletedWithoutDispatcherFinalizer = make([]*fleetv1.ClusterResourceBinding, 0, len(bindings)) + active = make([]*fleetv1beta1.ClusterResourceBinding, 0, len(bindings)) + deletedWithDispatcherFinalizer = make([]*fleetv1beta1.ClusterResourceBinding, 0, len(bindings)) + deletedWithoutDispatcherFinalizer = make([]*fleetv1beta1.ClusterResourceBinding, 0, len(bindings)) for idx := range bindings { binding := bindings[idx] diff --git a/pkg/utils/common.go b/pkg/utils/common.go index 54ec9a4fe..95f09d33b 100644 --- a/pkg/utils/common.go +++ b/pkg/utils/common.go @@ -24,7 +24,7 @@ import ( "k8s.io/klog/v2" workv1alpha1 "sigs.k8s.io/work-api/pkg/apis/v1alpha1" - fleetv1 "go.goms.io/fleet/apis/v1" + fleetv1beta1 "go.goms.io/fleet/apis/placement/v1beta1" fleetv1alpha1 "go.goms.io/fleet/apis/v1alpha1" "go.goms.io/fleet/pkg/utils/informer" ) @@ -151,9 +151,10 @@ var ( Resource: "services", } - CRPV1GVK = schema.GroupVersionKind{ - Group: fleetv1.GroupVersion.Group, - Version: fleetv1.GroupVersion.Version, + // TO-DO (chenyu1): Add more common GVRs/GVKs here. 
+ CRPV1Beta1GVK = schema.GroupVersionKind{ + Group: fleetv1beta1.GroupVersion.Group, + Version: fleetv1beta1.GroupVersion.Version, Kind: "ClusterResourcePlacement", } ) From 20bd7d2561edf3239fc8ea784094464d7fda1f3c Mon Sep 17 00:00:00 2001 From: michaelawyu Date: Wed, 14 Jun 2023 19:02:49 +0800 Subject: [PATCH 09/15] Added more scheduler framework logic --- pkg/scheduler/framework/utils.go | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/pkg/scheduler/framework/utils.go b/pkg/scheduler/framework/utils.go index f13959a94..a2ca7683b 100644 --- a/pkg/scheduler/framework/utils.go +++ b/pkg/scheduler/framework/utils.go @@ -7,6 +7,7 @@ package framework import ( "fmt" +<<<<<<< HEAD "strconv" "sigs.k8s.io/controller-runtime/pkg/controller/controllerutil" @@ -37,6 +38,20 @@ func extractOwnerCRPNameFromPolicySnapshot(policy *fleetv1beta1.ClusterPolicySna var owner string for _, ownerRef := range policy.OwnerReferences { if ownerRef.Kind == utils.CRPV1Beta1GVK.Kind { +======= + + "sigs.k8s.io/controller-runtime/pkg/controller/controllerutil" + + fleetv1 "go.goms.io/fleet/apis/v1" + "go.goms.io/fleet/pkg/utils" +) + +// extractOwnerCRPNameFromPolicySnapshot extracts the name of the owner CRP from the policy snapshot. +func extractOwnerCRPNameFromPolicySnapshot(policy *fleetv1.ClusterPolicySnapshot) (string, error) { + var owner string + for _, ownerRef := range policy.OwnerReferences { + if ownerRef.Kind == utils.CRPV1GVK.Kind { +>>>>>>> 2223a3a (Added more scheduler framework logic) owner = ownerRef.Name break } @@ -51,11 +66,19 @@ func extractOwnerCRPNameFromPolicySnapshot(policy *fleetv1beta1.ClusterPolicySna // * active: active bindings, that is, bindings that are not marked for deletion; and // * deletedWithDispatcherFinalizer: bindings that are marked for deletion, but still has the dispatcher finalizer present; and // * deletedWithoutDispatcherFinalizer: bindings that are marked for deletion, and the dispatcher finalizer is already removed. +<<<<<<< HEAD func classifyBindings(bindings []fleetv1beta1.ClusterResourceBinding) (active, deletedWithDispatcherFinalizer, deletedWithoutDispatcherFinalizer []*fleetv1beta1.ClusterResourceBinding) { // Pre-allocate arrays. active = make([]*fleetv1beta1.ClusterResourceBinding, 0, len(bindings)) deletedWithDispatcherFinalizer = make([]*fleetv1beta1.ClusterResourceBinding, 0, len(bindings)) deletedWithoutDispatcherFinalizer = make([]*fleetv1beta1.ClusterResourceBinding, 0, len(bindings)) +======= +func classifyBindings(bindings []fleetv1.ClusterResourceBinding) (active, deletedWithDispatcherFinalizer, deletedWithoutDispatcherFinalizer []*fleetv1.ClusterResourceBinding) { + // Pre-allocate arrays. 
+ active = make([]*fleetv1.ClusterResourceBinding, 0, len(bindings)) + deletedWithDispatcherFinalizer = make([]*fleetv1.ClusterResourceBinding, 0, len(bindings)) + deletedWithoutDispatcherFinalizer = make([]*fleetv1.ClusterResourceBinding, 0, len(bindings)) +>>>>>>> 2223a3a (Added more scheduler framework logic) for idx := range bindings { binding := bindings[idx] From ca24bcf7f378520f78e9b67b72169c7c36604339 Mon Sep 17 00:00:00 2001 From: michaelawyu Date: Wed, 14 Jun 2023 20:23:00 +0800 Subject: [PATCH 10/15] Minor fixes --- pkg/scheduler/framework/framework.go | 8 ++++++++ pkg/scheduler/framework/utils.go | 23 ----------------------- 2 files changed, 8 insertions(+), 23 deletions(-) diff --git a/pkg/scheduler/framework/framework.go b/pkg/scheduler/framework/framework.go index 890bca4e3..636c8714f 100644 --- a/pkg/scheduler/framework/framework.go +++ b/pkg/scheduler/framework/framework.go @@ -161,6 +161,14 @@ func (f *framework) RunSchedulingCycleFor(ctx context.Context, policy *fleetv1be return ctrl.Result{}, err } + // Retrieve the desired number of clusters from the policy. + // + // TO-DO (chenyu1): assign variable(s) when more logic is added. + _, err = extractNumOfClustersFromPolicySnapshot(policy) + if err != nil { + return ctrl.Result{}, fmt.Errorf(errorFormat, policy.Name, err) + } + // Collect all clusters. // // Note that clusters here are listed from the cached client for improved performance. This is diff --git a/pkg/scheduler/framework/utils.go b/pkg/scheduler/framework/utils.go index a2ca7683b..f13959a94 100644 --- a/pkg/scheduler/framework/utils.go +++ b/pkg/scheduler/framework/utils.go @@ -7,7 +7,6 @@ package framework import ( "fmt" -<<<<<<< HEAD "strconv" "sigs.k8s.io/controller-runtime/pkg/controller/controllerutil" @@ -38,20 +37,6 @@ func extractOwnerCRPNameFromPolicySnapshot(policy *fleetv1beta1.ClusterPolicySna var owner string for _, ownerRef := range policy.OwnerReferences { if ownerRef.Kind == utils.CRPV1Beta1GVK.Kind { -======= - - "sigs.k8s.io/controller-runtime/pkg/controller/controllerutil" - - fleetv1 "go.goms.io/fleet/apis/v1" - "go.goms.io/fleet/pkg/utils" -) - -// extractOwnerCRPNameFromPolicySnapshot extracts the name of the owner CRP from the policy snapshot. -func extractOwnerCRPNameFromPolicySnapshot(policy *fleetv1.ClusterPolicySnapshot) (string, error) { - var owner string - for _, ownerRef := range policy.OwnerReferences { - if ownerRef.Kind == utils.CRPV1GVK.Kind { ->>>>>>> 2223a3a (Added more scheduler framework logic) owner = ownerRef.Name break } @@ -66,19 +51,11 @@ func extractOwnerCRPNameFromPolicySnapshot(policy *fleetv1.ClusterPolicySnapshot // * active: active bindings, that is, bindings that are not marked for deletion; and // * deletedWithDispatcherFinalizer: bindings that are marked for deletion, but still has the dispatcher finalizer present; and // * deletedWithoutDispatcherFinalizer: bindings that are marked for deletion, and the dispatcher finalizer is already removed. -<<<<<<< HEAD func classifyBindings(bindings []fleetv1beta1.ClusterResourceBinding) (active, deletedWithDispatcherFinalizer, deletedWithoutDispatcherFinalizer []*fleetv1beta1.ClusterResourceBinding) { // Pre-allocate arrays. 
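The hunks in this and the surrounding patches show only the signature, doc comment, and pre-allocation of classifyBindings; the body of its loop is not included in these excerpts. Based on the doc comment, a plausible reading of the three-way split is sketched below with stand-in types; the binding struct and the dispatcherFinalizer constant are illustrative only, not the fleet API types or the real finalizer name:

package main

import (
	"fmt"

	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
)

// dispatcherFinalizer is a made-up name used only for this sketch.
const dispatcherFinalizer = "example.fleet.io/dispatcher-cleanup"

// binding stands in for a cluster resource binding.
type binding struct {
	metav1.ObjectMeta
}

func hasFinalizer(finalizers []string, target string) bool {
	for _, f := range finalizers {
		if f == target {
			return true
		}
	}
	return false
}

// classify mirrors the three categories described in the classifyBindings doc comment:
// not marked for deletion; deleted but still holding the dispatcher finalizer; and deleted
// with the dispatcher finalizer already removed.
func classify(bindings []binding) (active, deletedWith, deletedWithout []*binding) {
	for i := range bindings {
		b := &bindings[i]
		switch {
		case b.DeletionTimestamp.IsZero():
			active = append(active, b)
		case hasFinalizer(b.Finalizers, dispatcherFinalizer):
			deletedWith = append(deletedWith, b)
		default:
			deletedWithout = append(deletedWithout, b)
		}
	}
	return active, deletedWith, deletedWithout
}

func main() {
	now := metav1.Now()
	a, dw, dwo := classify([]binding{
		{ObjectMeta: metav1.ObjectMeta{Name: "active"}},
		{ObjectMeta: metav1.ObjectMeta{Name: "deleting", DeletionTimestamp: &now, Finalizers: []string{dispatcherFinalizer}}},
		{ObjectMeta: metav1.ObjectMeta{Name: "cleared", DeletionTimestamp: &now}},
	})
	fmt.Println(len(a), len(dw), len(dwo)) // 1 1 1
}

The ordering of the checks matters: a binding is only inspected for the dispatcher finalizer once it is known to be marked for deletion.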
active = make([]*fleetv1beta1.ClusterResourceBinding, 0, len(bindings)) deletedWithDispatcherFinalizer = make([]*fleetv1beta1.ClusterResourceBinding, 0, len(bindings)) deletedWithoutDispatcherFinalizer = make([]*fleetv1beta1.ClusterResourceBinding, 0, len(bindings)) -======= -func classifyBindings(bindings []fleetv1.ClusterResourceBinding) (active, deletedWithDispatcherFinalizer, deletedWithoutDispatcherFinalizer []*fleetv1.ClusterResourceBinding) { - // Pre-allocate arrays. - active = make([]*fleetv1.ClusterResourceBinding, 0, len(bindings)) - deletedWithDispatcherFinalizer = make([]*fleetv1.ClusterResourceBinding, 0, len(bindings)) - deletedWithoutDispatcherFinalizer = make([]*fleetv1.ClusterResourceBinding, 0, len(bindings)) ->>>>>>> 2223a3a (Added more scheduler framework logic) for idx := range bindings { binding := bindings[idx] From 7a6d40b28202d2f6dcaff3fc27ed64901718b3a8 Mon Sep 17 00:00:00 2001 From: michaelawyu Date: Wed, 14 Jun 2023 20:49:00 +0800 Subject: [PATCH 11/15] Minor fixes --- apis/placement/v1beta1/binding_types.go | 3 + ...nt.karavel.io_clusterresourcebindings.yaml | 43 ++++ pkg/scheduler/framework/framework.go | 178 ++++++++++++- pkg/scheduler/framework/utils.go | 101 ++++++++ pkg/utils/condition/condition.go | 41 +++ pkg/utils/condition/condition_test.go | 234 ++++++++++++++++++ 6 files changed, 594 insertions(+), 6 deletions(-) create mode 100644 pkg/utils/condition/condition.go create mode 100644 pkg/utils/condition/condition_test.go diff --git a/apis/placement/v1beta1/binding_types.go b/apis/placement/v1beta1/binding_types.go index cdefbb892..88efec0e8 100644 --- a/apis/placement/v1beta1/binding_types.go +++ b/apis/placement/v1beta1/binding_types.go @@ -40,6 +40,9 @@ type ResourceBindingSpec struct { // TargetCluster is the name of the cluster that the scheduler assigns the resources to. TargetCluster string `json:"targetCluster"` + + // ClusterDecision explains why the scheduler makes this binding. + ClusterDecision ClusterDecision `json:"clusterDecision"` } // ResourceBindingStatus represents the current status of a ClusterResourceBinding. diff --git a/config/crd/bases/placement.karavel.io_clusterresourcebindings.yaml b/config/crd/bases/placement.karavel.io_clusterresourcebindings.yaml index 87fbee0be..c46de4143 100644 --- a/config/crd/bases/placement.karavel.io_clusterresourcebindings.yaml +++ b/config/crd/bases/placement.karavel.io_clusterresourcebindings.yaml @@ -47,6 +47,48 @@ spec: spec: description: The desired state of ClusterResourceBinding. properties: + clusterDecision: + description: ClusterDecision explains why the scheduler makes this + binding. + properties: + clusterName: + description: ClusterName is the name of the ManagedCluster. If + it is not empty, its value should be unique cross all placement + decisions for the Placement. + type: string + clusterScore: + description: ClusterScore represents the score of the cluster + calculated by the scheduler. + properties: + affinityScore: + description: AffinityScore represents the affinity score of + the cluster calculated by the last scheduling decision based + on the preferred affinity selector. An affinity score may + not present if the cluster does not meet the required affinity. + format: int32 + type: integer + priorityScore: + description: TopologySpreadScore represents the priority score + of the cluster calculated by the last scheduling decision + based on the topology spread applied to the cluster. A priority + score may not present if the cluster does not meet the topology + spread. 
+ format: int32 + type: integer + type: object + reason: + description: Reason represents the reason why the cluster is selected + or not. + type: string + selected: + description: Selected indicates if this cluster is selected by + the scheduler. + type: boolean + required: + - clusterName + - reason + - selected + type: object resourceSnapshotName: description: ResourceSnapshotName is the name of the resource snapshot that this resource binding points to. If the resources are divided @@ -58,6 +100,7 @@ spec: assigns the resources to. type: string required: + - clusterDecision - resourceSnapshotName - targetCluster type: object diff --git a/pkg/scheduler/framework/framework.go b/pkg/scheduler/framework/framework.go index 636c8714f..99ea22c4e 100644 --- a/pkg/scheduler/framework/framework.go +++ b/pkg/scheduler/framework/framework.go @@ -10,7 +10,10 @@ package framework import ( "context" "fmt" + "sort" + "k8s.io/apimachinery/pkg/api/meta" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/labels" "k8s.io/client-go/tools/record" "k8s.io/klog/v2" @@ -21,12 +24,27 @@ import ( fleetv1beta1 "go.goms.io/fleet/apis/placement/v1beta1" "go.goms.io/fleet/pkg/scheduler/framework/parallelizer" "go.goms.io/fleet/pkg/utils" + "go.goms.io/fleet/pkg/utils/condition" "go.goms.io/fleet/pkg/utils/controller" ) const ( // eventRecorderNameTemplate is the template used to format event recorder name for a scheduler framework. eventRecorderNameTemplate = "scheduler-framework-%s" + + // maxClusterDecisionCount controls the maximum number of decisions added to the policy snapshot status. + // + // Note that all picked clusters will have their associated decisions on the status, even if the number + // exceeds this limit. The limit is mostly for filling up the status with reasons why a cluster is + // **not** picked by the scheduler, when there are enough number of clusters to choose from, and not + // enough picked clusters. + maxClusterDecisionCount = 20 + + // The reasons and messages for scheduled conditions. + fullyScheduledReason = "SchedulingCompleted" + fullyScheduledMessage = "all required number of bindings have been created" + notFullyScheduledReason = "Pendingscheduling" + notFullyScheduledMessage = "might not have enough bindings created" ) // Handle is an interface which allows plugins to access some shared structs (e.g., client, manager) @@ -162,9 +180,7 @@ func (f *framework) RunSchedulingCycleFor(ctx context.Context, policy *fleetv1be } // Retrieve the desired number of clusters from the policy. - // - // TO-DO (chenyu1): assign variable(s) when more logic is added. - _, err = extractNumOfClustersFromPolicySnapshot(policy) + numOfClusters, err := extractNumOfClustersFromPolicySnapshot(policy) if err != nil { return ctrl.Result{}, fmt.Errorf(errorFormat, policy.Name, err) } @@ -212,9 +228,7 @@ func (f *framework) RunSchedulingCycleFor(ctx context.Context, policy *fleetv1be // where the scheduler binds a cluster to a resource placement, even though the dispatcher has // not started, or is still in the process of, removing the same set of resources from // the cluster, triggered by a recently deleted binding. - // - // TO-DO (chenyu1): assign variable(s) when more logic is added. 
- _, _, deletedWithoutDispatcherFinalizer := classifyBindings(bindings) + active, deletedWithDispatcherFinalizer, deletedWithoutDispatcherFinalizer := classifyBindings(bindings) // If a binding has been marked for deletion and no longer has the dispatcher finalizer, the scheduler // removes its own finalizer from it, to clear it for eventual deletion. @@ -223,6 +237,103 @@ func (f *framework) RunSchedulingCycleFor(ctx context.Context, policy *fleetv1be return ctrl.Result{}, err } + // Check if the scheduler should downscale, i.e., remove some bindings. + // Note that the scheduler will only down-scale if + // * the policy is of the PickN type; and + // * the desired number of bindings is less the number of active bindings. + // + // Once again, note that the scheduler only considers a binding to be deleted if it is marked for deletion + // and does not have the dispatcher finalizer. Scheduler will be triggered again for such bindings, + // and a up-scaling may happen then, if appropriate. + act, downscaleCount := shouldDownscale(policy, numOfClusters, active) + + // Downscale if necessary; older (ranked by CreationTimestamp) bindings will be picked first. + // + // This assumes a monotonic clock. + if act { + remaining, err := f.downscale(ctx, active, downscaleCount) + if err != nil { + return ctrl.Result{}, fmt.Errorf(errorFormat, policy.Name, err) + } + + // Prepare new scheduling decisions. + newSchedulingDecisions := prepareNewSchedulingDecisions(policy, remaining, deletedWithDispatcherFinalizer) + // Prepare new scheduling condition. + // + // In the case of downscaling, the scheduler considers the policy to be fully scheduled. + newSchedulingCondition := fullyScheduledCondition(policy) + + // Update the policy snapshot status; since a downscaling has occurred, this update is always requied, hence + // no sameness (no change) checks are necessary. + // + // Note that the op would fail if the policy snapshot is not the latest, so that consistency is + // preserved. + if err := f.updatePolicySnapshotStatus(ctx, policy, newSchedulingDecisions, newSchedulingCondition); err != nil { + return ctrl.Result{}, fmt.Errorf(errorFormat, policy.Name, err) + } + + // Return immediately as there are no more bindings for the scheduler to scheduler at this moment. + return ctrl.Result{}, nil + } + + // If no downscaling is needed, update the policy snapshot status any way. + // + // This is needed as a number of situations (e.g., POST/PUT failures) may lead to inconsistencies between + // the decisions added to the policy snapshot status and the actual list of bindings. + + // Collect current decisions and conditions for sameness (no change) checks. + currentSchedulingDecisions := policy.Status.ClusterDecisions + currentSchedulingCondition := meta.FindStatusCondition(policy.Status.Conditions, string(fleetv1.PolicySnapshotScheduled)) + + // Check if the scheduler needs to take action; a scheduling cycle is only needed if + // * the policy is of the PickAll type; or + // * the policy is of the PickN type, and currently there are not enough number of bindings. + if !shouldSchedule(policy, numOfClusters, len(active)+len(deletedWithDispatcherFinalizer)) { + // No action is needed; however, a status refresh might be warranted. + + // Prepare new scheduling decisions. + newSchedulingDecisions := prepareNewSchedulingDecisions(policy, active, deletedWithDispatcherFinalizer) + // Prepare new scheduling condition. 
+ // + // In this case, since no action is needed, the scheduler considers the policy to be fully scheduled. + newSchedulingCondition := fullyScheduledCondition(policy) + + // Check if a refresh is warranted; the scheduler only update the status when there is a change in + // scheduling decisions and/or the scheduling condition. + if !equalDecisons(currentSchedulingDecisions, newSchedulingDecisions) || condition.EqualCondition(currentSchedulingCondition, &newSchedulingCondition) { + // Update the policy snapshot status. + // + // Note that the op would fail if the policy snapshot is not the latest, so that consistency is + // preserved. + if err := f.updatePolicySnapshotStatus(ctx, policy, newSchedulingDecisions, newSchedulingCondition); err != nil { + return ctrl.Result{}, fmt.Errorf(errorFormat, policy.Name, err) + } + } + + // Return immediate as there no more bindings for the scheduler to schedule at this moment. + return ctrl.Result{}, nil + } + + // The scheduler needs to take action; refresh the status first. + + // Prepare new scheduling decisions. + newSchedulingDecisions := prepareNewSchedulingDecisions(policy, active, deletedWithDispatcherFinalizer) + // Prepare new scheduling condition. + // + // In this case, since action is needed, the scheduler marks the policy as not fully scheduled. + newSchedulingCondition := notFullyScheduledCondition(policy, numOfClusters) + // Check if a refresh is warranted; the scheduler only update the status when there is a change in + // scheduling decisions and/or the scheduling condition. + if !equalDecisons(currentSchedulingDecisions, newSchedulingDecisions) || condition.EqualCondition(currentSchedulingCondition, &newSchedulingCondition) { + // Update the policy snapshot status. + // + // Note that the op would fail if the policy snapshot is not the latest, so that consistency is + // preserved. + if err := f.updatePolicySnapshotStatus(ctx, policy, newSchedulingDecisions, newSchedulingCondition); err != nil { + return ctrl.Result{}, fmt.Errorf(errorFormat, policy.Name, err) + } + } + // Not yet fully implemented. return ctrl.Result{}, nil } @@ -270,3 +381,58 @@ func (f *framework) removeSchedulerFinalizerFromBindings(ctx context.Context, bi } return nil } + +// sortByCreationTimestampBindings is a wrapper which implements Sort.Interface, which allows easy +// sorting of bindings by their CreationTimestamps. +type sortByCreationTimestampBindings struct { + bindings []*fleetv1.ClusterResourceBinding +} + +// Len() is for implementing Sort.Interface. +func (s sortByCreationTimestampBindings) Len() int { + return len(s.bindings) +} + +// Swap() is for implementing Sort.Interface. +func (s sortByCreationTimestampBindings) Swap(i, j int) { + s.bindings[i], s.bindings[j] = s.bindings[j], s.bindings[i] +} + +// Less() is for implementing Sort.Interface. +func (s sortByCreationTimestampBindings) Less(i, j int) bool { + return s.bindings[i].CreationTimestamp.Before(&(s.bindings[j].CreationTimestamp)) +} + +// downscale performs downscaling, removing some number of bindings. It picks the oldest (by CreationTimestamp) +// bindings first. +func (f *framework) downscale(ctx context.Context, active []*fleetv1.ClusterResourceBinding, count int) ([]*fleetv1.ClusterResourceBinding, error) { + errorFormat := "failed to delete binding %s: %w" + + // Sort the bindings by their CreationTimestamps. + sorted := sortByCreationTimestampBindings{bindings: active} + sort.Sort(sorted) + + // Delete the first count number of bindings. 
+ bindingsToDelete := sorted.bindings[:count] + for _, binding := range bindingsToDelete { + if err := f.client.Delete(ctx, binding, &client.DeleteOptions{}); err != nil { + return nil, fmt.Errorf(errorFormat, binding.Name, err) + } + } + + // Return the remaining bindings. + return sorted.bindings[count:], nil +} + +// updatePolicySnapshotStatus updates the status of a policy snapshot, setting new scheduling decisions +// and condition on the object. +func (f *framework) updatePolicySnapshotStatus(ctx context.Context, policy *fleetv1.ClusterPolicySnapshot, decisions []fleetv1.ClusterDecision, condition metav1.Condition) error { + errorFormat := "failed to update policy snapshot status: %w" + + policy.Status.ClusterDecisions = decisions + meta.SetStatusCondition(&policy.Status.Conditions, condition) + if err := f.client.Status().Update(ctx, policy, &client.UpdateOptions{}); err != nil { + return fmt.Errorf(errorFormat, err) + } + return nil +} diff --git a/pkg/scheduler/framework/utils.go b/pkg/scheduler/framework/utils.go index f13959a94..8fd34a3ae 100644 --- a/pkg/scheduler/framework/utils.go +++ b/pkg/scheduler/framework/utils.go @@ -7,8 +7,10 @@ package framework import ( "fmt" + "reflect" "strconv" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "sigs.k8s.io/controller-runtime/pkg/controller/controllerutil" fleetv1beta1 "go.goms.io/fleet/apis/placement/v1beta1" @@ -72,3 +74,102 @@ func classifyBindings(bindings []fleetv1beta1.ClusterResourceBinding) (active, d return active, deletedWithDispatcherFinalizer, deletedWithoutDispatcherFinalizer } + +// shouldDownscale checks if the scheduler needs to perform some downscaling, and (if so) how many bindings +// it should remove. +func shouldDownscale(policy *fleetv1.ClusterPolicySnapshot, numOfClusters int, active []*fleetv1.ClusterResourceBinding) (act bool, count int) { + if policy.Spec.Policy.PlacementType == fleetv1.PickNPlacementType && numOfClusters < len(active) { + return true, len(active) - numOfClusters + } + return false, 0 +} + +// prepareNewSchedulingDecisions returns a list of new scheduling decisions, in accordance with the list +// of existing bindings. +func prepareNewSchedulingDecisions(policy *fleetv1.ClusterPolicySnapshot, existing ...[]*fleetv1.ClusterResourceBinding) []fleetv1.ClusterDecision { + // Pre-allocate arrays. + current := policy.Status.ClusterDecisions + desired := make([]fleetv1.ClusterDecision, 0, len(existing)) + + // Build new scheduling decisions. + for _, bindings := range existing { + for _, binding := range bindings { + desired = append(desired, binding.Spec.ClusterDecision) + } + } + + // Move some decisions from unbound clusters, if there are still enough room. + if diff := maxClusterDecisionCount - len(current); diff > 0 { + for _, decision := range current { + if !decision.Selected { + desired = append(desired, decision) + diff-- + if diff == 0 { + break + } + } + } + } + + return desired +} + +// fullySchedulingCondition returns a condition for fully scheduled policy snapshot. +func fullyScheduledCondition(policy *fleetv1.ClusterPolicySnapshot) metav1.Condition { + return metav1.Condition{ + Type: string(fleetv1.PolicySnapshotScheduled), + Status: metav1.ConditionTrue, + ObservedGeneration: policy.Generation, + LastTransitionTime: metav1.Now(), + Reason: fullyScheduledReason, + Message: fullyScheduledMessage, + } +} + +// shouldSchedule checks if the scheduler needs to perform some scheduling, and (if so) how many bindings. 
+// +// A scheduling cycle is only needed if +// * the policy is of the PickAll type; or +// * the policy is of the PickN type, and currently there are not enough number of bindings. +func shouldSchedule(policy *fleetv1.ClusterPolicySnapshot, numOfClusters, existingBindingsCount int) bool { + if policy.Spec.Policy.PlacementType == fleetv1.PickAllPlacementType { + return true + } + + return numOfClusters > existingBindingsCount +} + +// equalDecisions returns if two arrays of ClusterDecisions are equal; it returns true if +// every decision in one array is also present in the other array regardless of their indexes, +// and vice versa. +func equalDecisons(current, desired []fleetv1.ClusterDecision) bool { + desiredDecisionByCluster := make(map[string]fleetv1.ClusterDecision, len(desired)) + for _, decision := range desired { + desiredDecisionByCluster[decision.ClusterName] = decision + } + + for _, decision := range current { + // Note that it will return false if no matching decision can be found. + if !reflect.DeepEqual(decision, desiredDecisionByCluster[decision.ClusterName]) { + return false + } + } + + return len(current) == len(desired) +} + +// notFullyScheduledCondition returns a condition for not fully scheduled policy snapshot. +func notFullyScheduledCondition(policy *fleetv1.ClusterPolicySnapshot, desiredCount int) metav1.Condition { + message := notFullyScheduledMessage + if policy.Spec.Policy.PlacementType == fleetv1.PickNPlacementType { + message = fmt.Sprintf("%s: expected count %d, current count %d", message, policy.Spec.Policy.NumberOfClusters, desiredCount) + } + return metav1.Condition{ + Type: string(fleetv1.PolicySnapshotScheduled), + Status: metav1.ConditionFalse, + ObservedGeneration: policy.Generation, + LastTransitionTime: metav1.Now(), + Reason: notFullyScheduledReason, + Message: message, + } +} diff --git a/pkg/utils/condition/condition.go b/pkg/utils/condition/condition.go new file mode 100644 index 000000000..47b945779 --- /dev/null +++ b/pkg/utils/condition/condition.go @@ -0,0 +1,41 @@ +/* +Copyright (c) Microsoft Corporation. +Licensed under the MIT license. +*/ + +// Package condition provides condition related utils. +package condition + +import ( + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" +) + +// EqualCondition compares one condition with another; it ignores the LastTransitionTime and Message fields, +// and will consider the ObservedGeneration values from the two conditions a match if the current +// condition is newer. +func EqualCondition(current, desired *metav1.Condition) bool { + if current == nil && desired == nil { + return true + } + return current != nil && + desired != nil && + current.Type == desired.Type && + current.Status == desired.Status && + current.Reason == desired.Reason && + current.ObservedGeneration >= desired.ObservedGeneration +} + +// EqualConditionIgnoreReason compares one condition with another; it ignores the Reason, LastTransitionTime, and +// Message fields, and will consider the ObservedGeneration values from the two conditions a match if the current +// condition is newer. 
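The condition helpers added in this new package work alongside the apimachinery meta helpers used in framework.go above: meta.FindStatusCondition reads the current scheduled condition before the sameness check, and meta.SetStatusCondition in updatePolicySnapshotStatus writes the refreshed one. A small sketch of those two meta helpers in isolation; the condition type string here is a placeholder, while the reason and message reuse the constants added earlier in this series:

package main

import (
	"fmt"

	"k8s.io/apimachinery/pkg/api/meta"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
)

func main() {
	var conditions []metav1.Condition

	// SetStatusCondition upserts by condition type and stamps LastTransitionTime if it is unset.
	meta.SetStatusCondition(&conditions, metav1.Condition{
		Type:               "Scheduled",
		Status:             metav1.ConditionTrue,
		ObservedGeneration: 1,
		Reason:             "SchedulingCompleted",
		Message:            "all required number of bindings have been created",
	})

	// FindStatusCondition retrieves the stored condition by type, or nil if absent.
	current := meta.FindStatusCondition(conditions, "Scheduled")
	fmt.Println(current.Status, current.Reason) // True SchedulingCompleted
}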
+func EqualConditionIgnoreReason(current, desired *metav1.Condition) bool { + if current == nil && desired == nil { + return true + } + + return current != nil && + desired != nil && + current.Type == desired.Type && + current.Status == desired.Status && + current.ObservedGeneration >= desired.ObservedGeneration +} diff --git a/pkg/utils/condition/condition_test.go b/pkg/utils/condition/condition_test.go new file mode 100644 index 000000000..5d4a18fb7 --- /dev/null +++ b/pkg/utils/condition/condition_test.go @@ -0,0 +1,234 @@ +/* +Copyright (c) Microsoft Corporation. +Licensed under the MIT license. +*/ + +package condition + +import ( + "testing" + + "github.com/google/go-cmp/cmp" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" +) + +const ( + conditionType = "some-type" + altConditionType = "some-other-type" + reason = "some reason" + altReason = "some other reason" + message = "some message" + altMessage = "some other message" +) + +func TestEqualCondition(t *testing.T) { + tests := []struct { + name string + current *metav1.Condition + desired *metav1.Condition + want bool + }{ + { + name: "both are nil", + current: nil, + desired: nil, + want: true, + }, + { + name: "current is nil", + current: nil, + desired: &metav1.Condition{ + Type: conditionType, + Status: metav1.ConditionUnknown, + Reason: reason, + Message: message, + ObservedGeneration: 1, + }, + want: false, + }, + { + name: "messages are different", + current: &metav1.Condition{ + Type: conditionType, + Status: metav1.ConditionUnknown, + Reason: reason, + Message: message, + ObservedGeneration: 1, + }, + desired: &metav1.Condition{ + Type: conditionType, + Status: metav1.ConditionUnknown, + Reason: reason, + Message: altMessage, + ObservedGeneration: 1, + }, + want: true, + }, + { + name: "observedGenerations are different (current is larger)", + current: &metav1.Condition{ + Type: conditionType, + Status: metav1.ConditionUnknown, + Reason: reason, + Message: message, + ObservedGeneration: 2, + }, + desired: &metav1.Condition{ + Type: conditionType, + Status: metav1.ConditionUnknown, + Reason: reason, + Message: altMessage, + ObservedGeneration: 1, + }, + want: true, + }, + { + name: "observedGenerations are different (current is smaller)", + current: &metav1.Condition{ + Type: conditionType, + Status: metav1.ConditionUnknown, + Reason: reason, + Message: message, + ObservedGeneration: 3, + }, + desired: &metav1.Condition{ + Type: conditionType, + Status: metav1.ConditionUnknown, + Reason: reason, + Message: altMessage, + ObservedGeneration: 4, + }, + want: false, + }, + } + for _, tc := range tests { + t.Run(tc.name, func(t *testing.T) { + got := EqualCondition(tc.current, tc.desired) + if !cmp.Equal(got, tc.want) { + t.Errorf("EqualCondition() = %v, want %v", got, tc.want) + } + }) + } +} + +// TestEqualConditionIgnoreReason tests the EqualConditionIgnoreReason function. 
+func TestEqualConditionIgnoreReason(t *testing.T) { + testCases := []struct { + name string + current *metav1.Condition + desired *metav1.Condition + want bool + }{ + { + name: "nil conditions", + current: nil, + desired: nil, + want: true, + }, + { + name: "current is nil", + current: nil, + desired: &metav1.Condition{ + Type: conditionType, + Status: metav1.ConditionUnknown, + ObservedGeneration: 7, + }, + want: false, + }, + { + name: "conditions are equal", + current: &metav1.Condition{ + Type: conditionType, + Status: metav1.ConditionTrue, + ObservedGeneration: 0, + }, + desired: &metav1.Condition{ + Type: conditionType, + Status: metav1.ConditionTrue, + ObservedGeneration: 0, + }, + want: true, + }, + { + name: "conditions are equal (different reasons)", + current: &metav1.Condition{ + Type: conditionType, + Status: metav1.ConditionTrue, + Reason: reason, + ObservedGeneration: 0, + }, + desired: &metav1.Condition{ + Type: conditionType, + Status: metav1.ConditionTrue, + Reason: altReason, + ObservedGeneration: 0, + }, + want: true, + }, + { + name: "conditions are not equal (different type)", + current: &metav1.Condition{ + Type: conditionType, + Status: metav1.ConditionUnknown, + ObservedGeneration: 1, + }, + desired: &metav1.Condition{ + Type: altConditionType, + Status: metav1.ConditionUnknown, + ObservedGeneration: 1, + }, + want: false, + }, + { + name: "conditions are not equal (different status)", + current: &metav1.Condition{ + Type: conditionType, + Status: metav1.ConditionFalse, + ObservedGeneration: 4, + }, + desired: &metav1.Condition{ + Type: conditionType, + Status: metav1.ConditionTrue, + ObservedGeneration: 4, + }, + want: false, + }, + { + name: "conditions are equal (current condition is newer)", + current: &metav1.Condition{ + Type: conditionType, + Status: metav1.ConditionUnknown, + ObservedGeneration: 3, + }, + desired: &metav1.Condition{ + Type: conditionType, + Status: metav1.ConditionUnknown, + ObservedGeneration: 2, + }, + want: true, + }, + { + name: "conditions are not equal (current condition is older)", + current: &metav1.Condition{ + Type: conditionType, + Status: metav1.ConditionFalse, + ObservedGeneration: 5, + }, + desired: &metav1.Condition{ + Type: conditionType, + Status: metav1.ConditionFalse, + ObservedGeneration: 6, + }, + want: false, + }, + } + + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + if got := EqualConditionIgnoreReason(tc.current, tc.desired); got != tc.want { + t.Fatalf("EqualConditionIgnoreReason(%+v, %+v) = %t, want %t", + tc.current, tc.desired, got, tc.want) + } + }) + } +} From 1521907568806019961de39e3a01c174c8f1ab37 Mon Sep 17 00:00:00 2001 From: michaelawyu Date: Thu, 15 Jun 2023 14:56:06 +0800 Subject: [PATCH 12/15] Rebased --- pkg/scheduler/framework/framework.go | 48 ++++++++++++---------------- pkg/scheduler/framework/utils.go | 28 ++++++++-------- 2 files changed, 35 insertions(+), 41 deletions(-) diff --git a/pkg/scheduler/framework/framework.go b/pkg/scheduler/framework/framework.go index 99ea22c4e..8581bef06 100644 --- a/pkg/scheduler/framework/framework.go +++ b/pkg/scheduler/framework/framework.go @@ -171,20 +171,12 @@ func (f *framework) RunSchedulingCycleFor(ctx context.Context, policy *fleetv1be errorMessage := "failed to run scheduling cycle" // Retrieve the desired number of clusters from the policy. - // - // TO-DO (chenyu1): assign variable(s) when more logic is added. 
- _, err = extractNumOfClustersFromPolicySnapshot(policy) + numOfClusters, err := extractNumOfClustersFromPolicySnapshot(policy) if err != nil { klog.ErrorS(err, errorMessage, klog.KObj(policy)) return ctrl.Result{}, err } - // Retrieve the desired number of clusters from the policy. - numOfClusters, err := extractNumOfClustersFromPolicySnapshot(policy) - if err != nil { - return ctrl.Result{}, fmt.Errorf(errorFormat, policy.Name, err) - } - // Collect all clusters. // // Note that clusters here are listed from the cached client for improved performance. This is @@ -253,7 +245,8 @@ func (f *framework) RunSchedulingCycleFor(ctx context.Context, policy *fleetv1be if act { remaining, err := f.downscale(ctx, active, downscaleCount) if err != nil { - return ctrl.Result{}, fmt.Errorf(errorFormat, policy.Name, err) + klog.ErrorS(err, errorMessage, klog.KObj(policy)) + return ctrl.Result{}, err } // Prepare new scheduling decisions. @@ -269,7 +262,8 @@ func (f *framework) RunSchedulingCycleFor(ctx context.Context, policy *fleetv1be // Note that the op would fail if the policy snapshot is not the latest, so that consistency is // preserved. if err := f.updatePolicySnapshotStatus(ctx, policy, newSchedulingDecisions, newSchedulingCondition); err != nil { - return ctrl.Result{}, fmt.Errorf(errorFormat, policy.Name, err) + klog.ErrorS(err, errorMessage, klog.KObj(policy)) + return ctrl.Result{}, err } // Return immediately as there are no more bindings for the scheduler to scheduler at this moment. @@ -283,7 +277,7 @@ func (f *framework) RunSchedulingCycleFor(ctx context.Context, policy *fleetv1be // Collect current decisions and conditions for sameness (no change) checks. currentSchedulingDecisions := policy.Status.ClusterDecisions - currentSchedulingCondition := meta.FindStatusCondition(policy.Status.Conditions, string(fleetv1.PolicySnapshotScheduled)) + currentSchedulingCondition := meta.FindStatusCondition(policy.Status.Conditions, string(fleetv1beta1.PolicySnapshotScheduled)) // Check if the scheduler needs to take action; a scheduling cycle is only needed if // * the policy is of the PickAll type; or @@ -306,7 +300,8 @@ func (f *framework) RunSchedulingCycleFor(ctx context.Context, policy *fleetv1be // Note that the op would fail if the policy snapshot is not the latest, so that consistency is // preserved. if err := f.updatePolicySnapshotStatus(ctx, policy, newSchedulingDecisions, newSchedulingCondition); err != nil { - return ctrl.Result{}, fmt.Errorf(errorFormat, policy.Name, err) + klog.ErrorS(err, errorMessage, klog.KObj(policy)) + return ctrl.Result{}, err } } @@ -330,7 +325,8 @@ func (f *framework) RunSchedulingCycleFor(ctx context.Context, policy *fleetv1be // Note that the op would fail if the policy snapshot is not the latest, so that consistency is // preserved. if err := f.updatePolicySnapshotStatus(ctx, policy, newSchedulingDecisions, newSchedulingCondition); err != nil { - return ctrl.Result{}, fmt.Errorf(errorFormat, policy.Name, err) + klog.ErrorS(err, errorMessage, klog.KObj(policy)) + return ctrl.Result{}, err } } @@ -384,55 +380,53 @@ func (f *framework) removeSchedulerFinalizerFromBindings(ctx context.Context, bi // sortByCreationTimestampBindings is a wrapper which implements Sort.Interface, which allows easy // sorting of bindings by their CreationTimestamps. -type sortByCreationTimestampBindings struct { - bindings []*fleetv1.ClusterResourceBinding -} +type sortByCreationTimestampBindings []*fleetv1beta1.ClusterResourceBinding // Len() is for implementing Sort.Interface. 
func (s sortByCreationTimestampBindings) Len() int { - return len(s.bindings) + return len(s) } // Swap() is for implementing Sort.Interface. func (s sortByCreationTimestampBindings) Swap(i, j int) { - s.bindings[i], s.bindings[j] = s.bindings[j], s.bindings[i] + s[i], s[j] = s[j], s[i] } // Less() is for implementing Sort.Interface. func (s sortByCreationTimestampBindings) Less(i, j int) bool { - return s.bindings[i].CreationTimestamp.Before(&(s.bindings[j].CreationTimestamp)) + return s[i].CreationTimestamp.Before(&(s[j].CreationTimestamp)) } // downscale performs downscaling, removing some number of bindings. It picks the oldest (by CreationTimestamp) // bindings first. -func (f *framework) downscale(ctx context.Context, active []*fleetv1.ClusterResourceBinding, count int) ([]*fleetv1.ClusterResourceBinding, error) { +func (f *framework) downscale(ctx context.Context, active []*fleetv1beta1.ClusterResourceBinding, count int) ([]*fleetv1beta1.ClusterResourceBinding, error) { errorFormat := "failed to delete binding %s: %w" // Sort the bindings by their CreationTimestamps. - sorted := sortByCreationTimestampBindings{bindings: active} + sorted := sortByCreationTimestampBindings(active) sort.Sort(sorted) // Delete the first count number of bindings. - bindingsToDelete := sorted.bindings[:count] + bindingsToDelete := sorted[:count] for _, binding := range bindingsToDelete { if err := f.client.Delete(ctx, binding, &client.DeleteOptions{}); err != nil { - return nil, fmt.Errorf(errorFormat, binding.Name, err) + return nil, controller.NewAPIServerError(fmt.Errorf(errorFormat, binding.Name, err)) } } // Return the remaining bindings. - return sorted.bindings[count:], nil + return sorted[count:], nil } // updatePolicySnapshotStatus updates the status of a policy snapshot, setting new scheduling decisions // and condition on the object. -func (f *framework) updatePolicySnapshotStatus(ctx context.Context, policy *fleetv1.ClusterPolicySnapshot, decisions []fleetv1.ClusterDecision, condition metav1.Condition) error { +func (f *framework) updatePolicySnapshotStatus(ctx context.Context, policy *fleetv1beta1.ClusterPolicySnapshot, decisions []fleetv1beta1.ClusterDecision, condition metav1.Condition) error { errorFormat := "failed to update policy snapshot status: %w" policy.Status.ClusterDecisions = decisions meta.SetStatusCondition(&policy.Status.Conditions, condition) if err := f.client.Status().Update(ctx, policy, &client.UpdateOptions{}); err != nil { - return fmt.Errorf(errorFormat, err) + return controller.NewAPIServerError(fmt.Errorf(errorFormat, err)) } return nil } diff --git a/pkg/scheduler/framework/utils.go b/pkg/scheduler/framework/utils.go index 8fd34a3ae..f10a4d99c 100644 --- a/pkg/scheduler/framework/utils.go +++ b/pkg/scheduler/framework/utils.go @@ -28,7 +28,7 @@ func extractNumOfClustersFromPolicySnapshot(policy *fleetv1beta1.ClusterPolicySn // Cast the annotation to an integer; throw an error if the cast cannot be completed or the value is negative. 
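As a side note on the sortByCreationTimestampBindings rewrite above: the same ordering could be produced inline with sort.Slice, sketched below with a stand-in binding type. Keeping a named sort.Interface implementation, as the patch does, makes the ordering reusable and lets it be unit-tested on its own (see TestSortByCreationTimestampBindingsWrapper later in this series):

package main

import (
	"fmt"
	"sort"
	"time"

	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
)

// binding is a stand-in carrying only the field the ordering needs.
type binding struct {
	Name              string
	CreationTimestamp metav1.Time
}

func main() {
	older := metav1.Now()
	newer := metav1.NewTime(time.Now().Add(time.Second))
	active := []*binding{
		{Name: "newer", CreationTimestamp: newer},
		{Name: "older", CreationTimestamp: older},
	}

	// Oldest first, matching the order in which downscale removes bindings.
	sort.Slice(active, func(i, j int) bool {
		return active[i].CreationTimestamp.Before(&active[j].CreationTimestamp)
	})
	fmt.Println(active[0].Name, active[1].Name) // older newer
}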
numOfClusters, err := strconv.Atoi(numOfClustersStr) if err != nil || numOfClusters < 0 { - return 0, controller.NewUnexpectedBehaviorError(fmt.Errorf("invalid annotation %s: Atoi(%s) = %v, %v", fleetv1beta1.NumOfClustersAnnotation, numOfClustersStr, numOfClusters, err)) + return 0, controller.NewUnexpectedBehaviorError(fmt.Errorf("invalid annotation %s: %s is not a valid count: %w", fleetv1beta1.NumOfClustersAnnotation, numOfClustersStr, err)) } return numOfClusters, nil @@ -77,8 +77,8 @@ func classifyBindings(bindings []fleetv1beta1.ClusterResourceBinding) (active, d // shouldDownscale checks if the scheduler needs to perform some downscaling, and (if so) how many bindings // it should remove. -func shouldDownscale(policy *fleetv1.ClusterPolicySnapshot, numOfClusters int, active []*fleetv1.ClusterResourceBinding) (act bool, count int) { - if policy.Spec.Policy.PlacementType == fleetv1.PickNPlacementType && numOfClusters < len(active) { +func shouldDownscale(policy *fleetv1beta1.ClusterPolicySnapshot, numOfClusters int, active []*fleetv1beta1.ClusterResourceBinding) (act bool, count int) { + if policy.Spec.Policy.PlacementType == fleetv1beta1.PickNPlacementType && numOfClusters < len(active) { return true, len(active) - numOfClusters } return false, 0 @@ -86,10 +86,10 @@ func shouldDownscale(policy *fleetv1.ClusterPolicySnapshot, numOfClusters int, a // prepareNewSchedulingDecisions returns a list of new scheduling decisions, in accordance with the list // of existing bindings. -func prepareNewSchedulingDecisions(policy *fleetv1.ClusterPolicySnapshot, existing ...[]*fleetv1.ClusterResourceBinding) []fleetv1.ClusterDecision { +func prepareNewSchedulingDecisions(policy *fleetv1beta1.ClusterPolicySnapshot, existing ...[]*fleetv1beta1.ClusterResourceBinding) []fleetv1beta1.ClusterDecision { // Pre-allocate arrays. current := policy.Status.ClusterDecisions - desired := make([]fleetv1.ClusterDecision, 0, len(existing)) + desired := make([]fleetv1beta1.ClusterDecision, 0, len(existing)) // Build new scheduling decisions. for _, bindings := range existing { @@ -115,9 +115,9 @@ func prepareNewSchedulingDecisions(policy *fleetv1.ClusterPolicySnapshot, existi } // fullySchedulingCondition returns a condition for fully scheduled policy snapshot. -func fullyScheduledCondition(policy *fleetv1.ClusterPolicySnapshot) metav1.Condition { +func fullyScheduledCondition(policy *fleetv1beta1.ClusterPolicySnapshot) metav1.Condition { return metav1.Condition{ - Type: string(fleetv1.PolicySnapshotScheduled), + Type: string(fleetv1beta1.PolicySnapshotScheduled), Status: metav1.ConditionTrue, ObservedGeneration: policy.Generation, LastTransitionTime: metav1.Now(), @@ -131,8 +131,8 @@ func fullyScheduledCondition(policy *fleetv1.ClusterPolicySnapshot) metav1.Condi // A scheduling cycle is only needed if // * the policy is of the PickAll type; or // * the policy is of the PickN type, and currently there are not enough number of bindings. 
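To make the PickN arithmetic concrete, consider a desired count of three clusters: with five active bindings, shouldDownscale asks for two removals (oldest first); with only two bindings counted as existing (active plus deleted-but-still-dispatching), shouldSchedule returns true because more placements are needed. The toy program below is plain arithmetic only and does not use the fleet types:

package main

import "fmt"

func main() {
	numOfClusters := 3 // desired count for a PickN policy

	// Downscaling case: more active bindings than desired.
	activeBindings := 5
	if numOfClusters < activeBindings {
		fmt.Println("downscale, remove", activeBindings-numOfClusters, "oldest bindings") // remove 2
	}

	// Scheduling case: fewer existing bindings than desired.
	existingBindings := 2
	if numOfClusters > existingBindings {
		fmt.Println("schedule, need", numOfClusters-existingBindings, "more binding(s)") // need 1
	}
}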
-func shouldSchedule(policy *fleetv1.ClusterPolicySnapshot, numOfClusters, existingBindingsCount int) bool { - if policy.Spec.Policy.PlacementType == fleetv1.PickAllPlacementType { +func shouldSchedule(policy *fleetv1beta1.ClusterPolicySnapshot, numOfClusters, existingBindingsCount int) bool { + if policy.Spec.Policy.PlacementType == fleetv1beta1.PickAllPlacementType { return true } @@ -142,8 +142,8 @@ func shouldSchedule(policy *fleetv1.ClusterPolicySnapshot, numOfClusters, existi // equalDecisions returns if two arrays of ClusterDecisions are equal; it returns true if // every decision in one array is also present in the other array regardless of their indexes, // and vice versa. -func equalDecisons(current, desired []fleetv1.ClusterDecision) bool { - desiredDecisionByCluster := make(map[string]fleetv1.ClusterDecision, len(desired)) +func equalDecisons(current, desired []fleetv1beta1.ClusterDecision) bool { + desiredDecisionByCluster := make(map[string]fleetv1beta1.ClusterDecision, len(desired)) for _, decision := range desired { desiredDecisionByCluster[decision.ClusterName] = decision } @@ -159,13 +159,13 @@ func equalDecisons(current, desired []fleetv1.ClusterDecision) bool { } // notFullyScheduledCondition returns a condition for not fully scheduled policy snapshot. -func notFullyScheduledCondition(policy *fleetv1.ClusterPolicySnapshot, desiredCount int) metav1.Condition { +func notFullyScheduledCondition(policy *fleetv1beta1.ClusterPolicySnapshot, desiredCount int) metav1.Condition { message := notFullyScheduledMessage - if policy.Spec.Policy.PlacementType == fleetv1.PickNPlacementType { + if policy.Spec.Policy.PlacementType == fleetv1beta1.PickNPlacementType { message = fmt.Sprintf("%s: expected count %d, current count %d", message, policy.Spec.Policy.NumberOfClusters, desiredCount) } return metav1.Condition{ - Type: string(fleetv1.PolicySnapshotScheduled), + Type: string(fleetv1beta1.PolicySnapshotScheduled), Status: metav1.ConditionFalse, ObservedGeneration: policy.Generation, LastTransitionTime: metav1.Now(), From af458d2394443ddaa0dc51b4d9e295bc0ab47f7b Mon Sep 17 00:00:00 2001 From: michaelawyu Date: Thu, 15 Jun 2023 22:14:07 +0800 Subject: [PATCH 13/15] Added unit tests --- pkg/scheduler/framework/framework.go | 4 +- pkg/scheduler/framework/framework_test.go | 428 +++++++++++++++++++++- pkg/scheduler/framework/utils.go | 2 +- 3 files changed, 428 insertions(+), 6 deletions(-) diff --git a/pkg/scheduler/framework/framework.go b/pkg/scheduler/framework/framework.go index 8581bef06..703ae0d51 100644 --- a/pkg/scheduler/framework/framework.go +++ b/pkg/scheduler/framework/framework.go @@ -294,7 +294,7 @@ func (f *framework) RunSchedulingCycleFor(ctx context.Context, policy *fleetv1be // Check if a refresh is warranted; the scheduler only update the status when there is a change in // scheduling decisions and/or the scheduling condition. - if !equalDecisons(currentSchedulingDecisions, newSchedulingDecisions) || condition.EqualCondition(currentSchedulingCondition, &newSchedulingCondition) { + if !equalDecisions(currentSchedulingDecisions, newSchedulingDecisions) || condition.EqualCondition(currentSchedulingCondition, &newSchedulingCondition) { // Update the policy snapshot status. 
// // Note that the op would fail if the policy snapshot is not the latest, so that consistency is @@ -319,7 +319,7 @@ func (f *framework) RunSchedulingCycleFor(ctx context.Context, policy *fleetv1be newSchedulingCondition := notFullyScheduledCondition(policy, numOfClusters) // Check if a refresh is warranted; the scheduler only update the status when there is a change in // scheduling decisions and/or the scheduling condition. - if !equalDecisons(currentSchedulingDecisions, newSchedulingDecisions) || condition.EqualCondition(currentSchedulingCondition, &newSchedulingCondition) { + if !equalDecisions(currentSchedulingDecisions, newSchedulingDecisions) || condition.EqualCondition(currentSchedulingCondition, &newSchedulingCondition) { // Update the policy snapshot status. // // Note that the op would fail if the policy snapshot is not the latest, so that consistency is diff --git a/pkg/scheduler/framework/framework_test.go b/pkg/scheduler/framework/framework_test.go index a1d8e61ea..cde4260c5 100644 --- a/pkg/scheduler/framework/framework_test.go +++ b/pkg/scheduler/framework/framework_test.go @@ -9,9 +9,13 @@ import ( "context" "log" "os" + "sort" "testing" + "time" "github.com/google/go-cmp/cmp" + "github.com/google/go-cmp/cmp/cmpopts" + "k8s.io/apimachinery/pkg/api/meta" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/types" "k8s.io/client-go/kubernetes/scheme" @@ -23,9 +27,16 @@ import ( ) const ( - CRPName = "test-placement" - policyName = "test-policy" - bindingName = "test-binding" + CRPName = "test-placement" + policyName = "test-policy" + bindingName = "test-binding" + altBindingName = "another-test-binding" + clusterName = "bravelion" + altClusterName = "smartcat" +) + +var ( + ignoredCondFields = cmpopts.IgnoreFields(metav1.Condition{}, "LastTransitionTime") ) // TO-DO (chenyu1): expand the test cases as development stablizes. @@ -356,3 +367,414 @@ func TestRemoveSchedulerFinalizerFromBindings(t *testing.T) { t.Fatalf("Binding %s finalizers = %v, want no scheduler finalizer", bindingName, updatedBinding.Finalizers) } } + +// TestShouldDownscale tests the shouldDownscale function. 
+func TestShouldDownscale(t *testing.T) { + testCases := []struct { + name string + policy *fleetv1beta1.ClusterPolicySnapshot + numOfClusters int + active []*fleetv1beta1.ClusterResourceBinding + wantAct bool + wantCount int + }{ + { + name: "should not downscale (pick all)", + policy: &fleetv1beta1.ClusterPolicySnapshot{ + ObjectMeta: metav1.ObjectMeta{ + Name: policyName, + }, + Spec: fleetv1beta1.PolicySnapshotSpec{ + Policy: &fleetv1beta1.PlacementPolicy{ + PlacementType: fleetv1beta1.PickAllPlacementType, + }, + }, + }, + }, + { + name: "should not downscale (enough bindings)", + policy: &fleetv1beta1.ClusterPolicySnapshot{ + ObjectMeta: metav1.ObjectMeta{ + Name: policyName, + }, + Spec: fleetv1beta1.PolicySnapshotSpec{ + Policy: &fleetv1beta1.PlacementPolicy{ + PlacementType: fleetv1beta1.PickNPlacementType, + }, + }, + }, + numOfClusters: 1, + active: []*fleetv1beta1.ClusterResourceBinding{ + { + ObjectMeta: metav1.ObjectMeta{ + Name: bindingName, + }, + }, + }, + }, + { + name: "should downscale (not enough bindings)", + policy: &fleetv1beta1.ClusterPolicySnapshot{ + ObjectMeta: metav1.ObjectMeta{ + Name: policyName, + }, + Spec: fleetv1beta1.PolicySnapshotSpec{ + Policy: &fleetv1beta1.PlacementPolicy{ + PlacementType: fleetv1beta1.PickNPlacementType, + }, + }, + }, + numOfClusters: 0, + active: []*fleetv1beta1.ClusterResourceBinding{ + { + ObjectMeta: metav1.ObjectMeta{ + Name: bindingName, + }, + }, + }, + wantAct: true, + wantCount: 1, + }, + } + + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + act, count := shouldDownscale(tc.policy, tc.numOfClusters, tc.active) + if act != tc.wantAct || count != tc.wantCount { + t.Fatalf("shouldDownscale() = %v, %v, want %v, %v", act, count, tc.wantAct, tc.wantCount) + } + }) + } +} + +// TestSortByCreationTimestampBindingsWrapper checks if the sortByCreationTimestampBindings wrapper implements +// sort.Interface correctly, i.e, if it is sortable by CreationTimestamp. +func TestSortByCreationTimestampBindingsWrapper(t *testing.T) { + timestampA := metav1.Now() + timestampB := metav1.NewTime(time.Now().Add(time.Second)) + + sorted := sortByCreationTimestampBindings([]*fleetv1beta1.ClusterResourceBinding{ + { + ObjectMeta: metav1.ObjectMeta{ + Name: bindingName, + CreationTimestamp: timestampB, + }, + }, + { + ObjectMeta: metav1.ObjectMeta{ + Name: altBindingName, + CreationTimestamp: timestampA, + }, + }, + }) + sort.Sort(sorted) + + wantSorted := sortByCreationTimestampBindings([]*fleetv1beta1.ClusterResourceBinding{ + { + ObjectMeta: metav1.ObjectMeta{ + Name: altBindingName, + CreationTimestamp: timestampA, + }, + }, + { + ObjectMeta: metav1.ObjectMeta{ + Name: bindingName, + CreationTimestamp: timestampB, + }, + }, + }) + if !cmp.Equal(sorted, wantSorted) { + t.Fatalf("sortByCreationTimestamp, got %v, want %v", sorted, wantSorted) + } +} + +// TestDownscale tests the downscale method. +func TestDownscale(t *testing.T) { + timestampA := metav1.Now() + timestampB := metav1.NewTime(time.Now().Add(time.Second)) + + bindingA := fleetv1beta1.ClusterResourceBinding{ + ObjectMeta: metav1.ObjectMeta{ + Name: bindingName, + CreationTimestamp: timestampA, + }, + } + bindingB := fleetv1beta1.ClusterResourceBinding{ + ObjectMeta: metav1.ObjectMeta{ + Name: altBindingName, + CreationTimestamp: timestampB, + }, + } + + active := []*fleetv1beta1.ClusterResourceBinding{&bindingA, &bindingB} + count := 1 + + fakeClient := fake.NewClientBuilder(). + WithScheme(scheme.Scheme). + WithObjects(&bindingA, &bindingB). 
+ Build() + // Construct framework manually instead of using NewFramework() to avoid mocking the controller manager. + f := &framework{ + client: fakeClient, + } + + ctx := context.Background() + remains, err := f.downscale(ctx, active, count) + wantRemains := []*fleetv1beta1.ClusterResourceBinding{&bindingB} + if err != nil || !cmp.Equal(remains, wantRemains) { + t.Fatalf("downscale(%v, %v) = %v, %v, want %v, no error", active, count, remains, err, wantRemains) + } +} + +// TestPrepareNewSchedulingDecisions tests the prepareNewSchedulingDecisions function. +func TestPrepareNewSchedulingDecisions(t *testing.T) { + policy := &fleetv1beta1.ClusterPolicySnapshot{ + ObjectMeta: metav1.ObjectMeta{ + Name: bindingName, + }, + Status: fleetv1beta1.PolicySnapshotStatus{ + ClusterDecisions: []fleetv1beta1.ClusterDecision{ + { + ClusterName: altClusterName, + Selected: false, + }, + }, + }, + } + binding := &fleetv1beta1.ClusterResourceBinding{ + ObjectMeta: metav1.ObjectMeta{ + Name: bindingName, + }, + Spec: fleetv1beta1.ResourceBindingSpec{ + TargetCluster: clusterName, + ClusterDecision: fleetv1beta1.ClusterDecision{ + ClusterName: clusterName, + Selected: true, + }, + }, + } + + decisions := prepareNewSchedulingDecisions(policy, []*fleetv1beta1.ClusterResourceBinding{binding}) + wantDecisions := []fleetv1beta1.ClusterDecision{ + binding.Spec.ClusterDecision, + policy.Status.ClusterDecisions[0], + } + + if !cmp.Equal(decisions, wantDecisions) { + t.Fatalf("prepareNewSchedulingDecisions(%v, %v) = %v, want %v", policy, binding, decisions, wantDecisions) + } +} + +// TestShouldSchedule tests the shouldSchedule function. +func TestShouldSchedule(t *testing.T) { + testCases := []struct { + name string + policy *fleetv1beta1.ClusterPolicySnapshot + numOfClusters int + existingBindingsCount int + want bool + }{ + { + name: "should schedule (pick all)", + policy: &fleetv1beta1.ClusterPolicySnapshot{ + ObjectMeta: metav1.ObjectMeta{ + Name: policyName, + }, + Spec: fleetv1beta1.PolicySnapshotSpec{ + Policy: &fleetv1beta1.PlacementPolicy{ + PlacementType: fleetv1beta1.PickAllPlacementType, + }, + }, + }, + want: true, + }, + { + name: "should not schedule (enough bindings)", + policy: &fleetv1beta1.ClusterPolicySnapshot{ + ObjectMeta: metav1.ObjectMeta{ + Name: policyName, + }, + Spec: fleetv1beta1.PolicySnapshotSpec{ + Policy: &fleetv1beta1.PlacementPolicy{ + PlacementType: fleetv1beta1.PickNPlacementType, + }, + }, + }, + numOfClusters: 1, + existingBindingsCount: 1, + }, + { + name: "should schedule (not enough bindings)", + policy: &fleetv1beta1.ClusterPolicySnapshot{ + ObjectMeta: metav1.ObjectMeta{ + Name: policyName, + }, + Spec: fleetv1beta1.PolicySnapshotSpec{ + Policy: &fleetv1beta1.PlacementPolicy{ + PlacementType: fleetv1beta1.PickNPlacementType, + }, + }, + }, + numOfClusters: 2, + existingBindingsCount: 1, + want: true, + }, + } + + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + if shouldSchedule(tc.policy, tc.numOfClusters, tc.existingBindingsCount) != tc.want { + t.Fatalf("shouldSchedule(%v, %v, %v) = %v, want %v", tc.policy, tc.numOfClusters, tc.existingBindingsCount, !tc.want, tc.want) + } + }) + } +} + +// TestEqualDecisions tests the equalDecisions function. 
+func TestEqualDecisions(t *testing.T) { + testCases := []struct { + name string + current []fleetv1beta1.ClusterDecision + desired []fleetv1beta1.ClusterDecision + want bool + }{ + { + name: "equal decisions", + current: []fleetv1beta1.ClusterDecision{ + { + ClusterName: clusterName, + Selected: true, + }, + }, + desired: []fleetv1beta1.ClusterDecision{ + { + ClusterName: clusterName, + Selected: true, + }, + }, + want: true, + }, + { + name: "not equal decisions (different value)", + current: []fleetv1beta1.ClusterDecision{ + { + ClusterName: clusterName, + Selected: true, + }, + }, + desired: []fleetv1beta1.ClusterDecision{ + { + ClusterName: clusterName, + Selected: false, + }, + }, + }, + { + name: "not equal decisions (empty current)", + current: []fleetv1beta1.ClusterDecision{ + { + ClusterName: clusterName, + Selected: true, + }, + }, + desired: []fleetv1beta1.ClusterDecision{}, + }, + { + name: "not equal decisions (empty desired)", + current: []fleetv1beta1.ClusterDecision{ + { + ClusterName: clusterName, + Selected: true, + }, + }, + desired: []fleetv1beta1.ClusterDecision{}, + }, + { + name: "not equal decisions (no match)", + current: []fleetv1beta1.ClusterDecision{ + { + ClusterName: clusterName, + Selected: true, + }, + }, + desired: []fleetv1beta1.ClusterDecision{ + { + ClusterName: altClusterName, + Selected: true, + }, + }, + }, + { + name: "not equal decisions (different lengths)", + current: []fleetv1beta1.ClusterDecision{ + { + ClusterName: clusterName, + Selected: true, + }, + { + ClusterName: altClusterName, + Selected: true, + }, + }, + desired: []fleetv1beta1.ClusterDecision{ + { + ClusterName: altClusterName, + Selected: true, + }, + }, + }, + } + + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + if equalDecisions(tc.current, tc.desired) != tc.want { + t.Fatalf("equalDecisions(%v, %v) = %v, want %v", tc.current, tc.desired, !tc.want, tc.want) + } + }) + } +} + +// TestUpdatePolicySnapshotStatus tests the updatePolicySnapshotStatus method. +func TestUpdatePolicySnapshotStatus(t *testing.T) { + policy := &fleetv1beta1.ClusterPolicySnapshot{ + ObjectMeta: metav1.ObjectMeta{ + Name: policyName, + }, + } + decisions := []fleetv1beta1.ClusterDecision{ + { + ClusterName: clusterName, + Selected: true, + }, + } + condition := fullyScheduledCondition(policy) + + fakeClient := fake.NewClientBuilder(). + WithScheme(scheme.Scheme). + WithObjects(policy). + Build() + // Construct framework manually instead of using NewFramework() to avoid mocking the controller manager. + f := &framework{ + client: fakeClient, + } + + ctx := context.Background() + if err := f.updatePolicySnapshotStatus(ctx, policy, decisions, condition); err != nil { + t.Fatalf("updatePolicySnapshotStatus(%v, %v, %v) = %v, want no error", policy, decisions, condition, err) + } + + // Verify that the policy was updated. 
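The verification that follows compares the stored scheduled condition against the expected one with go-cmp, using the ignoredCondFields option declared earlier in framework_test.go to drop LastTransitionTime from the comparison. In isolation, that option behaves as in the sketch below (the condition values are illustrative):

package main

import (
	"fmt"
	"time"

	"github.com/google/go-cmp/cmp"
	"github.com/google/go-cmp/cmp/cmpopts"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
)

func main() {
	ignoreTransitionTime := cmpopts.IgnoreFields(metav1.Condition{}, "LastTransitionTime")

	a := metav1.Condition{
		Type:               "Scheduled",
		Status:             metav1.ConditionTrue,
		Reason:             "SchedulingCompleted",
		LastTransitionTime: metav1.Now(),
	}
	b := a
	b.LastTransitionTime = metav1.NewTime(time.Now().Add(time.Hour))

	fmt.Println(cmp.Equal(a, b))                       // false: the timestamps differ
	fmt.Println(cmp.Equal(a, b, ignoreTransitionTime)) // true: LastTransitionTime is ignored
}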
+ updatedPolicy := &fleetv1beta1.ClusterPolicySnapshot{} + if err := fakeClient.Get(ctx, types.NamespacedName{Name: policy.Name}, updatedPolicy); err != nil { + t.Fatalf("clusterPolicySnapshot Get(%v) = %v, want no error", policy.Name, err) + } + + if !cmp.Equal(updatedPolicy.Status.ClusterDecisions, decisions) { + t.Errorf("cluster decisions, got %v, want %v", updatedPolicy.Status.ClusterDecisions, decisions) + } + + updatedCondition := meta.FindStatusCondition(updatedPolicy.Status.Conditions, string(fleetv1beta1.PolicySnapshotScheduled)) + if !cmp.Equal(updatedCondition, &condition, ignoredCondFields) { + t.Errorf("scheduled condition, got %v, want %v", updatedCondition, condition) + } +} diff --git a/pkg/scheduler/framework/utils.go b/pkg/scheduler/framework/utils.go index f10a4d99c..44c61b45b 100644 --- a/pkg/scheduler/framework/utils.go +++ b/pkg/scheduler/framework/utils.go @@ -142,7 +142,7 @@ func shouldSchedule(policy *fleetv1beta1.ClusterPolicySnapshot, numOfClusters, e // equalDecisions returns if two arrays of ClusterDecisions are equal; it returns true if // every decision in one array is also present in the other array regardless of their indexes, // and vice versa. -func equalDecisons(current, desired []fleetv1beta1.ClusterDecision) bool { +func equalDecisions(current, desired []fleetv1beta1.ClusterDecision) bool { desiredDecisionByCluster := make(map[string]fleetv1beta1.ClusterDecision, len(desired)) for _, decision := range desired { desiredDecisionByCluster[decision.ClusterName] = decision From c55ac75b335e9c339097db6b6cad90eb674e2eb6 Mon Sep 17 00:00:00 2001 From: michaelawyu Date: Fri, 16 Jun 2023 00:20:16 +0800 Subject: [PATCH 14/15] Temp --- pkg/scheduler/framework/cyclestate.go | 10 + pkg/scheduler/framework/dummy_plugins_test.go | 87 ++++++++ pkg/scheduler/framework/framework.go | 195 +++++++++++++++++- pkg/scheduler/framework/framework_test.go | 11 + pkg/scheduler/framework/profile_test.go | 39 ---- pkg/scheduler/framework/status.go | 20 +- pkg/scheduler/framework/status_test.go | 6 +- 7 files changed, 319 insertions(+), 49 deletions(-) create mode 100644 pkg/scheduler/framework/dummy_plugins_test.go diff --git a/pkg/scheduler/framework/cyclestate.go b/pkg/scheduler/framework/cyclestate.go index 07a10a421..d0029fdd7 100644 --- a/pkg/scheduler/framework/cyclestate.go +++ b/pkg/scheduler/framework/cyclestate.go @@ -8,6 +8,8 @@ package framework import ( "fmt" "sync" + + "k8s.io/apimachinery/pkg/util/sets" ) // StateKey is the key for a state value stored in a CycleState. @@ -32,6 +34,14 @@ type CycleStatePluginReadWriter interface { type CycleState struct { // store is a concurrency-safe store (a map). store sync.Map + + // skippedFilterPlugins is a set of Filter plugins that should be skipped in the current scheduling cycle. + skippedFilterPlugins sets.String + // desiredBatchSize is the desired batch size for the current scheduling cycle. + desiredBatchSize int + // batchSizeLimit is the limit on batch size for the current scheduling cycle, set by + // post-batch plugins. + batchSizeLimit int } // Read retrieves a value from CycleState by a key. diff --git a/pkg/scheduler/framework/dummy_plugins_test.go b/pkg/scheduler/framework/dummy_plugins_test.go new file mode 100644 index 000000000..8579ddc3e --- /dev/null +++ b/pkg/scheduler/framework/dummy_plugins_test.go @@ -0,0 +1,87 @@ +/* +Copyright (c) Microsoft Corporation. +Licensed under the MIT license. 
+*/ + +package framework + +import ( + "context" + "fmt" + + fleetv1beta1 "go.goms.io/fleet/apis/placement/v1beta1" +) + +const ( + // dummyPluginTriggerTag is a label on policy snapshots, which triggers specific plugins when set, + // to simulate different scenarios. + dummyPluginTriggerTag = "trigger-tag" +) + +// A no-op, dummy plugin which connects to all extension points. +type DummyAllPurposePlugin struct{} + +// Name returns the name of the dummy plugin. +func (p *DummyAllPurposePlugin) Name() string { + return dummyPluginName +} + +// PostBatch implements the PostBatch interface for the dummy plugin. +func (p *DummyAllPurposePlugin) PostBatch(ctx context.Context, state CycleStatePluginReadWriter, policy *fleetv1beta1.ClusterPolicySnapshot) (size int, status *Status) { //nolint:revive + return 1, nil +} + +// PreFilter implements the PreFilter interface for the dummy plugin. +func (p *DummyAllPurposePlugin) PreFilter(ctx context.Context, state CycleStatePluginReadWriter, policy *fleetv1beta1.ClusterPolicySnapshot) (status *Status) { //nolint:revive + return nil +} + +// Filter implements the Filter interface for the dummy plugin. +func (p *DummyAllPurposePlugin) Filter(ctx context.Context, state CycleStatePluginReadWriter, policy *fleetv1beta1.ClusterPolicySnapshot, cluster *fleetv1beta1.MemberCluster) (status *Status) { //nolint:revive + return nil +} + +// PreScore implements the PreScore interface for the dummy plugin. +func (p *DummyAllPurposePlugin) PreScore(ctx context.Context, state CycleStatePluginReadWriter, policy *fleetv1beta1.ClusterPolicySnapshot) (status *Status) { //nolint:revive + return nil +} + +// Score implements the Score interface for the dummy plugin. +func (p *DummyAllPurposePlugin) Score(ctx context.Context, state CycleStatePluginReadWriter, policy *fleetv1beta1.ClusterPolicySnapshot, cluster *fleetv1beta1.MemberCluster) (score *ClusterScore, status *Status) { //nolint:revive + return &ClusterScore{}, nil +} + +// SetUpWithFramework is a no-op to satisfy the Plugin interface. +func (p *DummyAllPurposePlugin) SetUpWithFramework(handle Handle) {} // nolint:revive + +// A dummy post batch plugin, which adjusts batch size limit by cluster names. +type dummyPostBatchPlugin struct{} + +// Name returns the name of the dummy plugin. +func (p *dummyPostBatchPlugin) Name() string { + return dummyPluginName +} + +// PostBatch implements the PostBatch interface for the dummy plugin. +// +// It returns a status in accordance with the tag added to the policy. +func (p *dummyPostBatchPlugin) PostBatch(ctx context.Context, state CycleStatePluginReadWriter, policy *fleetv1beta1.ClusterPolicySnapshot) (size int, status *Status) { //nolint:revive + tag, ok := policy.Labels[dummyPluginTriggerTag] + if !ok { + return 0, FromError(fmt.Errorf("no %s label found", dummyPluginTriggerTag), p.Name()) + } + + switch tag { + case "post-batch-success": + return 1, nil + case "post-batch-internal-error": + return 0, FromError(fmt.Errorf("internal error"), p.Name()) + case "post-batch-skip": + return 0, NewNonErrorStatus(Skip, p.Name()) + case "post-batch-cluster-unschedulable": + // For testing purposes only; normally a post batch plugin will never yield such a status. 
+		return 0, NewNonErrorStatus(ClusterUnschedulable, p.Name())
+	default:
+		return 1, nil
+	}
+}
diff --git a/pkg/scheduler/framework/framework.go b/pkg/scheduler/framework/framework.go
index 703ae0d51..7e72316ac 100644
--- a/pkg/scheduler/framework/framework.go
+++ b/pkg/scheduler/framework/framework.go
@@ -11,6 +11,7 @@ import (
 	"context"
 	"fmt"
 	"sort"
+	"sync/atomic"
 
 	"k8s.io/apimachinery/pkg/api/meta"
 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
@@ -182,9 +183,7 @@ func (f *framework) RunSchedulingCycleFor(ctx context.Context, policy *fleetv1be
 	// Note that clusters here are listed from the cached client for improved performance. This is
 	// safe in consistency as it is guaranteed that the scheduler will receive all events for cluster
 	// changes eventually.
-	//
-	// TO-DO (chenyu1): assign variable(s) when more logic is added.
-	_, err = f.collectClusters(ctx)
+	clusters, err := f.collectClusters(ctx)
 	if err != nil {
 		klog.ErrorS(err, errorMessage, klog.KObj(policy))
 		return ctrl.Result{}, err
@@ -330,6 +329,75 @@ func (f *framework) RunSchedulingCycleFor(ctx context.Context, policy *fleetv1be
 		}
 	}
 
+	// Enter the actual scheduling stages.
+
+	// Prepare the cycle state for this run.
+	//
+	// Note that this state is shared between all plugins and the scheduler framework itself (though some fields are reserved by
+	// the framework). These reserved fields are never accessed concurrently, as each scheduling run has its own cycle and a run
+	// is always executed in one single goroutine; plugin access to the state is guarded by sync.Map.
+	state := NewCycleState()
+
+	// Calculate the batch size.
+	state.desiredBatchSize = int(*policy.Spec.Policy.NumberOfClusters) - len(active) - len(deletedWithDispatcherFinalizer)
+
+	// The earlier check guarantees that the desired batch size is always positive; however, the scheduler still
+	// performs a sanity check here; normally this branch will never run.
+	if state.desiredBatchSize <= 0 {
+		err = fmt.Errorf("desired batch size is not positive: %d", state.desiredBatchSize)
+		klog.ErrorS(err, errorMessage, klog.KObj(policy))
+		return ctrl.Result{}, controller.NewUnexpectedBehaviorError(err)
+	}
+
+	// Run post-batch plugins.
+	//
+	// These plugins each yield a batch size limit; the minimum of these limits is used as the actual batch size for
+	// this scheduling cycle.
+	//
+	// Note that any failure would lead to the cancellation of the scheduling cycle.
+	batchSizeLimit, status := f.runPostBatchPlugins(ctx, state, policy)
+	if status.IsInteralError() {
+		klog.ErrorS(status.AsError(), errorMessage, klog.KObj(policy))
+		return ctrl.Result{}, controller.NewUnexpectedBehaviorError(status.AsError())
+	}
+
+	// A sanity check; normally this branch will never run, as runPostBatchPlugins guarantees that
+	// the batch size limit is never greater than the desired batch size.
+	if batchSizeLimit > state.desiredBatchSize {
+		err := fmt.Errorf("batch size limit is greater than desired batch size: %d > %d", batchSizeLimit, state.desiredBatchSize)
+		klog.ErrorS(err, errorMessage, klog.KObj(policy))
+		return ctrl.Result{}, controller.NewUnexpectedBehaviorError(err)
+	}
+	state.batchSizeLimit = batchSizeLimit
+
+	// Run pre-filter plugins.
+	//
+	// Each plugin can:
+	// * set up some common state for future calls (on different extension points) in the scheduling cycle; and/or
+	// * check if it needs to run the Filter stage.
+	// Any plugin that would like to be skipped is listed in the cycle state for future reference.
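+	// (Skipped plugins are recorded in the skippedFilterPlugins set of the cycle state; the Filter stage
+	// consults this set and will not call a skipped plugin for any cluster.)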
+	//
+	// Note that any failure would lead to the cancellation of the scheduling cycle.
+	if status := f.runPreFilterPlugins(ctx, state, policy); status.IsInteralError() {
+		klog.ErrorS(status.AsError(), errorMessage, klog.KObj(policy))
+		return ctrl.Result{}, controller.NewUnexpectedBehaviorError(status.AsError())
+	}
+
+	// Run filter plugins.
+	//
+	// The scheduler checks each cluster candidate by calling the chain of filter plugins; if any plugin suggests
+	// that the cluster should not be bound, the cluster is ignored for the rest of the cycle. Note that clusters
+	// are inspected in parallel.
+	//
+	// Note that any failure would lead to the cancellation of the scheduling cycle.
+	//
+	// TO-DO (chenyu1): assign variables when the implementation is ready.
+	_, _, err = f.runFilterPlugins(ctx, state, policy, clusters)
+	if err != nil {
+		klog.ErrorS(err, errorMessage, klog.KObj(policy))
+		return ctrl.Result{}, controller.NewUnexpectedBehaviorError(err)
+	}
+
 	// Not yet fully implemented.
 	return ctrl.Result{}, nil
 }
@@ -430,3 +498,124 @@ func (f *framework) updatePolicySnapshotStatus(ctx context.Context, policy *flee
 	}
 	return nil
 }
+
+// runPostBatchPlugins runs all post-batch plugins sequentially.
+func (f *framework) runPostBatchPlugins(ctx context.Context, state *CycleState, policy *fleetv1beta1.ClusterPolicySnapshot) (int, *Status) {
+	minBatchSizeLimit := state.desiredBatchSize
+	for _, pl := range f.profile.postBatchPlugins {
+		batchSizeLimit, status := pl.PostBatch(ctx, state, policy)
+		switch {
+		case status.IsSuccess():
+			if batchSizeLimit < minBatchSizeLimit {
+				minBatchSizeLimit = batchSizeLimit
+			}
+		case status.IsInteralError():
+			return 0, status
+		case status.IsSkip(): // Do nothing.
+		default:
+			// Any status that is not Success, InternalError, or Skip is considered an error.
+			return 0, FromError(fmt.Errorf("postbatch plugin returned an unsupported status: %s", status), pl.Name())
+		}
+	}
+
+	return minBatchSizeLimit, nil
+}
+
+// runPreFilterPlugins runs all pre-filter plugins sequentially.
+func (f *framework) runPreFilterPlugins(ctx context.Context, state *CycleState, policy *fleetv1beta1.ClusterPolicySnapshot) *Status {
+	for _, pl := range f.profile.preFilterPlugins {
+		status := pl.PreFilter(ctx, state, policy)
+		switch {
+		case status.IsSuccess(): // Do nothing.
+		case status.IsInteralError():
+			return status
+		case status.IsSkip():
+			state.skippedFilterPlugins.Insert(pl.Name())
+		default:
+			// Any status that is not Success, InternalError, or Skip is considered an error.
+			return FromError(fmt.Errorf("prefilter plugin returned an unknown status %s", status), pl.Name())
+		}
+	}
+
+	return nil
+}
+
+// runFilterPluginsFor runs filter plugins for a signle cluster.
+func (f *framework) runFilterPluginsFor(ctx context.Context, state *CycleState, policy *fleetv1beta1.ClusterPolicySnapshot, cluster *fleetv1beta1.MemberCluster) *Status {
+	for _, pl := range f.profile.filterPlugins {
+		// Skip the plugin if it is not needed.
+		if state.skippedFilterPlugins.Has(pl.Name()) {
+			continue
+		}
+		status := pl.Filter(ctx, state, policy, cluster)
+		switch {
+		case status.IsSuccess(): // Do nothing.
+		case status.IsInteralError():
+			return status
+		case status.IsClusterUnschedulable():
+			return status
+		default:
+			// Any status that is not Success, InternalError, or ClusterUnschedulable is considered an error.
+			return FromError(fmt.Errorf("filter plugin returned an unknown status %s", status), pl.Name())
+		}
+	}
+
+	return nil
+}
+
+// filteredClusterWithStatus is a struct that documents clusters filtered out at the Filter stage,
+// along with a plugin status, which documents why a cluster is filtered out.
+//
+// This struct is used to keep track of the reasons why clusters are filtered out, so that they can be
+// reported back to the user as part of the scheduling decisions.
+type filteredClusterWithStatus struct {
+	cluster *fleetv1beta1.MemberCluster
+	status  *Status
+}
+
+// runFilterPlugins runs filter plugins on clusters in parallel.
+func (f *framework) runFilterPlugins(ctx context.Context, state *CycleState, policy *fleetv1beta1.ClusterPolicySnapshot, clusters []fleetv1beta1.MemberCluster) (passed []*fleetv1beta1.MemberCluster, filtered []*filteredClusterWithStatus, err error) {
+	// Create a child context.
+	childCtx, cancel := context.WithCancel(ctx)
+
+	// Pre-allocate slices to avoid races.
+	passed = make([]*fleetv1beta1.MemberCluster, 0, len(clusters))
+	var passedIdx int32 = -1
+	filtered = make([]*filteredClusterWithStatus, 0, len(clusters))
+	var filteredIdx int32 = -1
+
+	errFlag := parallelizer.NewErrorFlag()
+
+	doWork := func(pieces int) {
+		cluster := clusters[pieces]
+		status := f.runFilterPluginsFor(childCtx, state, policy, &cluster)
+		switch {
+		case status.IsSuccess():
+			// Use atomic add to avoid races.
+			newPassedIdx := atomic.AddInt32(&passedIdx, 1)
+			passed[newPassedIdx] = &cluster
+		case status.IsClusterUnschedulable():
+			// Use atomic add to avoid races.
+			newFilteredIdx := atomic.AddInt32(&filteredIdx, 1)
+			filtered[newFilteredIdx] = &filteredClusterWithStatus{
+				cluster: &cluster,
+				status:  status,
+			}
+		default: // An error has occurred.
+			errFlag.Raise(status.AsError())
+			// Cancel the child context, which will lead the parallelizer to stop running tasks.
+			cancel()
+		}
+	}
+
+	// Run inspection in parallel.
+	//
+	// Note that the parallel run will be stopped immediately upon encounter of the first error.
+	f.parallelizer.ParallelizeUntil(childCtx, len(clusters), doWork, "runFilterPlugins")
+	// Retrieve the first error from the error flag.
+	if err := errFlag.Lower(); err != nil {
+		return nil, nil, err
+	}
+
+	return passed, filtered, nil
+}
diff --git a/pkg/scheduler/framework/framework_test.go b/pkg/scheduler/framework/framework_test.go
index cde4260c5..051c1ad35 100644
--- a/pkg/scheduler/framework/framework_test.go
+++ b/pkg/scheduler/framework/framework_test.go
@@ -778,3 +778,14 @@ func TestUpdatePolicySnapshotStatus(t *testing.T) {
 		t.Errorf("scheduled condition, got %v, want %v", updatedCondition, condition)
 	}
 }
+
+// Below are a few dummy post batch plugins for testing.
+type dummyPostBatchPlugin struct{}
+
+func (p *dummyPostBatchPlugin) Name() {
+	return dummyPluginName
+}
+
+func TestRunPostBatchPlugins(t *testing.T) {
+
+}
diff --git a/pkg/scheduler/framework/profile_test.go b/pkg/scheduler/framework/profile_test.go
index 3d82b681e..d750c118e 100644
--- a/pkg/scheduler/framework/profile_test.go
+++ b/pkg/scheduler/framework/profile_test.go
@@ -6,12 +6,9 @@ Licensed under the MIT license.
 package framework
 
 import (
-	"context"
 	"testing"
 
 	"github.com/google/go-cmp/cmp"
-
-	fleetv1beta1 "go.goms.io/fleet/apis/placement/v1beta1"
 )
 
 const (
@@ -19,42 +16,6 @@ const (
 	dummyPluginName = "dummyAllPurposePlugin"
 )
 
-// A no-op, dummy plugin which connects to all extension points.
-type DummyAllPurposePlugin struct{}
-
-// Name returns the name of the dummy plugin.
-func (p *DummyAllPurposePlugin) Name() string {
-	return dummyPluginName
-}
-
-// PostBatch implements the PostBatch interface for the dummy plugin.
-func (p *DummyAllPurposePlugin) PostBatch(ctx context.Context, state CycleStatePluginReadWriter, policy *fleetv1beta1.ClusterPolicySnapshot) (size int, status *Status) { //nolint:revive
-	return 1, nil
-}
-
-// PreFilter implements the PreFilter interface for the dummy plugin.
-func (p *DummyAllPurposePlugin) PreFilter(ctx context.Context, state CycleStatePluginReadWriter, policy *fleetv1beta1.ClusterPolicySnapshot) (status *Status) { //nolint:revive
-	return nil
-}
-
-// Filter implements the Filter interface for the dummy plugin.
-func (p *DummyAllPurposePlugin) Filter(ctx context.Context, state CycleStatePluginReadWriter, policy *fleetv1beta1.ClusterPolicySnapshot, cluster *fleetv1beta1.MemberCluster) (status *Status) { //nolint:revive
-	return nil
-}
-
-// PreScore implements the PreScore interface for the dummy plugin.
-func (p *DummyAllPurposePlugin) PreScore(ctx context.Context, state CycleStatePluginReadWriter, policy *fleetv1beta1.ClusterPolicySnapshot) (status *Status) { //nolint:revive
-	return nil
-}
-
-// Score implements the Score interface for the dummy plugin.
-func (p *DummyAllPurposePlugin) Score(ctx context.Context, state CycleStatePluginReadWriter, policy *fleetv1beta1.ClusterPolicySnapshot, cluster *fleetv1beta1.MemberCluster) (score *ClusterScore, status *Status) { //nolint:revive
-	return &ClusterScore{}, nil
-}
-
-// SetUpWithFramework is a no-op to satisfy the Plugin interface.
-func (p *DummyAllPurposePlugin) SetUpWithFramework(handle Handle) {} // nolint:revive
-
 // TestProfile tests the basic ops of a Profile.
 func TestProfile(t *testing.T) {
 	profile := NewProfile(dummyProfileName)
diff --git a/pkg/scheduler/framework/status.go b/pkg/scheduler/framework/status.go
index 5e63d67a9..7327b8f3f 100644
--- a/pkg/scheduler/framework/status.go
+++ b/pkg/scheduler/framework/status.go
@@ -5,7 +5,10 @@ Licensed under the MIT license.
 
 package framework
 
-import "strings"
+import (
+	"fmt"
+	"strings"
+)
 
 // StatusCode is the status code of a Status, returned by a plugin.
 type StatusCode int
@@ -30,7 +33,7 @@ const (
 	Skip
 )
 
-var statusCodeNames = []string{"Success", "InternalError", "ClusterUnschedulable", "Skip"}
+var statusCodeNames = []string{"Success", "InternalError", "ClusterUnschedulable", "PreSkip"}
 
 // Name returns the name of a status code.
 func (sc StatusCode) Name() string {
@@ -69,8 +72,8 @@ func (s *Status) IsInteralError() bool {
 	return s.code() == internalError
 }
 
-// IsPreSkip returns if a Status is of the status code Skip.
-func (s *Status) IsPreSkip() bool {
+// IsSkip returns if a Status is of the status code Skip.
+func (s *Status) IsSkip() bool {
 	return s.code() == Skip
 }
 
@@ -116,6 +119,15 @@ func (s *Status) String() string {
 	return strings.Join(desc, ", ")
 }
 
+// AsError returns a status as an error; it returns nil if the status is not of the internalError code.
+func (s *Status) AsError() error {
+	if !s.IsInteralError() {
+		return nil
+	}
+
+	return fmt.Errorf("plugin %s returned an error %s", s.sourcePlugin, s.String())
+}
+
 // NewNonErrorStatus returns a Status with a non-error status code.
 // To return a Status of the internalError status code, use FromError() instead.
func NewNonErrorStatus(code StatusCode, sourcePlugin string, reasons ...string) *Status { diff --git a/pkg/scheduler/framework/status_test.go b/pkg/scheduler/framework/status_test.go index 8e50d6f20..8aca9cb1c 100644 --- a/pkg/scheduler/framework/status_test.go +++ b/pkg/scheduler/framework/status_test.go @@ -51,7 +51,7 @@ func TestNonNilStatusMethods(t *testing.T) { sourcePlugin: dummyPlugin, }, { - name: "status preskip", + name: "status skip", statusCode: Skip, reasons: dummyReasons, sourcePlugin: dummyPlugin, @@ -73,7 +73,7 @@ func TestNonNilStatusMethods(t *testing.T) { status.IsSuccess, status.IsInteralError, status.IsClusterUnschedulable, - status.IsPreSkip, + status.IsSkip, } for idx, checkFunc := range checkFuncs { if wantCheckOutputs[idx] != checkFunc() { @@ -114,7 +114,7 @@ func TestNilStatusMethods(t *testing.T) { status.IsSuccess, status.IsInteralError, status.IsClusterUnschedulable, - status.IsPreSkip, + status.IsSkip, } for idx, checkFunc := range checkFuncs { if wantCheckOutputs[idx] != checkFunc() { From edd98b264cc7656aafccc0029858d3c42d6deb8f Mon Sep 17 00:00:00 2001 From: michaelawyu Date: Mon, 19 Jun 2023 19:33:15 +0800 Subject: [PATCH 15/15] Added more scheduler logic --- pkg/scheduler/framework/cyclestate.go | 5 +- ...y_plugins_test.go => dummyplugins_test.go} | 81 ++- pkg/scheduler/framework/framework.go | 14 +- pkg/scheduler/framework/framework_test.go | 503 +++++++++++++++++- .../framework/{utils.go => frameworkutils.go} | 0 pkg/scheduler/framework/profile_test.go | 3 +- 6 files changed, 564 insertions(+), 42 deletions(-) rename pkg/scheduler/framework/{dummy_plugins_test.go => dummyplugins_test.go} (50%) rename pkg/scheduler/framework/{utils.go => frameworkutils.go} (100%) diff --git a/pkg/scheduler/framework/cyclestate.go b/pkg/scheduler/framework/cyclestate.go index d0029fdd7..b318f568f 100644 --- a/pkg/scheduler/framework/cyclestate.go +++ b/pkg/scheduler/framework/cyclestate.go @@ -64,5 +64,8 @@ func (c *CycleState) Delete(key StateKey) { // NewCycleState creates a CycleState. func NewCycleState() *CycleState { - return &CycleState{} + return &CycleState{ + store: sync.Map{}, + skippedFilterPlugins: sets.NewString(), + } } diff --git a/pkg/scheduler/framework/dummy_plugins_test.go b/pkg/scheduler/framework/dummyplugins_test.go similarity index 50% rename from pkg/scheduler/framework/dummy_plugins_test.go rename to pkg/scheduler/framework/dummyplugins_test.go index 8579ddc3e..0d48acc97 100644 --- a/pkg/scheduler/framework/dummy_plugins_test.go +++ b/pkg/scheduler/framework/dummyplugins_test.go @@ -7,15 +7,15 @@ package framework import ( "context" - "fmt" fleetv1beta1 "go.goms.io/fleet/apis/placement/v1beta1" ) const ( - // dummyPluginTriggerTag is a label on policy snapshots, which triggers specific plugins when set, - // to simulate different scenarios. - dummyPluginTriggerTag = "trigger-tag" + dummyAllPurposePluginName = "dummyAllPurposePlugin" + dummyPostBatchPluginNameFormat = "dummyPostBatchPlugin-%d" + dummyPreFilterPluginNameFormat = "dummyPreFilterPlugin-%d" + dummyFilterPluginNameFormat = "dummyFilterPlugin-%d" ) // A no-op, dummy plugin which connects to all extension points. @@ -23,7 +23,7 @@ type DummyAllPurposePlugin struct{} // Name returns the name of the dummy plugin. func (p *DummyAllPurposePlugin) Name() string { - return dummyPluginName + return dummyAllPurposePluginName } // PostBatch implements the PostBatch interface for the dummy plugin. 
@@ -54,34 +54,59 @@ func (p *DummyAllPurposePlugin) Score(ctx context.Context, state CycleStatePlugi // SetUpWithFramework is a no-op to satisfy the Plugin interface. func (p *DummyAllPurposePlugin) SetUpWithFramework(handle Handle) {} // nolint:revive -// A dummy post batch plugin, which adjusts batch size limit by cluster names. -type dummyPostBatchPlugin struct{} +// A dummy post batch plugin. +type dummyPostBatchPlugin struct { + name string + runner func(ctx context.Context, state CycleStatePluginReadWriter, policy *fleetv1beta1.ClusterPolicySnapshot) (size int, status *Status) +} // Name returns the name of the dummy plugin. func (p *dummyPostBatchPlugin) Name() string { - return dummyPluginName + return p.name } // PostBatch implements the PostBatch interface for the dummy plugin. -// -// It returns a status in accordance with the tag added to the policy. func (p *dummyPostBatchPlugin) PostBatch(ctx context.Context, state CycleStatePluginReadWriter, policy *fleetv1beta1.ClusterPolicySnapshot) (size int, status *Status) { //nolint:revive - tag, ok := policy.Labels[dummyPluginTriggerTag] - if !ok { - return 0, FromError(fmt.Errorf("no %s label found", dummyPluginTriggerTag), p.Name()) - } - - switch tag { - case "post-batch-success": - return 1, nil - case "post-batch-internal-error": - return 0, FromError(fmt.Errorf("internal error"), p.Name()) - case "post-batch-skip": - return 0, NewNonErrorStatus(Skip, p.Name()) - case "post-batch-cluster-unschedulable": - // For testing purposes only; normally a post batch plugin will never yield such a status. - return 0, NewNonErrorStatus(ClusterUnschedulable, p.Name()) - default: - return 1, nil - } + return p.runner(ctx, state, policy) +} + +// SetUpWithFramework is a no-op to satisfy the Plugin interface. +func (p *dummyPostBatchPlugin) SetUpWithFramework(handle Handle) {} // nolint:revive + +// A dummy pre-filter plugin. +type dummyPreFilterPlugin struct { + name string + runner func(ctx context.Context, state CycleStatePluginReadWriter, policy *fleetv1beta1.ClusterPolicySnapshot) (status *Status) +} + +// Name returns the name of the dummy plugin. +func (p *dummyPreFilterPlugin) Name() string { + return p.name +} + +// PreFilter implements the PreFilter interface for the dummy plugin. +func (p *dummyPreFilterPlugin) PreFilter(ctx context.Context, state CycleStatePluginReadWriter, policy *fleetv1beta1.ClusterPolicySnapshot) (status *Status) { //nolint:revive + return p.runner(ctx, state, policy) } + +// SetUpWithFramework is a no-op to satisfy the Plugin interface. +func (p *dummyPreFilterPlugin) SetUpWithFramework(handle Handle) {} // nolint:revive + +// A dummy filter plugin. +type dummyFilterPlugin struct { + name string + runner func(ctx context.Context, state CycleStatePluginReadWriter, policy *fleetv1beta1.ClusterPolicySnapshot, cluster *fleetv1beta1.MemberCluster) (status *Status) +} + +// Name returns the name of the dummy plugin. +func (d *dummyFilterPlugin) Name() string { + return d.name +} + +// Filter implements the Filter interface for the dummy plugin. +func (d *dummyFilterPlugin) Filter(ctx context.Context, state CycleStatePluginReadWriter, policy *fleetv1beta1.ClusterPolicySnapshot, cluster *fleetv1beta1.MemberCluster) (status *Status) { //nolint:revive + return d.runner(ctx, state, policy, cluster) +} + +// SetUpWithFramework is a no-op to satisfy the Plugin interface. 
+func (p *dummyFilterPlugin) SetUpWithFramework(handle Handle) {} // nolint: revive diff --git a/pkg/scheduler/framework/framework.go b/pkg/scheduler/framework/framework.go index 7e72316ac..437a91f85 100644 --- a/pkg/scheduler/framework/framework.go +++ b/pkg/scheduler/framework/framework.go @@ -540,7 +540,7 @@ func (f *framework) runPreFilterPlugins(ctx context.Context, state *CycleState, return nil } -// runFilterPluginsFor runs filter plugins for a signle cluster. +// runFilterPluginsFor runs filter plugins for a single cluster. func (f *framework) runFilterPluginsFor(ctx context.Context, state *CycleState, policy *fleetv1beta1.ClusterPolicySnapshot, cluster *fleetv1beta1.MemberCluster) *Status { for _, pl := range f.profile.filterPlugins { // Skip the plugin if it is not needed. @@ -579,9 +579,9 @@ func (f *framework) runFilterPlugins(ctx context.Context, state *CycleState, pol childCtx, cancel := context.WithCancel(ctx) // Pre-allocate slices to avoid races. - passed = make([]*fleetv1beta1.MemberCluster, 0, len(clusters)) + passed = make([]*fleetv1beta1.MemberCluster, len(clusters)) var passedIdx int32 = -1 - filtered = make([]*filteredClusterWithStatus, 0, len(clusters)) + filtered = make([]*filteredClusterWithStatus, len(clusters)) var filteredIdx int32 = -1 errFlag := parallelizer.NewErrorFlag() @@ -591,11 +591,11 @@ func (f *framework) runFilterPlugins(ctx context.Context, state *CycleState, pol status := f.runFilterPluginsFor(childCtx, state, policy, &cluster) switch { case status.IsSuccess(): - // Use atomic add to avoid races. + // Use atomic add to avoid races with minimum overhead. newPassedIdx := atomic.AddInt32(&passedIdx, 1) passed[newPassedIdx] = &cluster case status.IsClusterUnschedulable(): - // Use atomic add to avoid races. + // Use atomic add to avoid races with minimum overhead. newFilteredIdx := atomic.AddInt32(&filteredIdx, 1) filtered[newFilteredIdx] = &filteredClusterWithStatus{ cluster: &cluster, @@ -617,5 +617,9 @@ func (f *framework) runFilterPlugins(ctx context.Context, state *CycleState, pol return nil, nil, err } + // Trim the slices to the actual size. + passed = passed[:passedIdx+1] + filtered = filtered[:filteredIdx+1] + return passed, filtered, nil } diff --git a/pkg/scheduler/framework/framework_test.go b/pkg/scheduler/framework/framework_test.go index 051c1ad35..e4b5fd64f 100644 --- a/pkg/scheduler/framework/framework_test.go +++ b/pkg/scheduler/framework/framework_test.go @@ -7,6 +7,7 @@ package framework import ( "context" + "fmt" "log" "os" "sort" @@ -23,6 +24,7 @@ import ( "sigs.k8s.io/controller-runtime/pkg/controller/controllerutil" fleetv1beta1 "go.goms.io/fleet/apis/placement/v1beta1" + "go.goms.io/fleet/pkg/scheduler/framework/parallelizer" "go.goms.io/fleet/pkg/utils" ) @@ -36,7 +38,8 @@ const ( ) var ( - ignoredCondFields = cmpopts.IgnoreFields(metav1.Condition{}, "LastTransitionTime") + ignoredCondFields = cmpopts.IgnoreFields(metav1.Condition{}, "LastTransitionTime") + ignoredStatusFields = cmpopts.IgnoreFields(Status{}, "reasons", "err") ) // TO-DO (chenyu1): expand the test cases as development stablizes. @@ -779,13 +782,501 @@ func TestUpdatePolicySnapshotStatus(t *testing.T) { } } -// Below are a few dummy post batch plugins for testing. -type dummyPostBatchPlugin struct{} +// TestRunPostBatchPlugins tests the runPostBatchPlugins method. 
+func TestRunPostBatchPlugins(t *testing.T) { + dummyPostBatchPluginNameA := fmt.Sprintf(dummyPostBatchPluginNameFormat, 0) + dummyPostBatchPluginNameB := fmt.Sprintf(dummyPostBatchPluginNameFormat, 1) + + testCases := []struct { + name string + postBatchPlugins []PostBatchPlugin + desiredBatchSize int + wantBatchLimit int + wantStatus *Status + }{ + { + name: "single plugin, success", + postBatchPlugins: []PostBatchPlugin{ + &dummyPostBatchPlugin{ + name: dummyPostBatchPluginNameA, + runner: func(ctx context.Context, state CycleStatePluginReadWriter, policy *fleetv1beta1.ClusterPolicySnapshot) (size int, status *Status) { + return 1, nil + }, + }, + }, + desiredBatchSize: 10, + wantBatchLimit: 1, + }, + { + name: "single plugin, success, oversized", + postBatchPlugins: []PostBatchPlugin{ + &dummyPostBatchPlugin{ + name: dummyPostBatchPluginNameA, + runner: func(ctx context.Context, state CycleStatePluginReadWriter, policy *fleetv1beta1.ClusterPolicySnapshot) (size int, status *Status) { + return 15, nil + }, + }, + }, + desiredBatchSize: 10, + wantBatchLimit: 10, + }, + { + name: "multiple plugins, all success", + postBatchPlugins: []PostBatchPlugin{ + &dummyPostBatchPlugin{ + name: dummyPostBatchPluginNameA, + runner: func(ctx context.Context, state CycleStatePluginReadWriter, policy *fleetv1beta1.ClusterPolicySnapshot) (size int, status *Status) { + return 2, nil + }, + }, + &dummyPostBatchPlugin{ + name: dummyPostBatchPluginNameB, + runner: func(ctx context.Context, state CycleStatePluginReadWriter, policy *fleetv1beta1.ClusterPolicySnapshot) (size int, status *Status) { + return 1, nil + }, + }, + }, + desiredBatchSize: 10, + wantBatchLimit: 1, + }, + { + name: "multple plugins, one success, one error", + postBatchPlugins: []PostBatchPlugin{ + &dummyPostBatchPlugin{ + name: dummyPostBatchPluginNameA, + runner: func(ctx context.Context, state CycleStatePluginReadWriter, policy *fleetv1beta1.ClusterPolicySnapshot) (size int, status *Status) { + return 0, FromError(fmt.Errorf("internal error"), dummyPostBatchPluginNameA) + }, + }, + &dummyPostBatchPlugin{ + name: dummyPostBatchPluginNameB, + runner: func(ctx context.Context, state CycleStatePluginReadWriter, policy *fleetv1beta1.ClusterPolicySnapshot) (size int, status *Status) { + return 1, nil + }, + }, + }, + desiredBatchSize: 10, + wantStatus: FromError(fmt.Errorf("internal error"), dummyPostBatchPluginNameA), + }, + { + name: "single plugin, skip", + postBatchPlugins: []PostBatchPlugin{ + &dummyPostBatchPlugin{ + name: dummyPostBatchPluginNameA, + runner: func(ctx context.Context, state CycleStatePluginReadWriter, policy *fleetv1beta1.ClusterPolicySnapshot) (size int, status *Status) { + return 0, NewNonErrorStatus(Skip, dummyPostBatchPluginNameA) + }, + }, + }, + desiredBatchSize: 10, + wantBatchLimit: 10, + }, + { + name: "single plugin, unschedulable", + postBatchPlugins: []PostBatchPlugin{ + &dummyPostBatchPlugin{ + name: dummyPostBatchPluginNameA, + runner: func(ctx context.Context, state CycleStatePluginReadWriter, policy *fleetv1beta1.ClusterPolicySnapshot) (size int, status *Status) { + return 1, NewNonErrorStatus(ClusterUnschedulable, dummyPostBatchPluginNameA) + }, + }, + }, + desiredBatchSize: 10, + wantStatus: FromError(fmt.Errorf("internal error"), dummyPostBatchPluginNameA), + }, + } + + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + profile := NewProfile(dummyProfileName) + for _, p := range tc.postBatchPlugins { + profile.WithPostBatchPlugin(p) + } + f := &framework{ + profile: profile, + } 
-func (p *dummyPostBatchPlugin) Name() { - return dummyPluginName + ctx := context.Background() + state := NewCycleState() + state.desiredBatchSize = tc.desiredBatchSize + policy := &fleetv1beta1.ClusterPolicySnapshot{ + ObjectMeta: metav1.ObjectMeta{ + Name: policyName, + }, + } + batchLimit, status := f.runPostBatchPlugins(ctx, state, policy) + if batchLimit != tc.wantBatchLimit || !cmp.Equal(status, tc.wantStatus, cmpopts.IgnoreUnexported(Status{}), ignoredStatusFields) { + t.Errorf("runPostBatchPlugins(%v, %v) = %v %v, want %v, %v", state, policy, batchLimit, status, tc.wantBatchLimit, tc.wantStatus) + } + }) + } } -func TestRunPostBatchPlugins(t *testing.T) { +// TestRunPreFilterPlugins tests the runPreFilterPlugins method. +func TestRunPreFilterPlugins(t *testing.T) { + dummyPreFilterPluginNameA := fmt.Sprintf(dummyPreFilterPluginNameFormat, 0) + dummyPreFilterPluginNameB := fmt.Sprintf(dummyPreFilterPluginNameFormat, 1) + + testCases := []struct { + name string + preFilterPlugins []PreFilterPlugin + wantSkippedPluginNames []string + wantStatus *Status + }{ + { + name: "single plugin, success", + preFilterPlugins: []PreFilterPlugin{ + &dummyPreFilterPlugin{ + name: dummyPreFilterPluginNameA, + runner: func(ctx context.Context, state CycleStatePluginReadWriter, policy *fleetv1beta1.ClusterPolicySnapshot) *Status { + return nil + }, + }, + }, + }, + { + name: "multiple plugins, one success, one skip", + preFilterPlugins: []PreFilterPlugin{ + &dummyPreFilterPlugin{ + name: dummyPreFilterPluginNameA, + runner: func(ctx context.Context, state CycleStatePluginReadWriter, policy *fleetv1beta1.ClusterPolicySnapshot) (status *Status) { + return nil + }, + }, + &dummyPreFilterPlugin{ + name: dummyPreFilterPluginNameB, + runner: func(ctx context.Context, state CycleStatePluginReadWriter, policy *fleetv1beta1.ClusterPolicySnapshot) (status *Status) { + return NewNonErrorStatus(Skip, dummyPreFilterPluginNameB) + }, + }, + }, + wantSkippedPluginNames: []string{dummyPreFilterPluginNameB}, + }, + { + name: "single plugin, internal error", + preFilterPlugins: []PreFilterPlugin{ + &dummyPreFilterPlugin{ + name: dummyPreFilterPluginNameA, + runner: func(ctx context.Context, state CycleStatePluginReadWriter, policy *fleetv1beta1.ClusterPolicySnapshot) *Status { + return FromError(fmt.Errorf("internal error"), dummyPreFilterPluginNameA) + }, + }, + }, + wantStatus: FromError(fmt.Errorf("internal error"), dummyPreFilterPluginNameA), + }, + { + name: "single plugin, unschedulable", + preFilterPlugins: []PreFilterPlugin{ + &dummyPreFilterPlugin{ + name: dummyPreFilterPluginNameA, + runner: func(ctx context.Context, state CycleStatePluginReadWriter, policy *fleetv1beta1.ClusterPolicySnapshot) *Status { + return NewNonErrorStatus(ClusterUnschedulable, dummyPreFilterPluginNameA) + }, + }, + }, + wantStatus: FromError(fmt.Errorf("cluster is unschedulable"), dummyPreFilterPluginNameA), + }, + } + + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + profile := NewProfile(dummyProfileName) + for _, p := range tc.preFilterPlugins { + profile.WithPreFilterPlugin(p) + } + f := &framework{ + profile: profile, + } + + ctx := context.Background() + state := NewCycleState() + policy := &fleetv1beta1.ClusterPolicySnapshot{ + ObjectMeta: metav1.ObjectMeta{ + Name: policyName, + }, + } + + status := f.runPreFilterPlugins(ctx, state, policy) + if !cmp.Equal(status, tc.wantStatus, cmp.AllowUnexported(Status{}), ignoredStatusFields) { + t.Errorf("runPreFilterPlugins(%v, %v) = %v, want %v", state, policy, 
status, tc.wantStatus) + } + }) + } +} + +// TestRunFilterPluginsFor tests the runFilterPluginsFor method. +func TestRunFilterPluginsFor(t *testing.T) { + dummyFilterPluginNameA := fmt.Sprintf(dummyFilterPluginNameFormat, 0) + dummyFilterPluginNameB := fmt.Sprintf(dummyFilterPluginNameFormat, 1) + + testCases := []struct { + name string + filterPlugins []FilterPlugin + skippedPluginNames []string + wantStatus *Status + }{ + { + name: "single plugin, success", + filterPlugins: []FilterPlugin{ + &dummyFilterPlugin{ + name: dummyFilterPluginNameA, + runner: func(ctx context.Context, state CycleStatePluginReadWriter, policy *fleetv1beta1.ClusterPolicySnapshot, cluster *fleetv1beta1.MemberCluster) (status *Status) { + return nil + }, + }, + }, + }, + { + name: "multiple plugins, one success, one skipped", + filterPlugins: []FilterPlugin{ + &dummyFilterPlugin{ + name: dummyFilterPluginNameA, + runner: func(ctx context.Context, state CycleStatePluginReadWriter, policy *fleetv1beta1.ClusterPolicySnapshot, cluster *fleetv1beta1.MemberCluster) (status *Status) { + return nil + }, + }, + &dummyFilterPlugin{ + name: dummyFilterPluginNameB, + runner: func(ctx context.Context, state CycleStatePluginReadWriter, policy *fleetv1beta1.ClusterPolicySnapshot, cluster *fleetv1beta1.MemberCluster) (status *Status) { + return NewNonErrorStatus(ClusterUnschedulable, dummyFilterPluginNameB) + }, + }, + }, + skippedPluginNames: []string{dummyFilterPluginNameB}, + }, + { + name: "single plugin, internal error", + filterPlugins: []FilterPlugin{ + &dummyFilterPlugin{ + name: dummyFilterPluginNameA, + runner: func(ctx context.Context, state CycleStatePluginReadWriter, policy *fleetv1beta1.ClusterPolicySnapshot, cluster *fleetv1beta1.MemberCluster) (status *Status) { + return FromError(fmt.Errorf("internal error"), dummyFilterPluginNameA) + }, + }, + }, + wantStatus: FromError(fmt.Errorf("internal error"), dummyFilterPluginNameA), + }, + { + name: "multiple plugins, one unschedulable, one success", + filterPlugins: []FilterPlugin{ + &dummyFilterPlugin{ + name: dummyFilterPluginNameA, + runner: func(ctx context.Context, state CycleStatePluginReadWriter, policy *fleetv1beta1.ClusterPolicySnapshot, cluster *fleetv1beta1.MemberCluster) (status *Status) { + return NewNonErrorStatus(ClusterUnschedulable, dummyFilterPluginNameA) + }, + }, + &dummyFilterPlugin{ + name: dummyFilterPluginNameB, + runner: func(ctx context.Context, state CycleStatePluginReadWriter, policy *fleetv1beta1.ClusterPolicySnapshot, cluster *fleetv1beta1.MemberCluster) (status *Status) { + return nil + }, + }, + }, + wantStatus: NewNonErrorStatus(ClusterUnschedulable, dummyFilterPluginNameA), + }, + { + name: "single plugin, skip", + filterPlugins: []FilterPlugin{ + &dummyFilterPlugin{ + name: dummyFilterPluginNameA, + runner: func(ctx context.Context, state CycleStatePluginReadWriter, policy *fleetv1beta1.ClusterPolicySnapshot, cluster *fleetv1beta1.MemberCluster) (status *Status) { + return NewNonErrorStatus(Skip, dummyFilterPluginNameA) + }, + }, + }, + wantStatus: FromError(fmt.Errorf("internal error"), dummyFilterPluginNameA), + }, + } + + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + profile := NewProfile(dummyProfileName) + for _, p := range tc.filterPlugins { + profile.WithFilterPlugin(p) + } + f := &framework{ + profile: profile, + } + + ctx := context.Background() + state := NewCycleState() + for _, name := range tc.skippedPluginNames { + state.skippedFilterPlugins.Insert(name) + } + policy := 
&fleetv1beta1.ClusterPolicySnapshot{ + ObjectMeta: metav1.ObjectMeta{ + Name: policyName, + }, + } + cluster := &fleetv1beta1.MemberCluster{ + ObjectMeta: metav1.ObjectMeta{ + Name: clusterName, + }, + } + + status := f.runFilterPluginsFor(ctx, state, policy, cluster) + if !cmp.Equal(status, tc.wantStatus, cmpopts.IgnoreUnexported(Status{}), ignoredStatusFields) { + t.Errorf("runFilterPluginsFor(%v, %v, %v) = %v, want %v", state, policy, cluster, status, tc.wantStatus) + } + }) + } +} + +// TestRunFilterPlugins tests the runFilterPlugins method. +func TestRunFilterPlugins(t *testing.T) { + dummyFilterPluginNameA := fmt.Sprintf(dummyFilterPluginNameFormat, 0) + dummyFilterPluginNameB := fmt.Sprintf(dummyFilterPluginNameFormat, 1) + + anotherClusterName := "singingbutterfly" + clusters := []fleetv1beta1.MemberCluster{ + { + ObjectMeta: metav1.ObjectMeta{ + Name: clusterName, + }, + }, + { + ObjectMeta: metav1.ObjectMeta{ + Name: altClusterName, + }, + }, + { + ObjectMeta: metav1.ObjectMeta{ + Name: anotherClusterName, + }, + }, + } + testCases := []struct { + name string + filterPlugins []FilterPlugin + wantPassedClusterNames []string + wantFilteredClusterNames []string + expectedToFail bool + }{ + { + name: "three clusters, two filter plugins, all passed", + filterPlugins: []FilterPlugin{ + &dummyFilterPlugin{ + name: dummyFilterPluginNameA, + runner: func(ctx context.Context, state CycleStatePluginReadWriter, policy *fleetv1beta1.ClusterPolicySnapshot, cluster *fleetv1beta1.MemberCluster) (status *Status) { + return nil + }, + }, + &dummyFilterPlugin{ + name: dummyFilterPluginNameB, + runner: func(ctx context.Context, state CycleStatePluginReadWriter, policy *fleetv1beta1.ClusterPolicySnapshot, cluster *fleetv1beta1.MemberCluster) (status *Status) { + return nil + }, + }, + }, + wantPassedClusterNames: []string{clusterName, altClusterName, anotherClusterName}, + }, + { + name: "three clusters, two filter plugins, two filtered", + filterPlugins: []FilterPlugin{ + &dummyFilterPlugin{ + name: dummyFilterPluginNameA, + runner: func(ctx context.Context, state CycleStatePluginReadWriter, policy *fleetv1beta1.ClusterPolicySnapshot, cluster *fleetv1beta1.MemberCluster) (status *Status) { + if cluster.Name == clusterName { + return NewNonErrorStatus(ClusterUnschedulable, dummyFilterPluginNameA) + } + return nil + }, + }, + &dummyFilterPlugin{ + name: dummyFilterPluginNameB, + runner: func(ctx context.Context, state CycleStatePluginReadWriter, policy *fleetv1beta1.ClusterPolicySnapshot, cluster *fleetv1beta1.MemberCluster) (status *Status) { + if cluster.Name == anotherClusterName { + return NewNonErrorStatus(ClusterUnschedulable, dummyFilterPluginNameB) + } + return nil + }, + }, + }, + wantPassedClusterNames: []string{altClusterName}, + wantFilteredClusterNames: []string{clusterName, anotherClusterName}, + }, + { + name: "three clusters, internal error", + filterPlugins: []FilterPlugin{ + &dummyFilterPlugin{ + name: dummyFilterPluginNameA, + runner: func(ctx context.Context, state CycleStatePluginReadWriter, policy *fleetv1beta1.ClusterPolicySnapshot, cluster *fleetv1beta1.MemberCluster) (status *Status) { + return nil + }, + }, + &dummyFilterPlugin{ + name: dummyFilterPluginNameB, + runner: func(ctx context.Context, state CycleStatePluginReadWriter, policy *fleetv1beta1.ClusterPolicySnapshot, cluster *fleetv1beta1.MemberCluster) (status *Status) { + if cluster.Name == anotherClusterName { + return FromError(fmt.Errorf("internal error"), dummyFilterPluginNameB) + } + return nil + }, + }, + }, + 
expectedToFail: true, + }, + } + + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + profile := NewProfile(dummyProfileName) + for _, p := range tc.filterPlugins { + profile.WithFilterPlugin(p) + } + f := &framework{ + profile: profile, + parallelizer: parallelizer.NewParallelizer(parallelizer.DefaultNumOfWorkers), + } + + ctx := context.Background() + state := NewCycleState() + policy := &fleetv1beta1.ClusterPolicySnapshot{ + ObjectMeta: metav1.ObjectMeta{ + Name: policyName, + }, + } + + passed, filtered, err := f.runFilterPlugins(ctx, state, policy, clusters) + if tc.expectedToFail { + if err == nil { + t.Errorf("runFilterPlugins(%v, %v, %v) = %v %v %v, want error", state, policy, clusters, passed, filtered, err) + } + return + } + + // The method runs in parallel; as a result the order cannot be guaranteed. + // Organize the results into maps for easier comparison. + passedMap := make(map[string]bool) + for _, cluster := range passed { + passedMap[cluster.Name] = true + } + wantPassedMap := make(map[string]bool) + for _, name := range tc.wantPassedClusterNames { + wantPassedMap[name] = true + } + + if !cmp.Equal(passedMap, wantPassedMap) { + t.Errorf("passed clusters, got %v, want %v", passedMap, wantPassedMap) + } + + filteredMap := make(map[string]bool) + for _, item := range filtered { + filteredMap[item.cluster.Name] = true + // As a sanity check, verify if all status are of the ClusterUnschedulable status code. + if !item.status.IsClusterUnschedulable() { + t.Errorf("filtered cluster %s status, got %v, want status code ClusterUnschedulable", item.cluster.Name, item.status) + } + } + wantFilteredMap := make(map[string]bool) + for _, name := range tc.wantFilteredClusterNames { + wantFilteredMap[name] = true + } + + if !cmp.Equal(filteredMap, wantFilteredMap) { + t.Errorf("filtered clusters, got %v, want %v", filteredMap, wantFilteredMap) + } + }) + } } diff --git a/pkg/scheduler/framework/utils.go b/pkg/scheduler/framework/frameworkutils.go similarity index 100% rename from pkg/scheduler/framework/utils.go rename to pkg/scheduler/framework/frameworkutils.go diff --git a/pkg/scheduler/framework/profile_test.go b/pkg/scheduler/framework/profile_test.go index d750c118e..e34dff894 100644 --- a/pkg/scheduler/framework/profile_test.go +++ b/pkg/scheduler/framework/profile_test.go @@ -13,7 +13,6 @@ import ( const ( dummyProfileName = "dummyProfile" - dummyPluginName = "dummyAllPurposePlugin" ) // TestProfile tests the basic ops of a Profile. @@ -37,7 +36,7 @@ func TestProfile(t *testing.T) { preScorePlugins: []PreScorePlugin{dummyAllPurposePlugin}, scorePlugins: []ScorePlugin{dummyAllPurposePlugin}, registeredPlugins: map[string]Plugin{ - dummyPluginName: dummyPlugin, + dummyAllPurposePluginName: dummyPlugin, }, }