From 93f3b1b46115358d9c7c8f4e929e629c51af2b05 Mon Sep 17 00:00:00 2001 From: Vijay Sankar Ganesh Date: Tue, 28 Apr 2026 17:01:29 -0700 Subject: [PATCH] BYOR OME changes --- pkg/constants/constants.go | 1 + .../deployment/deployment_reconciler.go | 15 +- .../reconcilers/knative/ksvc_reconciler.go | 8 +- .../reconcilers/lws/lws_reconciler.go | 19 +- .../multinodeprober_reconciler.go | 5 +- .../reconcilers/multinodevllm/ray.go | 6 +- .../inferenceservice/utils/annotations.go | 19 +- .../utils/annotations_test.go | 304 ++---------------- 8 files changed, 78 insertions(+), 299 deletions(-) diff --git a/pkg/constants/constants.go b/pkg/constants/constants.go index 171fc858..6e5949c3 100644 --- a/pkg/constants/constants.go +++ b/pkg/constants/constants.go @@ -497,6 +497,7 @@ const ( KueueQueueLabelKey = "kueue.x-k8s.io/queue-name" KueueWorkloadPriorityClassLabelKey = "kueue.x-k8s.io/priority-class" KueueEnabledLabelKey = "kueue-enabled" + DACQueueNameLabelKey = "ome.io/dac-queue-name" ) // Model Agent & Model Controller diff --git a/pkg/controller/v1beta1/inferenceservice/reconcilers/deployment/deployment_reconciler.go b/pkg/controller/v1beta1/inferenceservice/reconcilers/deployment/deployment_reconciler.go index 45c03474..8e4248bc 100644 --- a/pkg/controller/v1beta1/inferenceservice/reconcilers/deployment/deployment_reconciler.go +++ b/pkg/controller/v1beta1/inferenceservice/reconcilers/deployment/deployment_reconciler.go @@ -38,18 +38,27 @@ func NewDeploymentReconciler(client kclient.Client, return &DeploymentReconciler{ client: client, scheme: scheme, - Deployment: createRawDeployment(componentMeta, componentExt, podSpec), + Deployment: createRawDeploymentWithClient(client, componentMeta, componentExt, podSpec), componentExt: componentExt, } } -func createRawDeployment(componentMeta metav1.ObjectMeta, +func createRawDeployment( + componentMeta metav1.ObjectMeta, + componentExt *v1beta1.ComponentExtensionSpec, + podSpec *corev1.PodSpec) *appsv1.Deployment { + return createRawDeploymentWithClient(nil, componentMeta, componentExt, podSpec) +} + +func createRawDeploymentWithClient( + cl kclient.Client, + componentMeta metav1.ObjectMeta, componentExt *v1beta1.ComponentExtensionSpec, podSpec *corev1.PodSpec) *appsv1.Deployment { podMetadata := componentMeta podMetadata.Labels["app"] = constants.TruncateNameWithMaxLength(componentMeta.Name, 63) - utils.SetPodLabelsFromAnnotations(&podMetadata) + utils.SetPodLabelsFromAnnotationsWithClient(cl, &podMetadata) setDefaultPodSpec(podSpec) deployment := &appsv1.Deployment{ diff --git a/pkg/controller/v1beta1/inferenceservice/reconcilers/knative/ksvc_reconciler.go b/pkg/controller/v1beta1/inferenceservice/reconcilers/knative/ksvc_reconciler.go index 546156c0..73c376f6 100644 --- a/pkg/controller/v1beta1/inferenceservice/reconcilers/knative/ksvc_reconciler.go +++ b/pkg/controller/v1beta1/inferenceservice/reconcilers/knative/ksvc_reconciler.go @@ -51,13 +51,15 @@ func NewKsvcReconciler(client client.Client, return &KsvcReconciler{ client: client, scheme: scheme, - Service: createKnativeService(componentMeta, componentExt, podSpec, componentStatus), + Service: createKnativeService(client, componentMeta, componentExt, podSpec, componentStatus), componentExt: componentExt, componentStatus: componentStatus, } } -func createKnativeService(componentMeta metav1.ObjectMeta, +func createKnativeService( + cl client.Client, + componentMeta metav1.ObjectMeta, componentExtension *v1beta1.ComponentExtensionSpec, podSpec *corev1.PodSpec, componentStatus v1beta1.ComponentStatusSpec) *knservingv1.Service { @@ -138,7 +140,7 @@ func createKnativeService(componentMeta metav1.ObjectMeta, return !utils.Includes(constants.RevisionTemplateLabelDisallowedList, key) }) - isvcutils.SetPodLabelsFromAnnotations(&componentMeta) + isvcutils.SetPodLabelsFromAnnotationsWithClient(cl, &componentMeta) service := &knservingv1.Service{ ObjectMeta: metav1.ObjectMeta{ diff --git a/pkg/controller/v1beta1/inferenceservice/reconcilers/lws/lws_reconciler.go b/pkg/controller/v1beta1/inferenceservice/reconcilers/lws/lws_reconciler.go index 9abc36dd..5f3e09ad 100644 --- a/pkg/controller/v1beta1/inferenceservice/reconcilers/lws/lws_reconciler.go +++ b/pkg/controller/v1beta1/inferenceservice/reconcilers/lws/lws_reconciler.go @@ -38,12 +38,23 @@ func NewLWSReconciler(client client.Client, return &LWSReconciler{ client: client, scheme: scheme, - LWS: createLWS(headPod, workerPod, workerSize, componentExt, componentMeta), + LWS: createLWSWithClient(client, headPod, workerPod, workerSize, componentExt, componentMeta), ComponentExt: componentExt, } } -func createLWS(headPod *corev1.PodSpec, +func createLWS( + headPod *corev1.PodSpec, + workerPod *corev1.PodSpec, + workerSize int32, + componentExt *v1beta1.ComponentExtensionSpec, + componentMeta metav1.ObjectMeta) *lws.LeaderWorkerSet { + return createLWSWithClient(nil, headPod, workerPod, workerSize, componentExt, componentMeta) +} + +func createLWSWithClient( + cl client.Client, + headPod *corev1.PodSpec, workerPod *corev1.PodSpec, workerSize int32, componentExt *v1beta1.ComponentExtensionSpec, @@ -55,8 +66,8 @@ func createLWS(headPod *corev1.PodSpec, lwsObjectMeta.Name = constants.LWSName(componentMeta.Name) headPodMeta.Labels["app"] = constants.GetRawServiceLabel(componentMeta.Name) headPodMeta.Labels["ray.io/node-type"] = "head" - utils.SetPodLabelsFromAnnotations(headPodMeta) - utils.SetPodLabelsFromAnnotations(workerPodMeta) + utils.SetPodLabelsFromAnnotationsWithClient(cl, headPodMeta) + utils.SetPodLabelsFromAnnotationsWithClient(cl, workerPodMeta) // Need to remove Prometheus annotations for workerPods as workerPods don't expose endpoints abandonedWorkerPodAnnotations := []string{ diff --git a/pkg/controller/v1beta1/inferenceservice/reconcilers/multinodevllm/multinodeprober_reconciler.go b/pkg/controller/v1beta1/inferenceservice/reconcilers/multinodevllm/multinodeprober_reconciler.go index b1c11cb2..7deebc0f 100644 --- a/pkg/controller/v1beta1/inferenceservice/reconcilers/multinodevllm/multinodeprober_reconciler.go +++ b/pkg/controller/v1beta1/inferenceservice/reconcilers/multinodevllm/multinodeprober_reconciler.go @@ -46,7 +46,7 @@ func NewMultiNodeProberReconciler( Scheme: "http", Host: fmt.Sprintf("%s.%s.svc.cluster.local", constants.DefaultRayHeadServiceName(componentMeta.Name, i), componentMeta.Namespace), } - dply := createRawDeployment(componentMeta, multiNodeProberConfig, url, i) + dply := createRawDeployment(client, componentMeta, multiNodeProberConfig, url, i) deployments = append(deployments, dply) } return &MultiNodeProberReconciler{ @@ -57,6 +57,7 @@ func NewMultiNodeProberReconciler( } func createRawDeployment( + cl kclient.Client, componentMeta metav1.ObjectMeta, multiNodeProberConfig *controllerconfig.MultiNodeProberConfig, url *knapis.URL, @@ -68,7 +69,7 @@ func createRawDeployment( podMetadata.Labels = make(map[string]string) } podMetadata.Labels["app"] = constants.GetRawServiceLabel(componentMeta.Name) - utils.SetPodLabelsFromAnnotations(podMetadata) + utils.SetPodLabelsFromAnnotationsWithClient(cl, podMetadata) podSpec := getDefaultPodSpec(multiNodeProberConfig, url) deployment := &appsv1.Deployment{ diff --git a/pkg/controller/v1beta1/inferenceservice/reconcilers/multinodevllm/ray.go b/pkg/controller/v1beta1/inferenceservice/reconcilers/multinodevllm/ray.go index 6722f8bf..17334cdb 100644 --- a/pkg/controller/v1beta1/inferenceservice/reconcilers/multinodevllm/ray.go +++ b/pkg/controller/v1beta1/inferenceservice/reconcilers/multinodevllm/ray.go @@ -48,7 +48,7 @@ func NewRayReconciler(client client.Client, rayClusters := make([]*ray.RayCluster, 0, int(*componentExt.MinReplicas)) for i := 0; i < int(*componentExt.MinReplicas); i++ { - rayCluster := createRayCluster(&componentMeta, podSpec, i) + rayCluster := createRayCluster(client, &componentMeta, podSpec, i) rayClusters = append(rayClusters, rayCluster) } @@ -249,10 +249,10 @@ func (r *RayReconciler) deleteExtraRayClusters(existingRayClusters *ray.RayClust return nil } -func createRayCluster(meta *metav1.ObjectMeta, spec *corev1.PodSpec, index int) *ray.RayCluster { +func createRayCluster(cl client.Client, meta *metav1.ObjectMeta, spec *corev1.PodSpec, index int) *ray.RayCluster { clusterName := fmt.Sprintf("%s-%d", meta.Name, index) - utils.SetPodLabelsFromAnnotations(meta) + utils.SetPodLabelsFromAnnotationsWithClient(cl, meta) workerReplicas := int32(constants.DefaultMinReplicas) setLifecycleHooks(spec) diff --git a/pkg/controller/v1beta1/inferenceservice/utils/annotations.go b/pkg/controller/v1beta1/inferenceservice/utils/annotations.go index 5aea9cca..0ea0c2d6 100644 --- a/pkg/controller/v1beta1/inferenceservice/utils/annotations.go +++ b/pkg/controller/v1beta1/inferenceservice/utils/annotations.go @@ -4,6 +4,7 @@ import ( "strings" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "sigs.k8s.io/controller-runtime/pkg/client" "github.com/sgl-project/ome/pkg/constants" "github.com/sgl-project/ome/pkg/controller/v1beta1/controllerconfig" @@ -60,6 +61,10 @@ func IsCohereCommand1TFewFTServing(servingPodObjectMeta *metav1.ObjectMeta) bool } func SetPodLabelsFromAnnotations(metadata *metav1.ObjectMeta) { + SetPodLabelsFromAnnotationsWithClient(nil, metadata) +} + +func SetPodLabelsFromAnnotationsWithClient(cl client.Client, metadata *metav1.ObjectMeta) { // Check if the VolcanoQueue annotation exists and set the label if it does. if volcanoQueue, ok := metadata.Annotations[constants.VolcanoQueue]; ok { metadata.Labels[constants.VolcanoQueueName] = volcanoQueue @@ -67,7 +72,7 @@ func SetPodLabelsFromAnnotations(metadata *metav1.ObjectMeta) { } else if dac, ok := metadata.Annotations[constants.DedicatedAICluster]; ok { if _, ok = metadata.Annotations[constants.KueueEnabledLabelKey]; ok { // Kueue case - metadata.Labels[constants.KueueQueueLabelKey] = dac + metadata.Labels[constants.KueueQueueLabelKey] = resolveKueueQueueNameForDedicatedAICluster(cl, metadata, dac) metadata.Labels[constants.KueueWorkloadPriorityClassLabelKey] = constants.DedicatedAiClusterPreemptionWorkloadPriorityClass } else { // Volcano case @@ -82,6 +87,18 @@ func SetPodLabelsFromAnnotations(metadata *metav1.ObjectMeta) { } } +func resolveKueueQueueNameForDedicatedAICluster(_ client.Client, metadata *metav1.ObjectMeta, dacName string) string { + if metadata != nil { + if queueName, ok := metadata.Annotations[constants.DACQueueNameLabelKey]; ok { + queueName = strings.TrimSpace(queueName) + if queueName != "" { + return queueName + } + } + } + return dacName +} + func RemovePodAnnotations(metadata *metav1.ObjectMeta, annotationsToRemove []string) { for _, annotation := range annotationsToRemove { delete(metadata.Annotations, annotation) diff --git a/pkg/controller/v1beta1/inferenceservice/utils/annotations_test.go b/pkg/controller/v1beta1/inferenceservice/utils/annotations_test.go index 67b137e1..75d6ba84 100644 --- a/pkg/controller/v1beta1/inferenceservice/utils/annotations_test.go +++ b/pkg/controller/v1beta1/inferenceservice/utils/annotations_test.go @@ -3,302 +3,40 @@ package utils import ( "testing" - "github.com/stretchr/testify/assert" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "github.com/sgl-project/ome/pkg/constants" - "github.com/sgl-project/ome/pkg/controller/v1beta1/controllerconfig" ) -func TestResolveIngressConfig(t *testing.T) { - // Base config from ConfigMap - baseConfig := &controllerconfig.IngressConfig{ - IngressGateway: "knative-serving/knative-ingress-gateway", - IngressServiceName: "istio-ingressgateway.istio-system.svc.cluster.local", - IngressDomain: "svc.cluster.local", - DomainTemplate: "{{ .Name }}.{{ .Namespace }}.{{ .IngressDomain }}", - UrlScheme: "http", - PathTemplate: "", - DisableIstioVirtualHost: false, - DisableIngressCreation: false, - } - - tests := []struct { - name string - annotations map[string]string - expected *controllerconfig.IngressConfig - }{ - { - name: "no annotations - returns base config", - annotations: map[string]string{}, - expected: baseConfig, - }, - { - name: "custom domain template override", - annotations: map[string]string{ - constants.IngressDomainTemplate: "{{ .Name }}-custom.example.com", - }, - expected: &controllerconfig.IngressConfig{ - IngressGateway: "knative-serving/knative-ingress-gateway", - IngressServiceName: "istio-ingressgateway.istio-system.svc.cluster.local", - IngressDomain: "svc.cluster.local", - DomainTemplate: "{{ .Name }}-custom.example.com", - UrlScheme: "http", - PathTemplate: "", - DisableIstioVirtualHost: false, - DisableIngressCreation: false, - }, - }, - { - name: "custom domain and URL scheme", - annotations: map[string]string{ - constants.IngressDomain: "my-domain.com", - constants.IngressURLScheme: "https", - }, - expected: &controllerconfig.IngressConfig{ - IngressGateway: "knative-serving/knative-ingress-gateway", - IngressServiceName: "istio-ingressgateway.istio-system.svc.cluster.local", - IngressDomain: "my-domain.com", - DomainTemplate: "{{ .Name }}.{{ .Namespace }}.{{ .IngressDomain }}", - UrlScheme: "https", - PathTemplate: "", - DisableIstioVirtualHost: false, - DisableIngressCreation: false, - }, - }, - { - name: "additional domains with comma separation", - annotations: map[string]string{ - constants.IngressAdditionalDomains: "alt1.com, alt2.com, alt3.com", - }, - expected: &controllerconfig.IngressConfig{ - IngressGateway: "knative-serving/knative-ingress-gateway", - IngressServiceName: "istio-ingressgateway.istio-system.svc.cluster.local", - IngressDomain: "svc.cluster.local", - DomainTemplate: "{{ .Name }}.{{ .Namespace }}.{{ .IngressDomain }}", - UrlScheme: "http", - PathTemplate: "", - DisableIstioVirtualHost: false, - DisableIngressCreation: false, - AdditionalIngressDomains: &[]string{"alt1.com", "alt2.com", "alt3.com"}, - }, - }, - { - name: "boolean overrides", - annotations: map[string]string{ - constants.IngressDisableIstioVirtualHost: "true", - constants.IngressDisableCreation: "true", - }, - expected: &controllerconfig.IngressConfig{ - IngressGateway: "knative-serving/knative-ingress-gateway", - IngressServiceName: "istio-ingressgateway.istio-system.svc.cluster.local", - IngressDomain: "svc.cluster.local", - DomainTemplate: "{{ .Name }}.{{ .Namespace }}.{{ .IngressDomain }}", - UrlScheme: "http", - PathTemplate: "", - DisableIstioVirtualHost: true, - DisableIngressCreation: true, - }, - }, - { - name: "path template override", - annotations: map[string]string{ - constants.IngressPathTemplate: "/api/v1/models/{{ .Name }}", - }, - expected: &controllerconfig.IngressConfig{ - IngressGateway: "knative-serving/knative-ingress-gateway", - IngressServiceName: "istio-ingressgateway.istio-system.svc.cluster.local", - IngressDomain: "svc.cluster.local", - DomainTemplate: "{{ .Name }}.{{ .Namespace }}.{{ .IngressDomain }}", - UrlScheme: "http", - PathTemplate: "/api/v1/models/{{ .Name }}", - DisableIstioVirtualHost: false, - DisableIngressCreation: false, - }, - }, - { - name: "comprehensive override", - annotations: map[string]string{ - constants.IngressDomainTemplate: "{{ .Name }}-prod.company.com", - constants.IngressDomain: "company.com", - constants.IngressURLScheme: "https", - constants.IngressPathTemplate: "/ml/{{ .Name }}", - constants.IngressAdditionalDomains: "backup.com,mirror.net", - constants.IngressDisableIstioVirtualHost: "false", - constants.IngressDisableCreation: "false", - }, - expected: &controllerconfig.IngressConfig{ - IngressGateway: "knative-serving/knative-ingress-gateway", - IngressServiceName: "istio-ingressgateway.istio-system.svc.cluster.local", - IngressDomain: "company.com", - DomainTemplate: "{{ .Name }}-prod.company.com", - UrlScheme: "https", - PathTemplate: "/ml/{{ .Name }}", - DisableIstioVirtualHost: false, - DisableIngressCreation: false, - AdditionalIngressDomains: &[]string{"backup.com", "mirror.net"}, - }, +func TestSetPodLabelsFromAnnotations_UsesDedicatedAIClusterForKueueQueue(t *testing.T) { + metadata := &metav1.ObjectMeta{ + Annotations: map[string]string{ + constants.DedicatedAICluster: "dac-a", + constants.KueueEnabledLabelKey: "true", }, + Labels: map[string]string{}, } - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - result := ResolveIngressConfig(baseConfig, tt.annotations) - assert.Equal(t, tt.expected, result) - }) - } -} + SetPodLabelsFromAnnotations(metadata) -func TestGetDeploymentModeFromAnnotations(t *testing.T) { - tests := []struct { - name string - annotations map[string]string - expectedMode constants.DeploymentModeType - expectedFound bool - }{ - { - name: "nil annotations - returns empty and false", - annotations: nil, - expectedMode: "", - expectedFound: false, - }, - { - name: "empty annotations - returns empty and false", - annotations: map[string]string{}, - expectedMode: "", - expectedFound: false, - }, - { - name: "valid Serverless mode", - annotations: map[string]string{ - constants.DeploymentMode: string(constants.Serverless), - }, - expectedMode: constants.Serverless, - expectedFound: true, - }, - { - name: "valid RawDeployment mode", - annotations: map[string]string{ - constants.DeploymentMode: string(constants.RawDeployment), - }, - expectedMode: constants.RawDeployment, - expectedFound: true, - }, - { - name: "valid MultiNodeRayVLLM mode", - annotations: map[string]string{ - constants.DeploymentMode: string(constants.MultiNodeRayVLLM), - }, - expectedMode: constants.MultiNodeRayVLLM, - expectedFound: true, - }, - { - name: "valid MultiNode mode", - annotations: map[string]string{ - constants.DeploymentMode: string(constants.MultiNode), - }, - expectedMode: constants.MultiNode, - expectedFound: true, - }, - { - name: "valid VirtualDeployment mode", - annotations: map[string]string{ - constants.DeploymentMode: string(constants.VirtualDeployment), - }, - expectedMode: constants.VirtualDeployment, - expectedFound: true, - }, - { - name: "invalid deployment mode - returns empty and false", - annotations: map[string]string{ - constants.DeploymentMode: "InvalidMode", - }, - expectedMode: "", - expectedFound: false, - }, - { - name: "empty string deployment mode - returns empty and false", - annotations: map[string]string{ - constants.DeploymentMode: "", - }, - expectedMode: "", - expectedFound: false, - }, - { - name: "other annotations present but no deployment mode", - annotations: map[string]string{ - "some.other/annotation": "value", - }, - expectedMode: "", - expectedFound: false, - }, - } - - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - mode, found := GetDeploymentModeFromAnnotations(tt.annotations) - assert.Equal(t, tt.expectedMode, mode) - assert.Equal(t, tt.expectedFound, found) - }) + if got, want := metadata.Labels[constants.KueueQueueLabelKey], "dac-a"; got != want { + t.Fatalf("unexpected kueue queue label: got %q, want %q", got, want) } } -func TestGetDeploymentMode(t *testing.T) { - tests := []struct { - name string - annotations map[string]string - deployConfig *controllerconfig.DeployConfig - expectedMode constants.DeploymentModeType - }{ - { - name: "valid annotation overrides config", - annotations: map[string]string{ - constants.DeploymentMode: string(constants.RawDeployment), - }, - deployConfig: &controllerconfig.DeployConfig{ - DefaultDeploymentMode: string(constants.Serverless), - }, - expectedMode: constants.RawDeployment, - }, - { - name: "no annotation uses config default", - annotations: map[string]string{}, - deployConfig: &controllerconfig.DeployConfig{ - DefaultDeploymentMode: string(constants.Serverless), - }, - expectedMode: constants.Serverless, - }, - { - name: "nil annotations uses config default", - annotations: nil, - deployConfig: &controllerconfig.DeployConfig{ - DefaultDeploymentMode: string(constants.RawDeployment), - }, - expectedMode: constants.RawDeployment, - }, - { - name: "invalid annotation falls back to config default", - annotations: map[string]string{ - constants.DeploymentMode: "InvalidMode", - }, - deployConfig: &controllerconfig.DeployConfig{ - DefaultDeploymentMode: string(constants.MultiNode), - }, - expectedMode: constants.MultiNode, - }, - { - name: "empty config default returns empty string", - annotations: map[string]string{}, - deployConfig: &controllerconfig.DeployConfig{ - DefaultDeploymentMode: "", - }, - expectedMode: "", +func TestSetPodLabelsFromAnnotations_UsesDACQueueOverrideWhenPresent(t *testing.T) { + metadata := &metav1.ObjectMeta{ + Annotations: map[string]string{ + constants.DedicatedAICluster: "dac-a", + constants.KueueEnabledLabelKey: "true", + constants.DACQueueNameLabelKey: "reservation-queue-1", }, + Labels: map[string]string{}, } - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - mode := GetDeploymentMode(tt.annotations, tt.deployConfig) - assert.Equal(t, tt.expectedMode, mode) - }) + SetPodLabelsFromAnnotations(metadata) + + if got, want := metadata.Labels[constants.KueueQueueLabelKey], "reservation-queue-1"; got != want { + t.Fatalf("unexpected kueue queue label: got %q, want %q", got, want) } }