From ee75f8cced809c33fbbc3face03872b4b6f16313 Mon Sep 17 00:00:00 2001 From: Arthur Cheng Date: Fri, 3 Apr 2026 16:37:47 -0700 Subject: [PATCH 1/4] initial condition when engine is nil --- pkg/controller/v1beta1/inferenceservice/controller.go | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/pkg/controller/v1beta1/inferenceservice/controller.go b/pkg/controller/v1beta1/inferenceservice/controller.go index 7da7bc32..6b2805ae 100644 --- a/pkg/controller/v1beta1/inferenceservice/controller.go +++ b/pkg/controller/v1beta1/inferenceservice/controller.go @@ -184,6 +184,11 @@ func (r *InferenceServiceReconciler) Reconcile(ctx context.Context, req ctrl.Req isvc.Status.Components = make(map[v1beta1.ComponentType]v1beta1.ComponentStatusSpec) } + // Seed top-level conditions as Unknown on first observation + if isvc.Status.GetCondition(v1beta1.EngineReady) == nil { + isvc.Status.InitializeConditions() + } + // Setup reconcilers r.Log.Info("Reconciling inference service", "apiVersion", isvc.APIVersion, "namespace", isvc.Namespace, "isvc", isvc.Name) isvcConfig, err := controllerconfig.NewInferenceServicesConfig(r.Clientset) From c9915e2afb20fab24c41d01443d0b3d32fe5b773 Mon Sep 17 00:00:00 2001 From: Arthur Cheng Date: Fri, 3 Apr 2026 16:38:38 -0700 Subject: [PATCH 2/4] update status to return unknown --- .../inferenceservice/status/status_util.go | 60 ++++++++++++------- .../status/status_util_test.go | 14 +++-- 2 files changed, 47 insertions(+), 27 deletions(-) diff --git a/pkg/controller/v1beta1/inferenceservice/status/status_util.go b/pkg/controller/v1beta1/inferenceservice/status/status_util.go index ddf0b6b5..260b093a 100644 --- a/pkg/controller/v1beta1/inferenceservice/status/status_util.go +++ b/pkg/controller/v1beta1/inferenceservice/status/status_util.go @@ -43,40 +43,53 @@ func (sr *StatusReconciler) getFirstDeployment(deployments []*appsv1.Deployment) return deployments[0], nil } -// getDeploymentCondition extracts condition from deployment +// 
getDeploymentCondition extracts condition from deployment. func (sr *StatusReconciler) getDeploymentCondition(deployment *appsv1.Deployment, conditionType appsv1.DeploymentConditionType) *apis.Condition { - condition := apis.Condition{} for _, con := range deployment.Status.Conditions { if con.Type == conditionType { - condition.Type = apis.ConditionType(conditionType) - condition.Status = con.Status - condition.Message = con.Message - condition.LastTransitionTime = apis.VolatileTime{ - Inner: con.LastTransitionTime, + return &apis.Condition{ + Type: apis.ConditionType(conditionType), + Status: con.Status, + Message: con.Message, + LastTransitionTime: apis.VolatileTime{ + Inner: con.LastTransitionTime, + }, + Reason: con.Reason, } - condition.Reason = con.Reason - break } } - return &condition + return &apis.Condition{ + Type: apis.ConditionType(conditionType), + Status: v1.ConditionUnknown, + Reason: "DeploymentConditionMissing", + Message: fmt.Sprintf("%s condition is not yet available on the deployment", conditionType), + } } -// getLWSConditions extracts condition from LeaderWorkerSet +// getLWSConditions extracts condition from LeaderWorkerSet. +// When the requested condition type is not yet present, the function returns +// an explicit Unknown condition so that callers never receive a zero-value +// struct that silently falls through SetCondition. 
func (sr *StatusReconciler) getLWSConditions(lws *lwsspec.LeaderWorkerSet, conditionType lwsspec.LeaderWorkerSetConditionType) *apis.Condition { - condition := apis.Condition{} for _, con := range lws.Status.Conditions { if lwsspec.LeaderWorkerSetConditionType(con.Type) == conditionType { - condition.Type = apis.ConditionType(conditionType) - condition.Status = v1.ConditionStatus(con.Status) - condition.Message = con.Message - condition.LastTransitionTime = apis.VolatileTime{ - Inner: con.LastTransitionTime, + return &apis.Condition{ + Type: apis.ConditionType(conditionType), + Status: v1.ConditionStatus(con.Status), + Message: con.Message, + LastTransitionTime: apis.VolatileTime{ + Inner: con.LastTransitionTime, + }, + Reason: con.Reason, } - condition.Reason = con.Reason - break } } - return &condition + return &apis.Condition{ + Type: apis.ConditionType(conditionType), + Status: v1.ConditionUnknown, + Reason: "LWSConditionMissing", + Message: fmt.Sprintf("%s condition is not yet available on the LeaderWorkerSet", conditionType), + } } // getMultiDeploymentCondition checks conditions across multiple deployments @@ -141,11 +154,12 @@ func (sr *StatusReconciler) setCondition(status *v1beta1.InferenceServiceStatus, func (sr *StatusReconciler) InitializeComponentCondition(status *v1beta1.InferenceServiceStatus, component v1beta1.ComponentType) { readyCondition := sr.getReadyConditionsMap()[component] - // Only initialize if the condition doesn't exist yet - if !status.IsConditionReady(readyCondition) && !status.IsConditionUnknown(readyCondition) { + // Only initialize if the condition doesn't exist yet. 
+ // A nil condition means the status has never been set for this component. + if status.GetCondition(readyCondition) == nil { condition := &apis.Condition{ Type: readyCondition, - Status: v1.ConditionFalse, + Status: v1.ConditionUnknown, Reason: "Initializing", Message: fmt.Sprintf("%s component initializing", component), } diff --git a/pkg/controller/v1beta1/inferenceservice/status/status_util_test.go b/pkg/controller/v1beta1/inferenceservice/status/status_util_test.go index 16a9649b..15050cf3 100644 --- a/pkg/controller/v1beta1/inferenceservice/status/status_util_test.go +++ b/pkg/controller/v1beta1/inferenceservice/status/status_util_test.go @@ -252,7 +252,7 @@ func TestGetDeploymentCondition(t *testing.T) { }, }, { - name: "deployment without requested condition", + name: "deployment without requested condition returns Unknown", deployment: &appsv1.Deployment{ Status: appsv1.DeploymentStatus{ Conditions: []appsv1.DeploymentCondition{ @@ -265,7 +265,10 @@ func TestGetDeploymentCondition(t *testing.T) { }, conditionType: appsv1.DeploymentAvailable, expected: &apis.Condition{ - Type: "", + Type: apis.ConditionType(appsv1.DeploymentAvailable), + Status: corev1.ConditionUnknown, + Reason: "DeploymentConditionMissing", + Message: "Available condition is not yet available on the deployment", }, }, } @@ -335,7 +338,7 @@ func TestGetLWSConditions(t *testing.T) { }, }, { - name: "LWS without requested condition", + name: "LWS without requested condition returns Unknown", lws: &lwsspec.LeaderWorkerSet{ Status: lwsspec.LeaderWorkerSetStatus{ Conditions: []metav1.Condition{ @@ -348,7 +351,10 @@ func TestGetLWSConditions(t *testing.T) { }, conditionType: lwsspec.LeaderWorkerSetAvailable, expected: &apis.Condition{ - Type: "", + Type: apis.ConditionType(lwsspec.LeaderWorkerSetAvailable), + Status: corev1.ConditionUnknown, + Reason: "LWSConditionMissing", + Message: "Available condition is not yet available on the LeaderWorkerSet", }, }, } From 
e5523c8d07b7a3005e746afdd8584766e35043ac Mon Sep 17 00:00:00 2001 From: Arthur Cheng Date: Fri, 3 Apr 2026 16:39:04 -0700 Subject: [PATCH 3/4] re fetch deployment and LWS for updated condition --- .../reconcilers/deployment/deployment_reconciler.go | 11 ++++++++++- .../reconcilers/lws/lws_reconciler.go | 13 ++++++++++++- 2 files changed, 22 insertions(+), 2 deletions(-) diff --git a/pkg/controller/v1beta1/inferenceservice/reconcilers/deployment/deployment_reconciler.go b/pkg/controller/v1beta1/inferenceservice/reconcilers/deployment/deployment_reconciler.go index 45c03474..33c1fdb3 100644 --- a/pkg/controller/v1beta1/inferenceservice/reconcilers/deployment/deployment_reconciler.go +++ b/pkg/controller/v1beta1/inferenceservice/reconcilers/deployment/deployment_reconciler.go @@ -216,5 +216,14 @@ func (r *DeploymentReconciler) Reconcile() (*appsv1.Deployment, error) { return nil, opErr } - return r.Deployment, nil + // Re-fetch the deployment so callers get real status (including conditions). + liveDeployment := &appsv1.Deployment{} + if err := r.client.Get(context.TODO(), types.NamespacedName{ + Namespace: r.Deployment.Namespace, + Name: r.Deployment.Name, + }, liveDeployment); err != nil { + log.Error(err, "Failed to re-fetch deployment after create/update", "namespace", r.Deployment.Namespace, "name", r.Deployment.Name) + return nil, err + } + return liveDeployment, nil } diff --git a/pkg/controller/v1beta1/inferenceservice/reconcilers/lws/lws_reconciler.go b/pkg/controller/v1beta1/inferenceservice/reconcilers/lws/lws_reconciler.go index 9abc36dd..804f5fe9 100644 --- a/pkg/controller/v1beta1/inferenceservice/reconcilers/lws/lws_reconciler.go +++ b/pkg/controller/v1beta1/inferenceservice/reconcilers/lws/lws_reconciler.go @@ -173,7 +173,18 @@ func (r *LWSReconciler) Reconcile() (*lws.LeaderWorkerSet, error) { return nil, opErr } - return r.LWS, nil + // Re-fetch the live LWS so callers get real status (including conditions). 
+ // Returning r.LWS here would hand back the desired-spec object whose + // .Status.Conditions is always empty. + liveLWS := &lws.LeaderWorkerSet{} + if err := r.client.Get(context.TODO(), types.NamespacedName{ + Namespace: r.LWS.Namespace, + Name: r.LWS.Name, + }, liveLWS); err != nil { + log.Error(err, "Failed to re-fetch LWS after create/update", "namespace", r.LWS.Namespace, "name", r.LWS.Name) + return nil, err + } + return liveLWS, nil } func (r *LWSReconciler) checkLeaderWorkerSetExist() (constants.CheckResultType, *lws.LeaderWorkerSet, error) { From e56cc7b200c42e82a900c9d61e0706a5e83faf4f Mon Sep 17 00:00:00 2001 From: Arthur Cheng Date: Tue, 14 Apr 2026 13:35:12 -0700 Subject: [PATCH 4/4] revert re-fetch deployment and LWS --- .../reconcilers/deployment/deployment_reconciler.go | 11 +---------- .../reconcilers/lws/lws_reconciler.go | 13 +------------ 2 files changed, 2 insertions(+), 22 deletions(-) diff --git a/pkg/controller/v1beta1/inferenceservice/reconcilers/deployment/deployment_reconciler.go b/pkg/controller/v1beta1/inferenceservice/reconcilers/deployment/deployment_reconciler.go index 33c1fdb3..45c03474 100644 --- a/pkg/controller/v1beta1/inferenceservice/reconcilers/deployment/deployment_reconciler.go +++ b/pkg/controller/v1beta1/inferenceservice/reconcilers/deployment/deployment_reconciler.go @@ -216,14 +216,5 @@ func (r *DeploymentReconciler) Reconcile() (*appsv1.Deployment, error) { return nil, opErr } - // Re-fetch the deployment so callers get real status (including conditions). 
- liveDeployment := &appsv1.Deployment{} - if err := r.client.Get(context.TODO(), types.NamespacedName{ - Namespace: r.Deployment.Namespace, - Name: r.Deployment.Name, - }, liveDeployment); err != nil { - log.Error(err, "Failed to re-fetch deployment after create/update", "namespace", r.Deployment.Namespace, "name", r.Deployment.Name) - return nil, err - } - return liveDeployment, nil + return r.Deployment, nil } diff --git a/pkg/controller/v1beta1/inferenceservice/reconcilers/lws/lws_reconciler.go b/pkg/controller/v1beta1/inferenceservice/reconcilers/lws/lws_reconciler.go index 804f5fe9..9abc36dd 100644 --- a/pkg/controller/v1beta1/inferenceservice/reconcilers/lws/lws_reconciler.go +++ b/pkg/controller/v1beta1/inferenceservice/reconcilers/lws/lws_reconciler.go @@ -173,18 +173,7 @@ func (r *LWSReconciler) Reconcile() (*lws.LeaderWorkerSet, error) { return nil, opErr } - // Re-fetch the live LWS so callers get real status (including conditions). - // Returning r.LWS here would hand back the desired-spec object whose - // .Status.Conditions is always empty. - liveLWS := &lws.LeaderWorkerSet{} - if err := r.client.Get(context.TODO(), types.NamespacedName{ - Namespace: r.LWS.Namespace, - Name: r.LWS.Name, - }, liveLWS); err != nil { - log.Error(err, "Failed to re-fetch LWS after create/update", "namespace", r.LWS.Namespace, "name", r.LWS.Name) - return nil, err - } - return liveLWS, nil + return r.LWS, nil } func (r *LWSReconciler) checkLeaderWorkerSetExist() (constants.CheckResultType, *lws.LeaderWorkerSet, error) {