diff --git a/charts/ome-serving/README.md b/charts/ome-serving/README.md index 1fc2874f..48f84a8a 100644 --- a/charts/ome-serving/README.md +++ b/charts/ome-serving/README.md @@ -65,6 +65,8 @@ models: clusterScope: true # Create ClusterBaseModel (default: true) namespaceScope: false # Create BaseModel (default: false) namespace: # Required if namespaceScope: true + createModel: true # Create model resource (default: true) + createRuntime: true # Create runtime resource (default: true) vendor: capabilities: [TEXT_TO_TEXT|IMAGE_TEXT_TO_TEXT|EMBEDDING|...] @@ -203,6 +205,41 @@ models: gpus: 2 ``` +### Runtime Only (No Model Creation) + +Create only the ClusterServingRuntime without creating a model resource. Useful when the model already exists or is managed externally: + +```yaml +models: + qwen3-8b: + enabled: true + createModel: false # Don't create ClusterBaseModel/BaseModel + createRuntime: true # Only create ClusterServingRuntime + vendor: qwen + capabilities: [TEXT_TO_TEXT] + hfModelId: Qwen/Qwen3-8B + runtime: + gpus: 1 +``` + +### Model Only (No Runtime Creation) + +Create only the model resource without a runtime. Useful when using a shared runtime: + +```yaml +models: + qwen3-8b: + enabled: true + createModel: true # Create ClusterBaseModel + createRuntime: false # Don't create ClusterServingRuntime + vendor: qwen + capabilities: [TEXT_TO_TEXT] + hfModelId: Qwen/Qwen3-8B + path: /raid/models/qwen/qwen3-8b + runtime: + gpus: 1 +``` + ### PD Mode (Prefill-Decode Disaggregated) For models that support disaggregated serving, enable PD mode to deploy with separate prefill (engine) and decode (decoder) components. PD mode uses RDMA/InfiniBand for high-performance inter-node communication. diff --git a/charts/ome-serving/templates/basemodel.yaml b/charts/ome-serving/templates/basemodel.yaml index 0738bea0..ba0b1a22 100644 --- a/charts/ome-serving/templates/basemodel.yaml +++ b/charts/ome-serving/templates/basemodel.yaml @@ -1,5 +1,5 @@ {{- range $modelName, $model := .Values.models }} -{{- if and $model.enabled $model.namespaceScope }} +{{- if and $model.enabled $model.namespaceScope (ne $model.createModel false) }} {{/* Generate storageUri from simplified options: - hfModelId: Qwen/Qwen3-8B → hf://Qwen/Qwen3-8B diff --git a/charts/ome-serving/templates/clusterbasemodel.yaml b/charts/ome-serving/templates/clusterbasemodel.yaml index 9022f685..7c6d7ad2 100644 --- a/charts/ome-serving/templates/clusterbasemodel.yaml +++ b/charts/ome-serving/templates/clusterbasemodel.yaml @@ -1,5 +1,5 @@ {{- range $modelName, $model := .Values.models }} -{{- if and $model.enabled (ne $model.clusterScope false) }} +{{- if and $model.enabled (ne $model.clusterScope false) (ne $model.createModel false) }} {{/* Generate storageUri from simplified options: - hfModelId: Qwen/Qwen3-8B → hf://Qwen/Qwen3-8B diff --git a/charts/ome-serving/templates/clusterservingruntime.yaml b/charts/ome-serving/templates/clusterservingruntime.yaml index 600555d1..a7c0cb40 100644 --- a/charts/ome-serving/templates/clusterservingruntime.yaml +++ b/charts/ome-serving/templates/clusterservingruntime.yaml @@ -1,6 +1,6 @@ {{- $registry := include "ome-serving.modelRegistry" . | fromYaml }} {{- range $modelName, $model := .Values.models }} -{{- if $model.enabled }} +{{- if and $model.enabled (ne $model.createRuntime false) }} {{- $modelInfo := index $registry $modelName }} {{- if not $modelInfo }} {{- fail (printf "Model '%s' not found in registry. Please check the model name or add it to the registry in _helpers.tpl" $modelName) }}