Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
93 changes: 6 additions & 87 deletions charts/ome-serving/values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -204,15 +204,6 @@ models:
runtime:
gpus: 1

deepseek-rdma:
enabled: false
createRuntime: true
vendor: other
capabilities: [TEXT_TO_TEXT]
hfModelId: deepseek-rdma
runtime:
gpus: 1

deepseek-v2-lite-chat:
enabled: false
createRuntime: true
Expand Down Expand Up @@ -420,24 +411,6 @@ models:
runtime:
gpus: 4

gpt-oss-120b-bf16:
enabled: false
createRuntime: true
vendor: lmsys
capabilities: [TEXT_TO_TEXT]
hfModelId: lmsys/gpt-oss-120b-bf16
runtime:
gpus: 8

gpt-oss-120b-grpc:
enabled: false
createRuntime: true
vendor: openai
capabilities: [TEXT_TO_TEXT]
hfModelId: openai/gpt-oss-120b
runtime:
gpus: 2

gpt-oss-20b:
enabled: false
createRuntime: true
Expand All @@ -447,24 +420,6 @@ models:
runtime:
gpus: 2

gpt-oss-20b-bf16:
enabled: false
createRuntime: true
vendor: lmsys
capabilities: [TEXT_TO_TEXT]
hfModelId: lmsys/gpt-oss-20b-bf16
runtime:
gpus: 2

gpt-oss-20b-grpc:
enabled: false
createRuntime: true
vendor: openai
capabilities: [TEXT_TO_TEXT]
hfModelId: openai/gpt-oss-20b
runtime:
gpus: 1

granite-3-0-3b-a800m-instruct:
enabled: false
createRuntime: true
Expand Down Expand Up @@ -609,7 +564,7 @@ models:
runtime:
gpus: 4

llama-2-13b:
llama-2-13b-hf:
enabled: false
createRuntime: true
vendor: meta
Expand All @@ -627,7 +582,7 @@ models:
runtime:
gpus: 1

llama-2-70b:
llama-2-70b-hf:
enabled: false
createRuntime: true
vendor: meta
Expand Down Expand Up @@ -690,15 +645,6 @@ models:
runtime:
gpus: 1

llama-3-1-8b-instruct-grpc:
enabled: false
createRuntime: true
vendor: meta
capabilities: [TEXT_TO_TEXT]
hfModelId: meta-llama/Llama-3.1-8B-Instruct
runtime:
gpus: 1

llama-3-1-nemotron-70b-instruct-hf:
enabled: false
createRuntime: true
Expand Down Expand Up @@ -834,24 +780,6 @@ models:
runtime:
gpus: 8

llama-4-maverick-17b-128e-instruct-fp8-grpc:
enabled: false
createRuntime: true
vendor: meta
capabilities: [TEXT_TO_TEXT]
hfModelId: meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8
runtime:
gpus: 8

llama-4-maverick-17b-128e-instruct-fp8-pd-grpc:
enabled: false
createRuntime: true
vendor: meta
capabilities: [TEXT_TO_TEXT]
hfModelId: meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8
runtime:
gpus: 8

llama-4-scout-17b-16e-instruct:
enabled: false
createRuntime: true
Expand Down Expand Up @@ -951,23 +879,14 @@ models:
runtime:
gpus: 4

mistral-7b-instruct:
enabled: false
createRuntime: true
vendor: mistral
capabilities: [TEXT_TO_TEXT]
hfModelId: mistralai/Mistral-7B-Instruct-v0.2
runtime:
gpus: 2

mistral-7b-instruct-v0-2:
enabled: false
createRuntime: true
vendor: mistral
capabilities: [TEXT_TO_TEXT]
hfModelId: mistralai/Mistral-7B-Instruct-v0.2
runtime:
gpus: 1
gpus: 2

mistral-7b-instruct-v0-3:
enabled: false
Expand Down Expand Up @@ -996,7 +915,7 @@ models:
runtime:
gpus: 2

mixtral-8x22b:
mixtral-8x22b-v0-1:
enabled: false
createRuntime: true
vendor: mistral
Expand All @@ -1005,7 +924,7 @@ models:
runtime:
gpus: 8

mixtral-8x7b:
mixtral-8x7b-v0-1:
enabled: false
createRuntime: true
vendor: mistral
Expand All @@ -1014,7 +933,7 @@ models:
runtime:
gpus: 4

mixtral-8x7b-instruct:
mixtral-8x7b-instruct-v0-1:
enabled: false
createRuntime: true
vendor: mistral
Expand Down
23 changes: 23 additions & 0 deletions config/models/Qwen/Qwen1.5-110B-Chat.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
# ClusterBaseModel manifest for Qwen/Qwen1.5-110B-Chat (text-to-text).
# Weights are referenced via an hf:// URI and placed under /raid/models.
apiVersion: ome.io/v1beta1
kind: ClusterBaseModel
metadata:
  name: qwen1-5-110b-chat
spec:
  modelCapabilities:
    - TEXT_TO_TEXT
  vendor: Qwen
  displayName: qwen.qwen1-5-110b-chat
  disabled: false
  # Quoted so the version stays a string.
  version: "1.0.0"
  modelArchitecture: Qwen2ForCausalLM
  modelFormat:
    name: safetensors
    version: "1.0.0"
  modelFramework:
    name: transformers
    version: "4.41.2"
  modelParameterSize: 110B
  storage:
    storageUri: hf://Qwen/Qwen1.5-110B-Chat
    path: /raid/models/Qwen/Qwen1.5-110B-Chat
    # NOTE(review): presumably the name of the secret holding the
    # Hugging Face access token — confirm against the storage spec.
    key: "hf-token"
23 changes: 23 additions & 0 deletions config/models/Qwen/Qwen1.5-32B-Chat.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
# ClusterBaseModel manifest for Qwen/Qwen1.5-32B-Chat (text-to-text).
# Weights are referenced via an hf:// URI and placed under /raid/models.
apiVersion: ome.io/v1beta1
kind: ClusterBaseModel
metadata:
  name: qwen1-5-32b-chat
spec:
  modelCapabilities:
    - TEXT_TO_TEXT
  vendor: Qwen
  displayName: qwen.qwen1-5-32b-chat
  disabled: false
  # Quoted so the version stays a string.
  version: "1.0.0"
  modelArchitecture: Qwen2ForCausalLM
  modelFormat:
    name: safetensors
    version: "1.0.0"
  modelFramework:
    name: transformers
    version: "4.41.2"
  modelParameterSize: 32B
  storage:
    storageUri: hf://Qwen/Qwen1.5-32B-Chat
    path: /raid/models/Qwen/Qwen1.5-32B-Chat
    # NOTE(review): presumably the name of the secret holding the
    # Hugging Face access token — confirm against the storage spec.
    key: "hf-token"
23 changes: 23 additions & 0 deletions config/models/Qwen/Qwen1.5-72B-Chat.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
# ClusterBaseModel manifest for Qwen/Qwen1.5-72B-Chat (text-to-text).
# Weights are referenced via an hf:// URI and placed under /raid/models.
apiVersion: ome.io/v1beta1
kind: ClusterBaseModel
metadata:
  name: qwen1-5-72b-chat
spec:
  modelCapabilities:
    - TEXT_TO_TEXT
  vendor: Qwen
  displayName: qwen.qwen1-5-72b-chat
  disabled: false
  # Quoted so the version stays a string.
  version: "1.0.0"
  modelArchitecture: Qwen2ForCausalLM
  modelFormat:
    name: safetensors
    version: "1.0.0"
  modelFramework:
    name: transformers
    version: "4.41.2"
  modelParameterSize: 72B
  storage:
    storageUri: hf://Qwen/Qwen1.5-72B-Chat
    path: /raid/models/Qwen/Qwen1.5-72B-Chat
    # NOTE(review): presumably the name of the secret holding the
    # Hugging Face access token — confirm against the storage spec.
    key: "hf-token"
23 changes: 23 additions & 0 deletions config/models/Qwen/Qwen1.5-7B-Chat.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
# ClusterBaseModel manifest for Qwen/Qwen1.5-7B-Chat (text-to-text).
# Weights are referenced via an hf:// URI and placed under /raid/models.
apiVersion: ome.io/v1beta1
kind: ClusterBaseModel
metadata:
  name: qwen1-5-7b-chat
spec:
  modelCapabilities:
    - TEXT_TO_TEXT
  vendor: Qwen
  displayName: qwen.qwen1-5-7b-chat
  disabled: false
  # Quoted so the version stays a string.
  version: "1.0.0"
  modelArchitecture: Qwen2ForCausalLM
  modelFormat:
    name: safetensors
    version: "1.0.0"
  modelFramework:
    name: transformers
    version: "4.41.2"
  modelParameterSize: 7B
  storage:
    storageUri: hf://Qwen/Qwen1.5-7B-Chat
    path: /raid/models/Qwen/Qwen1.5-7B-Chat
    # NOTE(review): presumably the name of the secret holding the
    # Hugging Face access token — confirm against the storage spec.
    key: "hf-token"
16 changes: 16 additions & 0 deletions config/models/google/gemma-2-27b-it.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
# ClusterBaseModel manifest for google/gemma-2-27b-it (text-to-text).
# This model is gated on HuggingFace.
apiVersion: ome.io/v1beta1
kind: ClusterBaseModel
metadata:
  name: gemma-2-27b-it
spec:
  modelCapabilities:
    - TEXT_TO_TEXT
  vendor: google
  disabled: false
  # Quoted so the version stays a string.
  version: "1.0.0"
  displayName: google.gemma-2-27b-it
  storage:
    storageUri: hf://google/gemma-2-27b-it
    path: /raid/models/google/gemma-2-27b-it
    # NOTE(review): presumably the name of the secret holding the
    # Hugging Face access token — confirm against the storage spec.
    key: "hf-token"
Loading