Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,10 @@
CHARTS_DIR := ./charts

# Define the registry and image tagging
REGISTRY ?= ghcr.io/moirai-internal/ome
REGISTRY ?= ghcr.io/moirai-internal
TAG ?= $(GIT_TAG)
ARCH ?= linux/amd64
MANAGER_IMG ?= $(REGISTRY)/manager:$(TAG)
MANAGER_IMG ?= $(REGISTRY)/ome-manager:$(TAG)

# Git version and commit information for build
version_pkg = github.com/sgl-project/ome/pkg/version
Expand Down
2 changes: 1 addition & 1 deletion charts/ome-resources/values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,7 @@ ome:
cpu: 2
memory: 4Gi
omeAgent:
image: ghcr.io/moirai-internal/genai-ome-agent
image: ghcr.io/moirai-internal/ome-agent
tag: *defaultVersion
authType: InstancePrincipal
compartmentId: ocid1.compartment.oc1..dummy-compartment
Expand Down
3 changes: 3 additions & 0 deletions config/runtimes/kustomization.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,9 @@ resources:
- srt/deepseek-rdma-pd-rt.yaml
- srt/deepseek-rdma-rt.yaml
- srt/llama-4-maverick-17b-128e-instruct-fp8-rt.yaml
- srt/llama-4-maverick-17b-128e-instruct-fp8-pd-rt.yaml
- srt/llama-4-scout-17b-16e-instruct-rt.yaml
- srt/llama-4-scout-17b-16e-instruct-pd-rt.yaml
- srt/e5-mistral-7b-instruct-rt.yaml
- srt/llama-3-3-70b-instruct-rt.yaml
- srt/llama-3-3-70b-instruct-pd-rt.yaml
68 changes: 8 additions & 60 deletions config/runtimes/srt/deepseek-rdma-pd-rt.yaml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
apiVersion: ome.io/v1beta1
kind: ClusterServingRuntime
metadata:
name: srt-deepseek-pd-rdma
name: srt-deepseek-rdma-pd
spec:
disabled: false
modelSizeRange:
Expand Down Expand Up @@ -44,22 +44,9 @@ spec:
effect: "NoSchedule"
dnsPolicy: ClusterFirstWithHostNet
hostNetwork: true
nodeSelector:
oci.oraclecloud.com/rdma.authenticated: "16"
oci.oraclecloud.com/rdma.mlx_issues: "0"
oke.oraclecloud.com/pool.mode: cluster-network
affinity:
nodeAffinity:
requiredDuringSchedulingIgnoredDuringExecution:
nodeSelectorTerms:
- matchExpressions:
- key: node.kubernetes.io/instance-type
operator: In
values:
- BM.GPU.H100.8
runner:
name: ome-container
image: ghcr.io/moirai-internal/sgl:dev2
image: docker.io/lmsysorg/sglang:v0.4.8.post1-cu126
env:
- name: NVSHMEM_ENABLE_NIC_PE_MAPPING
value: "1"
Expand Down Expand Up @@ -137,28 +124,15 @@ spec:
timeoutSeconds: 30
worker:
size: 1
nodeSelector:
oci.oraclecloud.com/rdma.authenticated: "16"
oci.oraclecloud.com/rdma.mlx_issues: "0"
oke.oraclecloud.com/pool.mode: cluster-network
tolerations:
- key: nvidia.com/gpu
operator: Exists
effect: NoSchedule
dnsPolicy: ClusterFirstWithHostNet
hostNetwork: true
affinity:
nodeAffinity:
requiredDuringSchedulingIgnoredDuringExecution:
nodeSelectorTerms:
- matchExpressions:
- key: node.kubernetes.io/instance-type
operator: In
values:
- BM.GPU.H100.8
runner:
name: ome-container
image: ghcr.io/moirai-internal/sgl:dev2
image: docker.io/lmsysorg/sglang:v0.4.8.post1-cu126
command:
- sh
- -c
Expand Down Expand Up @@ -205,7 +179,7 @@ spec:
value: "0"
- name: NVSHMEM_ENABLE_NIC_PE_MAPPING
value: "1"
- name: SGLANG_MOONCAKE_TRANS_THREAD
- name: SGLANG_DISAGGREGATION_THREAD_POOL_SIZE
value: "8"
- name: SGL_ENABLE_JIT_DEEPGEMM
value: "1"
Expand All @@ -225,24 +199,11 @@ spec:
effect: "NoSchedule"
dnsPolicy: ClusterFirstWithHostNet
hostNetwork: true
nodeSelector:
oci.oraclecloud.com/rdma.authenticated: "16"
oci.oraclecloud.com/rdma.mlx_issues: "0"
oke.oraclecloud.com/pool.mode: cluster-network
affinity:
nodeAffinity:
requiredDuringSchedulingIgnoredDuringExecution:
nodeSelectorTerms:
- matchExpressions:
- key: node.kubernetes.io/instance-type
operator: In
values:
- BM.GPU.H100.8
runner:
name: ome-container
image: ghcr.io/moirai-internal/sgl:dev2
image: docker.io/lmsysorg/sglang:v0.4.8.post1-cu126
env:
- name: SGLANG_MOONCAKE_TRANS_THREAD
- name: SGLANG_DISAGGREGATION_THREAD_POOL_SIZE
value: "16"
- name: SGL_ENABLE_JIT_DEEPGEMM
value: "1"
Expand Down Expand Up @@ -309,28 +270,15 @@ spec:
timeoutSeconds: 30
worker:
size: 1
nodeSelector:
oci.oraclecloud.com/rdma.authenticated: "16"
oci.oraclecloud.com/rdma.mlx_issues: "0"
oke.oraclecloud.com/pool.mode: cluster-network
tolerations:
- key: nvidia.com/gpu
operator: Exists
effect: NoSchedule
dnsPolicy: ClusterFirstWithHostNet
hostNetwork: true
affinity:
nodeAffinity:
requiredDuringSchedulingIgnoredDuringExecution:
nodeSelectorTerms:
- matchExpressions:
- key: node.kubernetes.io/instance-type
operator: In
values:
- BM.GPU.H100.8
runner:
name: ome-container
image: ghcr.io/moirai-internal/sgl:dev2
image: docker.io/lmsysorg/sglang:v0.4.8.post1-cu126
command:
- sh
- -c
Expand Down Expand Up @@ -372,7 +320,7 @@ spec:
value: "0"
- name: NVSHMEM_IB_TRAFFIC_CLASS
value: "16"
- name: SGLANG_MOONCAKE_TRANS_THREAD
- name: SGLANG_DISAGGREGATION_THREAD_POOL_SIZE
value: "16"
- name: SGL_ENABLE_JIT_DEEPGEMM
value: "1"
Expand Down
36 changes: 5 additions & 31 deletions config/runtimes/srt/deepseek-rdma-rt.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ spec:
routerConfig:
runner:
name: router
image: ghcr.io/moirai-internal/sgl-router:dev2
image: ghcr.io/moirai-internal/sgl-router:0.1.4.30f2a44
resources:
limits:
cpu: "1"
Expand Down Expand Up @@ -75,30 +75,17 @@ spec:
effect: "NoSchedule"
dnsPolicy: ClusterFirstWithHostNet
hostNetwork: true
nodeSelector:
oci.oraclecloud.com/rdma.authenticated: "16"
oci.oraclecloud.com/rdma.mlx_issues: "0"
oke.oraclecloud.com/pool.mode: cluster-network
affinity:
nodeAffinity:
requiredDuringSchedulingIgnoredDuringExecution:
nodeSelectorTerms:
- matchExpressions:
- key: node.kubernetes.io/instance-type
operator: In
values:
- BM.GPU.H100.8
runner:
name: ome-container
image: ghcr.io/moirai-internal/sgl:dev2
image: docker.io/lmsysorg/sglang:v0.4.8.post1-cu126
command:
- sh
- -c
- >
python3 -m sglang.launch_server
--host 0.0.0.0 --port 8080
--model-path ${MODEL_PATH}
--tp 16
--tp-size 16
--nccl-init $(LWS_LEADER_ADDRESS):5000
--nnodes ${LWS_GROUP_SIZE}
--node-rank ${LWS_WORKER_INDEX}
Expand Down Expand Up @@ -139,28 +126,15 @@ spec:
timeoutSeconds: 30
worker:
size: 1
nodeSelector:
oci.oraclecloud.com/rdma.authenticated: "16"
oci.oraclecloud.com/rdma.mlx_issues: "0"
oke.oraclecloud.com/pool.mode: cluster-network
tolerations:
- key: nvidia.com/gpu
operator: Exists
effect: NoSchedule
dnsPolicy: ClusterFirstWithHostNet
hostNetwork: true
affinity:
nodeAffinity:
requiredDuringSchedulingIgnoredDuringExecution:
nodeSelectorTerms:
- matchExpressions:
- key: node.kubernetes.io/instance-type
operator: In
values:
- BM.GPU.H100.8
runner:
name: ome-container
image: ghcr.io/moirai-internal/sgl:dev2
image: docker.io/lmsysorg/sglang:v0.4.8.post1-cu126
command:
- sh
- -c
Expand All @@ -169,7 +143,7 @@ spec:
--host 0.0.0.0
--port 8080
--model-path ${MODEL_PATH}
--tp 16
--tp-size 16
--nccl-init $(LWS_LEADER_ADDRESS):5000
--nnodes ${LWS_GROUP_SIZE}
--node-rank ${LWS_WORKER_INDEX}
Expand Down
18 changes: 3 additions & 15 deletions config/runtimes/srt/e5-mistral-7b-instruct-rt.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ spec:
engineConfig:
runner:
name: ome-container
image: ghcr.io/moirai-internal/sgl:dev2
image: docker.io/lmsysorg/sglang:v0.4.8.post1-cu126
ports:
- containerPort: 8080
name: http1
Expand All @@ -38,7 +38,7 @@ spec:
--port=8080 \
--enable-metrics \
--model-path="$MODEL_PATH" \
--tp 1 \
--tp-size 1 \
--is-embedding
volumeMounts:
- mountPath: /dev/shm
Expand Down Expand Up @@ -93,16 +93,4 @@ spec:
volumes:
- name: dshm
emptyDir:
medium: Memory
affinity:
nodeAffinity:
requiredDuringSchedulingIgnoredDuringExecution:
nodeSelectorTerms:
- matchExpressions:
- key: node.kubernetes.io/instance-type
operator: In
values:
- BM.GPU.B4.8
- BM.GPU4.8
- BM.GPU.A100-v2.8
- BM.GPU.H100.8
medium: Memory
Loading
Loading