Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions .github/configs/runners.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,8 @@ b200:
- 'b200-nvd_1'
- 'b200-nvd_2'
- 'b200-nvd_3'
- 'b200-dgxc_1'
- 'b200-dgxc_2'
mi300x:
- 'mi300x-amd_0'
- 'mi300x-amd_1'
Expand Down
65 changes: 65 additions & 0 deletions runners/launch_b200-dgxc.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
#!/usr/bin/bash
# Launch-time configuration for the b200-dgxc benchmark runner.

# Host-side HuggingFace hub cache, bind-mounted into the container below.
HF_HUB_CACHE_MOUNT="/raid/hf_hub_cache/"

# TensorRT runs use a "_trt"-suffixed benchmark script; all other frameworks
# use the unsuffixed one.
if [[ "$FRAMEWORK" == "trt" ]]; then
  FRAMEWORK_SUFFIX="_trt"
else
  FRAMEWORK_SUFFIX=""
fi

PORT=8888

# Short model identifier: last path component of the HF repo id in $MODEL.
MODEL_NAME=$(basename "$MODEL")

server_name="bmk-server"

nvidia-smi

# Refuse to start while compute processes from a previous run still hold the
# GPUs: any PID reported by the compute-apps query means the node is busy.
active_pids=$(nvidia-smi --query-compute-apps=pid --format=csv,noheader)
if [[ "$active_pids" == *[0-9]* ]]; then
  echo "[ERROR] GPU busy from previous run"
  nvidia-smi
  exit 1
fi

set -x
# Use --init flag to run an init process (PID 1) inside container for better signal handling and zombie process cleanup
# Ref: https://www.paolomainardi.com/posts/docker-run-init/

# NCCL_GRAPH_REGISTER tries to automatically enable user buffer registration with CUDA Graphs.
# Disabling it can reduce perf but will improve CI stability. i.e. we won't see vLLM/Sglang crashes.
# Ref: https://docs.nvidia.com/deeplearning/nccl/user-guide/docs/env.html#nccl-graph-register

# Scale the prompt count with concurrency. The DeepSeek-R1 variants get a
# larger multiplier (except for long 8192-token outputs, which get a smaller
# one to keep runtimes bounded); every other model uses the baseline factor.
case "$MODEL" in
  nvidia/DeepSeek-R1-0528-FP4|deepseek-ai/DeepSeek-R1-0528)
    if [[ "$OSL" == "8192" ]]; then
      prompts_per_conc=20
    else
      prompts_per_conc=50
    fi
    ;;
  *)
    prompts_per_conc=10
    ;;
esac
export NUM_PROMPTS=$(( CONC * prompts_per_conc ))

# Run the benchmark container in the foreground; --rm removes it on exit and
# --init gives it a real PID 1 for signal handling / zombie reaping.
# All expansions are quoted so paths with spaces or empty values cannot
# word-split the command line (SC2086).
docker run --rm --init --network host --name "$server_name" \
--runtime nvidia --gpus all --ipc host --privileged --shm-size=16g --ulimit memlock=-1 --ulimit stack=67108864 \
-v "$HF_HUB_CACHE_MOUNT":"$HF_HUB_CACHE" \
-v "$GITHUB_WORKSPACE":/workspace/ -w /workspace/ \
-e HF_TOKEN -e HF_HUB_CACHE -e MODEL -e TP -e CONC -e MAX_MODEL_LEN -e ISL -e OSL -e PORT="$PORT" -e EP_SIZE -e DP_ATTENTION \
-e NCCL_GRAPH_REGISTER=0 \
-e TORCH_CUDA_ARCH_LIST="10.0" -e CUDA_DEVICE_ORDER=PCI_BUS_ID -e CUDA_VISIBLE_DEVICES="0,1,2,3,4,5,6,7" \
-e PYTHONPYCACHEPREFIX=/tmp/pycache/ -e RESULT_FILENAME -e RANDOM_RANGE_RATIO -e NUM_PROMPTS \
--entrypoint=/bin/bash \
"${IMAGE/\#//}" \
benchmarks/"${EXP_NAME%%_*}_${PRECISION}_b200${FRAMEWORK_SUFFIX}_docker.sh"

# Attempt an orderly shutdown first (SIGTERM, 90s grace period).
docker stop -t 90 "$server_name" || true
# Block until the container has actually exited, if it still exists.
docker wait "$server_name" >/dev/null 2>&1 || true
# Force-remove any container state that lingers after stop/wait.
docker rm -f "$server_name" >/dev/null 2>&1 || true

# Give the driver a moment to reap terminated GPU processes.
sleep 2
# Confirm the GPUs are free again; if not, emit diagnostics (reset is left
# commented out as a manual last resort).
leftover_pids=$(nvidia-smi --query-compute-apps=pid --format=csv,noheader)
if [[ "$leftover_pids" == *[0-9]* ]]; then
  echo "[WARN] After stop, GPU still busy:"
  nvidia-smi
  # Last resort if driver allows and GPUs appear idle otherwise:
  #nvidia-smi --gpu-reset -i 0,1,2,3,4,5,6,7 2>/dev/null || true
fi

nvidia-smi
2 changes: 1 addition & 1 deletion runners/launch_b200-nvd.sh
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ docker run --rm --init --network host --name $server_name \
--runtime nvidia --gpus all --ipc host --privileged --shm-size=16g --ulimit memlock=-1 --ulimit stack=67108864 \
-v $HF_HUB_CACHE_MOUNT:$HF_HUB_CACHE \
-v $GITHUB_WORKSPACE:/workspace/ -w /workspace/ \
-e HF_TOKEN -e HF_HUB_CACHE -e MODEL -e TP -e CONC -e MAX_MODEL_LEN -e ISL -e OSL -e PORT=$PORT -e EP_SIZE \
-e HF_TOKEN -e HF_HUB_CACHE -e MODEL -e TP -e CONC -e MAX_MODEL_LEN -e ISL -e OSL -e PORT=$PORT -e EP_SIZE -e DP_ATTENTION \
-e NCCL_GRAPH_REGISTER=0 \
-e TORCH_CUDA_ARCH_LIST="10.0" -e CUDA_DEVICE_ORDER=PCI_BUS_ID -e CUDA_VISIBLE_DEVICES="0,1,2,3,4,5,6,7" \
-e PYTHONPYCACHEPREFIX=/tmp/pycache/ -e RESULT_FILENAME -e RANDOM_RANGE_RATIO -e NUM_PROMPTS \
Expand Down
Loading