diff --git a/runners/launch_b200-gmi.sh b/runners/launch_b200-gmi.sh
new file mode 100755
index 000000000..272ad14af
--- /dev/null
+++ b/runners/launch_b200-gmi.sh
@@ -0,0 +1,29 @@
+#!/usr/bin/env bash
+
+# GMI Cloud B200 single-node runner: launches the matching benchmarks/single_node/ script inside a GPU Docker container. Mirrors launch_b200-cw.sh; see runners/GMI_QUICKSTART*.md. Requires EXP_NAME, PRECISION, IMAGE, HF_HUB_CACHE and GITHUB_WORKSPACE in the environment (unset vars abort via set -u).
+
+set -euo pipefail
+
+export RUNNER_LABEL="gmi-b200"
+export INSTANCE_TYPE="${INSTANCE_TYPE:-gmi-b200}"
+export PORT="${PORT:-8888}"
+export HF_HUB_CACHE_MOUNT="${HF_HUB_CACHE_MOUNT:-$HOME/.cache/huggingface}"
+
+MODEL_CODE="${EXP_NAME%%_*}"
+FRAMEWORK_SUFFIX=$([[ "${FRAMEWORK:-}" == "trt" ]] && printf '_trt' || printf '')
+SPEC_SUFFIX=$([[ "${SPEC_DECODING:-}" == "mtp" ]] && printf '_mtp' || printf '')
+SERVER_NAME="${RUNNER_NAME:-gmi-b200-bmk-server}"
+
+set -x
+docker run --rm --network=host --name="$SERVER_NAME" \
+  --runtime=nvidia --gpus=all --ipc=host --privileged --shm-size=16g --ulimit memlock=-1 --ulimit stack=67108864 \
+  -v "$HF_HUB_CACHE_MOUNT:$HF_HUB_CACHE" \
+  -v "$GITHUB_WORKSPACE:/workspace/" -w /workspace/ \
+  -e HF_TOKEN -e HF_HUB_CACHE -e MODEL -e TP -e CONC -e MAX_MODEL_LEN -e ISL -e OSL \
+  -e RUN_EVAL -e EVAL_ONLY -e RUNNER_TYPE -e RESULT_FILENAME -e RANDOM_RANGE_RATIO \
+  -e PROFILE -e SGLANG_TORCH_PROFILER_DIR -e VLLM_TORCH_PROFILER_DIR -e VLLM_RPC_TIMEOUT \
+  -e FRAMEWORK -e SPEC_DECODING -e PORT -e RUNNER_LABEL -e INSTANCE_TYPE \
+  -e PYTHONPYCACHEPREFIX=/tmp/pycache/ -e CUDA_DEVICE_ORDER=PCI_BUS_ID -e CUDA_VISIBLE_DEVICES="0,1,2,3,4,5,6,7" \
+  --entrypoint=/bin/bash \
+  "$IMAGE" \
+  "benchmarks/single_node/${MODEL_CODE}_${PRECISION}_b200${FRAMEWORK_SUFFIX}${SPEC_SUFFIX}.sh"
diff --git a/runners/launch_b300-gmi.sh b/runners/launch_b300-gmi.sh
new file mode 100755
index 000000000..25842aa58
--- /dev/null
+++ b/runners/launch_b300-gmi.sh
@@ -0,0 +1,23 @@
+#!/usr/bin/env bash
+
+# GMI Cloud B300 runner: selects a single-node or multi-node benchmark script and runs it directly on the host (no container). Mirrors launch_b300-nv.sh; see runners/GMI_QUICKSTART*.md. Requires EXP_NAME and PRECISION; the multi-node path (IS_MULTINODE=true) also requires FRAMEWORK (set -u).
+
+set -euo pipefail
+
+export RUNNER_LABEL="gmi-b300"
+export INSTANCE_TYPE="${INSTANCE_TYPE:-gmi-b300}"
+export PORT="${PORT:-8888}"
+export HF_HUB_CACHE_MOUNT="${HF_HUB_CACHE_MOUNT:-$HOME/.cache/huggingface}"
+
+MODEL_CODE="${EXP_NAME%%_*}"
+FRAMEWORK_SUFFIX=$([[ "${FRAMEWORK:-}" == "trt" ]] && printf '_trt' || printf '')
+SPEC_SUFFIX=$([[ "${SPEC_DECODING:-}" == "mtp" ]] && printf '_mtp' || printf '')
+
+if [[ "${IS_MULTINODE:-false}" == "true" ]]; then
+  SCRIPT_PATH="benchmarks/multi_node/${MODEL_CODE}_${PRECISION}_b300_${FRAMEWORK}.sh"
+else
+  SCRIPT_PATH="benchmarks/single_node/${MODEL_CODE}_${PRECISION}_b300${FRAMEWORK_SUFFIX}${SPEC_SUFFIX}.sh"
+fi
+
+set -x
+bash "$SCRIPT_PATH"
diff --git a/runners/launch_gb200-gmi.sh b/runners/launch_gb200-gmi.sh
new file mode 100755
index 000000000..b5025b93e
--- /dev/null
+++ b/runners/launch_gb200-gmi.sh
@@ -0,0 +1,23 @@
+#!/usr/bin/env bash
+
+# GMI Cloud GB200 runner: selects a single-node or multi-node benchmark script (multi-node when IS_MULTINODE=true or FRAMEWORK matches dynamo-*) and runs it directly on the host. Mirrors launch_gb200-nv.sh; see runners/GMI_QUICKSTART*.md. Requires EXP_NAME and PRECISION (set -u).
+
+set -euo pipefail
+
+export RUNNER_LABEL="gmi-gb200"
+export INSTANCE_TYPE="${INSTANCE_TYPE:-gmi-gb200}"
+export PORT="${PORT:-8888}"
+export HF_HUB_CACHE_MOUNT="${HF_HUB_CACHE_MOUNT:-$HOME/.cache/huggingface}"
+
+MODEL_CODE="${EXP_NAME%%_*}"
+FRAMEWORK_SUFFIX=$([[ "${FRAMEWORK:-}" == "trt" ]] && printf '_trt' || printf '')
+SPEC_SUFFIX=$([[ "${SPEC_DECODING:-}" == "mtp" ]] && printf '_mtp' || printf '')
+
+if [[ "${IS_MULTINODE:-false}" == "true" || "${FRAMEWORK:-}" == dynamo-* ]]; then
+  SCRIPT_PATH="benchmarks/multi_node/${MODEL_CODE}_${PRECISION}_gb200_${FRAMEWORK}.sh"
+else
+  SCRIPT_PATH="benchmarks/single_node/${MODEL_CODE}_${PRECISION}_gb200${FRAMEWORK_SUFFIX}${SPEC_SUFFIX}.sh"
+fi
+
+set -x
+bash "$SCRIPT_PATH"
diff --git a/runners/launch_h100-gmi.sh b/runners/launch_h100-gmi.sh
new file mode 100755
index 000000000..123c3c939
--- /dev/null
+++ b/runners/launch_h100-gmi.sh
@@ -0,0 +1,27 @@
+#!/usr/bin/env bash
+
+# GMI Cloud H100 single-node runner: launches the fixed benchmarks/single_node/<model>_<precision>_h100.sh script inside a GPU Docker container (no FRAMEWORK/SPEC_DECODING suffixes, unlike the b200/h200 runners). Mirrors launch_h100-cw.sh; see runners/GMI_QUICKSTART*.md. Requires EXP_NAME, PRECISION, IMAGE, HF_HUB_CACHE and GITHUB_WORKSPACE (set -u).
+
+set -euo pipefail
+
+export RUNNER_LABEL="gmi-h100"
+export INSTANCE_TYPE="${INSTANCE_TYPE:-gmi-h100}"
+export PORT="${PORT:-8888}"
+export HF_HUB_CACHE_MOUNT="${HF_HUB_CACHE_MOUNT:-$HOME/.cache/huggingface}"
+
+MODEL_CODE="${EXP_NAME%%_*}"
+SERVER_NAME="${RUNNER_NAME:-gmi-h100-bmk-server}"
+
+set -x
+docker run --rm --network=host --name="$SERVER_NAME" \
+  --runtime=nvidia --gpus=all --ipc=host --privileged --shm-size=16g --ulimit memlock=-1 --ulimit stack=67108864 \
+  -v "$HF_HUB_CACHE_MOUNT:$HF_HUB_CACHE" \
+  -v "$GITHUB_WORKSPACE:/workspace/" -w /workspace/ \
+  -e HF_TOKEN -e HF_HUB_CACHE -e MODEL -e TP -e CONC -e MAX_MODEL_LEN -e ISL -e OSL \
+  -e RUN_EVAL -e EVAL_ONLY -e RUNNER_TYPE -e RESULT_FILENAME -e RANDOM_RANGE_RATIO \
+  -e PROFILE -e SGLANG_TORCH_PROFILER_DIR -e VLLM_TORCH_PROFILER_DIR -e VLLM_RPC_TIMEOUT \
+  -e PORT -e RUNNER_LABEL -e INSTANCE_TYPE \
+  -e PYTHONPYCACHEPREFIX=/tmp/pycache/ -e CUDA_DEVICE_ORDER=PCI_BUS_ID -e CUDA_VISIBLE_DEVICES="0,1,2,3,4,5,6,7" \
+  --entrypoint=/bin/bash \
+  "$IMAGE" \
+  "benchmarks/single_node/${MODEL_CODE}_${PRECISION}_h100.sh"
diff --git a/runners/launch_h200-gmi.sh b/runners/launch_h200-gmi.sh
new file mode 100755
index 000000000..a1500ae8b
--- /dev/null
+++ b/runners/launch_h200-gmi.sh
@@ -0,0 +1,29 @@
+#!/usr/bin/env bash
+
+# GMI Cloud H200 single-node runner: launches the matching benchmarks/single_node/ script inside a GPU Docker container. Mirrors launch_h200-cw.sh; see runners/GMI_QUICKSTART*.md. Requires EXP_NAME, PRECISION, IMAGE, HF_HUB_CACHE and GITHUB_WORKSPACE in the environment (unset vars abort via set -u).
+
+set -euo pipefail
+
+export RUNNER_LABEL="gmi-h200"
+export INSTANCE_TYPE="${INSTANCE_TYPE:-gmi-h200}"
+export PORT="${PORT:-8888}"
+export HF_HUB_CACHE_MOUNT="${HF_HUB_CACHE_MOUNT:-$HOME/.cache/huggingface}"
+
+MODEL_CODE="${EXP_NAME%%_*}"
+FRAMEWORK_SUFFIX=$([[ "${FRAMEWORK:-}" == "trt" ]] && printf '_trt' || printf '')
+SPEC_SUFFIX=$([[ "${SPEC_DECODING:-}" == "mtp" ]] && printf '_mtp' || printf '')
+SERVER_NAME="${RUNNER_NAME:-gmi-h200-bmk-server}"
+
+set -x
+docker run --rm --network=host --name="$SERVER_NAME" \
+  --runtime=nvidia --gpus=all --ipc=host --privileged --shm-size=16g --ulimit memlock=-1 --ulimit stack=67108864 \
+  -v "$HF_HUB_CACHE_MOUNT:$HF_HUB_CACHE" \
+  -v "$GITHUB_WORKSPACE:/workspace/" -w /workspace/ \
+  -e HF_TOKEN -e HF_HUB_CACHE -e MODEL -e TP -e CONC -e MAX_MODEL_LEN -e ISL -e OSL \
+  -e RUN_EVAL -e EVAL_ONLY -e RUNNER_TYPE -e RESULT_FILENAME -e RANDOM_RANGE_RATIO \
+  -e PROFILE -e SGLANG_TORCH_PROFILER_DIR -e VLLM_TORCH_PROFILER_DIR -e VLLM_RPC_TIMEOUT \
+  -e FRAMEWORK -e SPEC_DECODING -e PORT -e RUNNER_LABEL -e INSTANCE_TYPE \
+  -e PYTHONPYCACHEPREFIX=/tmp/pycache/ -e CUDA_DEVICE_ORDER=PCI_BUS_ID -e CUDA_VISIBLE_DEVICES="0,1,2,3,4,5,6,7" \
+  --entrypoint=/bin/bash \
+  "$IMAGE" \
+  "benchmarks/single_node/${MODEL_CODE}_${PRECISION}_h200${FRAMEWORK_SUFFIX}${SPEC_SUFFIX}.sh"