diff --git a/benchmarks/single_node/agentic/dsv4_fp4_b200.sh b/benchmarks/single_node/agentic/dsv4_fp4_b200.sh
new file mode 100755
index 000000000..ad9b1bd51
--- /dev/null
+++ b/benchmarks/single_node/agentic/dsv4_fp4_b200.sh
@@ -0,0 +1,70 @@
+#!/usr/bin/env bash
+set -euo pipefail
+set -x
+
+# Agentic trace replay benchmark for DSv4 FP4 on B200 using SGLang.
+#
+# Required env vars:
+#   MODEL, TP, CONC, RESULT_DIR
+
+source "$(dirname "$0")/../../benchmark_lib.sh"
+
+check_env_vars MODEL TP CONC RESULT_DIR
+
+PORT=${PORT:-8888}
+DURATION=${DURATION:-1800}
+MAX_DELAY=${MAX_DELAY:-60}
+ADVANCE_MIN=${ADVANCE_MIN:-0.0}
+ADVANCE_MAX=${ADVANCE_MAX:-0.7}
+
+if [[ -n "${SLURM_JOB_ID:-}" ]]; then
+    echo "JOB $SLURM_JOB_ID running on ${SLURMD_NODENAME:-unknown}"
+fi
+
+if [[ "$MODEL" != /* ]]; then hf download "$MODEL"; fi
+nvidia-smi
+
+# ---- Resolve traces and install deps ----------------------------------------
+resolve_trace_source
+install_agentic_deps
+
+# ---- Start SGLang server ----------------------------------------------------
+SERVER_LOG="$RESULT_DIR/server.log"
+mkdir -p "$RESULT_DIR"
+
+echo "Starting SGLang server..."
+export PYTHONNOUSERSITE=1
+
+sglang serve \
+    --model-path "$MODEL" \
+    --host 0.0.0.0 \
+    --port "$PORT" \
+    --trust-remote-code \
+    --tp "$TP" \
+    --disable-radix-cache \
+    --max-running-requests "$((CONC * 3 / 2))" \
+    --mem-fraction-static 0.90 \
+    --swa-full-tokens-ratio 0.1 \
+    --moe-runner-backend flashinfer_mxfp4 \
+    --chunked-prefill-size 8192 \
+    --disable-flashinfer-autotune \
+    --enable-metrics > "$SERVER_LOG" 2>&1 &
+SERVER_PID=$!
+echo "Server PID: $SERVER_PID"
+
+wait_for_server_ready --port "$PORT" --server-log "$SERVER_LOG" --server-pid "$SERVER_PID"
+
+# ---- Run benchmark ----------------------------------------------------------
+build_replay_cmd "$RESULT_DIR"
+
+echo "$REPLAY_CMD" > "$RESULT_DIR/benchmark_command.txt"
+
+set -x
+$REPLAY_CMD 2>&1 | tee "$RESULT_DIR/benchmark.log" || true
+set +x
+
+write_agentic_result_json "$RESULT_DIR"
+
+# ---- Post-processing --------------------------------------------------------
+python3 "$AGENTIC_DIR/scripts/analyze_benchmark_distributions.py" \
+    "$RESULT_DIR/trace_replay" -o "$RESULT_DIR" 2>&1 || true
diff --git a/benchmarks/single_node/agentic/dsv4_fp4_b300.sh b/benchmarks/single_node/agentic/dsv4_fp4_b300.sh
new file mode 100755
index 000000000..e66fe0abd
--- /dev/null
+++ b/benchmarks/single_node/agentic/dsv4_fp4_b300.sh
@@ -0,0 +1,69 @@
+#!/usr/bin/env bash
+set -euo pipefail
+set -x
+
+# Agentic trace replay benchmark for DSv4 FP4 on B300 using SGLang.
+#
+# Required env vars:
+#   MODEL, TP, CONC, RESULT_DIR
+
+source "$(dirname "$0")/../../benchmark_lib.sh"
+
+check_env_vars MODEL TP CONC RESULT_DIR
+
+PORT=${PORT:-8888}
+DURATION=${DURATION:-1800}
+MAX_DELAY=${MAX_DELAY:-60}
+ADVANCE_MIN=${ADVANCE_MIN:-0.0}
+ADVANCE_MAX=${ADVANCE_MAX:-0.7}
+
+if [[ -n "${SLURM_JOB_ID:-}" ]]; then
+    echo "JOB $SLURM_JOB_ID running on ${SLURMD_NODENAME:-unknown}"
+fi
+
+if [[ "$MODEL" != /* ]]; then hf download "$MODEL"; fi
+nvidia-smi
+
+# ---- Resolve traces and install deps ----------------------------------------
+resolve_trace_source
+install_agentic_deps
+
+# ---- Start SGLang server ----------------------------------------------------
+SERVER_LOG="$RESULT_DIR/server.log"
+mkdir -p "$RESULT_DIR"
+
+echo "Starting SGLang server..."
+export PYTHONNOUSERSITE=1
+
+sglang serve \
+    --model-path "$MODEL" \
+    --host 0.0.0.0 \
+    --port "$PORT" \
+    --trust-remote-code \
+    --tp "$TP" \
+    --max-running-requests "$((CONC * 3 / 2 > 8 ? CONC * 3 / 2 : 8))" \
+    --mem-fraction-static 0.90 \
+    --swa-full-tokens-ratio 0.1 \
+    --moe-runner-backend flashinfer_mxfp4 \
+    --chunked-prefill-size 8192 \
+    --disable-flashinfer-autotune \
+    --enable-metrics > "$SERVER_LOG" 2>&1 &
+SERVER_PID=$!
+echo "Server PID: $SERVER_PID"
+
+wait_for_server_ready --port "$PORT" --server-log "$SERVER_LOG" --server-pid "$SERVER_PID"
+
+# ---- Run benchmark ----------------------------------------------------------
+build_replay_cmd "$RESULT_DIR"
+
+echo "$REPLAY_CMD" > "$RESULT_DIR/benchmark_command.txt"
+
+set -x
+$REPLAY_CMD 2>&1 | tee "$RESULT_DIR/benchmark.log" || true
+set +x
+
+write_agentic_result_json "$RESULT_DIR"
+
+# ---- Post-processing --------------------------------------------------------
+python3 "$AGENTIC_DIR/scripts/analyze_benchmark_distributions.py" \
+    "$RESULT_DIR/trace_replay" -o "$RESULT_DIR" 2>&1 || true
diff --git a/benchmarks/single_node/agentic/dsv4_fp8_h200.sh b/benchmarks/single_node/agentic/dsv4_fp8_h200.sh
new file mode 100755
index 000000000..f22a69826
--- /dev/null
+++ b/benchmarks/single_node/agentic/dsv4_fp8_h200.sh
@@ -0,0 +1,76 @@
+#!/usr/bin/env bash
+set -euo pipefail
+set -x
+
+# Agentic trace replay benchmark for DSv4 FP8 on H200 using vLLM.
+#
+# Required env vars:
+#   MODEL, TP, CONC, RESULT_DIR
+
+source "$(dirname "$0")/../../benchmark_lib.sh"
+
+check_env_vars MODEL TP CONC RESULT_DIR
+
+PORT=${PORT:-8888}
+DURATION=${DURATION:-1800}
+MAX_DELAY=${MAX_DELAY:-60}
+ADVANCE_MIN=${ADVANCE_MIN:-0.0}
+ADVANCE_MAX=${ADVANCE_MAX:-0.7}
+
+if [[ -n "${SLURM_JOB_ID:-}" ]]; then
+    echo "JOB $SLURM_JOB_ID running on ${SLURMD_NODENAME:-unknown}"
+fi
+
+if [[ "$MODEL" != /* ]]; then hf download "$MODEL"; fi
+nvidia-smi
+
+# ---- Resolve traces and install deps ----------------------------------------
+resolve_trace_source
+install_agentic_deps
+
+# ---- Start vLLM server -------------------------------------------------------
+SERVER_LOG="$RESULT_DIR/server.log"
+mkdir -p "$RESULT_DIR"
+
+echo "Starting server..."
+export VLLM_ENGINE_READY_TIMEOUT_S=3600
+
+# NOTE(review): env var TP is passed as --data-parallel-size (DP attention + EP MoE
+# given --enable-expert-parallel); confirm this is intended vs --tensor-parallel-size.
+vllm serve "$MODEL" \
+    --host 0.0.0.0 \
+    --port "$PORT" \
+    --trust-remote-code \
+    --kv-cache-dtype fp8 \
+    --block-size 256 \
+    --no-enable-prefix-caching \
+    --enable-expert-parallel \
+    --data-parallel-size "$TP" \
+    --max-model-len 800000 \
+    --gpu-memory-utilization 0.95 \
+    --max-num-seqs 512 \
+    --max-num-batched-tokens 512 \
+    --no-enable-flashinfer-autotune \
+    --compilation-config '{"mode":0,"cudagraph_mode":"FULL_DECODE_ONLY"}' \
+    --tokenizer-mode deepseek_v4 \
+    --tool-call-parser deepseek_v4 \
+    --enable-auto-tool-choice \
+    --reasoning-parser deepseek_v4 > "$SERVER_LOG" 2>&1 &
+SERVER_PID=$!
+echo "Server PID: $SERVER_PID"
+
+wait_for_server_ready --port "$PORT" --server-log "$SERVER_LOG" --server-pid "$SERVER_PID"
+
+# ---- Run benchmark ----------------------------------------------------------
+build_replay_cmd "$RESULT_DIR"
+
+echo "$REPLAY_CMD" > "$RESULT_DIR/benchmark_command.txt"
+
+set -x
+$REPLAY_CMD 2>&1 | tee "$RESULT_DIR/benchmark.log" || true
+set +x
+
+write_agentic_result_json "$RESULT_DIR"
+
+# ---- Post-processing --------------------------------------------------------
+python3 "$AGENTIC_DIR/scripts/analyze_benchmark_distributions.py" \
+    "$RESULT_DIR/trace_replay" -o "$RESULT_DIR" 2>&1 || true