From 10a5c20304c46785d09fe293801d0ab3ccafa2e6 Mon Sep 17 00:00:00 2001
From: William Chen <57119977+OCWC22@users.noreply.github.com>
Date: Tue, 28 Apr 2026 12:49:07 -0700
Subject: [PATCH] feat(agentx): add DSv4 agentic cells for B200, B300, H200

PR #1201 currently includes DSR1 agentic cells but not DSv4 variants.
Add DSv4 B200/B300/H200 cells that mirror the DSR1 B200 agentic flow and
reuse benchmark_lib.sh helpers from PR #1201.
This is purely additive and does not change replay methodology, metrics,
or workflow behavior.

Signed-off-by: William Chen <57119977+OCWC22@users.noreply.github.com>
---
 .../single_node/agentic/dsv4_fp4_b200.sh      | 70 +++++++++++++++++
 .../single_node/agentic/dsv4_fp4_b300.sh      | 69 +++++++++++++++++
 .../single_node/agentic/dsv4_fp8_h200.sh      | 75 +++++++++++++++++++
 3 files changed, 214 insertions(+)
 create mode 100755 benchmarks/single_node/agentic/dsv4_fp4_b200.sh
 create mode 100755 benchmarks/single_node/agentic/dsv4_fp4_b300.sh
 create mode 100755 benchmarks/single_node/agentic/dsv4_fp8_h200.sh

diff --git a/benchmarks/single_node/agentic/dsv4_fp4_b200.sh b/benchmarks/single_node/agentic/dsv4_fp4_b200.sh
new file mode 100755
index 000000000..ad9b1bd51
--- /dev/null
+++ b/benchmarks/single_node/agentic/dsv4_fp4_b200.sh
@@ -0,0 +1,70 @@
+#!/usr/bin/env bash
+set -euo pipefail
+set -x
+
+# Agentic trace replay benchmark for DSv4 FP4 on B200 using SGLang.
+# Optional env vars (defaults below): PORT, DURATION, MAX_DELAY, ADVANCE_MIN, ADVANCE_MAX
+# Required env vars:
+#   MODEL, TP, CONC, RESULT_DIR
+
+source "$(dirname "$0")/../../benchmark_lib.sh"
+
+check_env_vars MODEL TP CONC RESULT_DIR
+
+PORT=${PORT:-8888}
+DURATION=${DURATION:-1800}
+MAX_DELAY=${MAX_DELAY:-60}
+ADVANCE_MIN=${ADVANCE_MIN:-0.0}
+ADVANCE_MAX=${ADVANCE_MAX:-0.7}
+
+if [[ -n "${SLURM_JOB_ID:-}" ]]; then
+    echo "JOB $SLURM_JOB_ID running on ${SLURMD_NODENAME:-unknown}"
+fi
+
+if [[ "$MODEL" != /* ]]; then hf download "$MODEL"; fi  # MODEL without a leading '/' is fetched with hf download
+nvidia-smi
+
+# ---- Resolve traces and install deps ----------------------------------------
+resolve_trace_source
+install_agentic_deps
+
+# ---- Start SGLang server ----------------------------------------------------
+SERVER_LOG="$RESULT_DIR/server.log"
+mkdir -p "$RESULT_DIR"
+
+echo "Starting SGLang server..."
+export PYTHONNOUSERSITE=1
+
+sglang serve \
+    --model-path "$MODEL" \
+    --host 0.0.0.0 \
+    --port "$PORT" \
+    --trust-remote-code \
+    --tp "$TP" \
+    --disable-radix-cache \
+    --max-running-requests "$((CONC * 3 / 2))" \
+    --mem-fraction-static 0.90 \
+    --swa-full-tokens-ratio 0.1 \
+    --moe-runner-backend flashinfer_mxfp4 \
+    --chunked-prefill-size 8192 \
+    --disable-flashinfer-autotune \
+    --enable-metrics > "$SERVER_LOG" 2>&1 &
+SERVER_PID=$!
+echo "Server PID: $SERVER_PID"
+
+wait_for_server_ready --port "$PORT" --server-log "$SERVER_LOG" --server-pid "$SERVER_PID"
+
+# ---- Run benchmark ----------------------------------------------------------
+build_replay_cmd "$RESULT_DIR"
+
+echo "$REPLAY_CMD" > "$RESULT_DIR/benchmark_command.txt"
+
+set -x
+$REPLAY_CMD 2>&1 | tee "$RESULT_DIR/benchmark.log" || true  # unquoted so the command string word-splits; '|| true' keeps a failed replay from aborting result collection
+set +x
+
+write_agentic_result_json "$RESULT_DIR"
+
+# ---- Post-processing --------------------------------------------------------
+python3 "$AGENTIC_DIR/scripts/analyze_benchmark_distributions.py" \
+    "$RESULT_DIR/trace_replay" -o "$RESULT_DIR" 2>&1 || true
diff --git a/benchmarks/single_node/agentic/dsv4_fp4_b300.sh b/benchmarks/single_node/agentic/dsv4_fp4_b300.sh
new file mode 100755
index 000000000..e66fe0abd
--- /dev/null
+++ b/benchmarks/single_node/agentic/dsv4_fp4_b300.sh
@@ -0,0 +1,69 @@
+#!/usr/bin/env bash
+set -euo pipefail
+set -x
+
+# Agentic trace replay benchmark for DSv4 FP4 on B300 using SGLang.
+# Optional env vars (defaults below): PORT, DURATION, MAX_DELAY, ADVANCE_MIN, ADVANCE_MAX
+# Required env vars:
+#   MODEL, TP, CONC, RESULT_DIR
+
+source "$(dirname "$0")/../../benchmark_lib.sh"
+
+check_env_vars MODEL TP CONC RESULT_DIR
+
+PORT=${PORT:-8888}
+DURATION=${DURATION:-1800}
+MAX_DELAY=${MAX_DELAY:-60}
+ADVANCE_MIN=${ADVANCE_MIN:-0.0}
+ADVANCE_MAX=${ADVANCE_MAX:-0.7}
+
+if [[ -n "${SLURM_JOB_ID:-}" ]]; then
+    echo "JOB $SLURM_JOB_ID running on ${SLURMD_NODENAME:-unknown}"
+fi
+
+if [[ "$MODEL" != /* ]]; then hf download "$MODEL"; fi  # MODEL without a leading '/' is fetched with hf download
+nvidia-smi
+
+# ---- Resolve traces and install deps ----------------------------------------
+resolve_trace_source
+install_agentic_deps
+
+# ---- Start SGLang server ----------------------------------------------------
+SERVER_LOG="$RESULT_DIR/server.log"
+mkdir -p "$RESULT_DIR"
+
+echo "Starting SGLang server..."
+export PYTHONNOUSERSITE=1
+
+sglang serve \
+    --model-path "$MODEL" \
+    --host 0.0.0.0 \
+    --port "$PORT" \
+    --trust-remote-code \
+    --tp "$TP" \
+    --max-running-requests "$((CONC * 3 / 2 > 8 ? CONC * 3 / 2 : 8))" \
+    --mem-fraction-static 0.90 \
+    --swa-full-tokens-ratio 0.1 \
+    --moe-runner-backend flashinfer_mxfp4 \
+    --chunked-prefill-size 8192 \
+    --disable-flashinfer-autotune \
+    --enable-metrics > "$SERVER_LOG" 2>&1 &
+SERVER_PID=$!
+echo "Server PID: $SERVER_PID"
+
+wait_for_server_ready --port "$PORT" --server-log "$SERVER_LOG" --server-pid "$SERVER_PID"
+
+# ---- Run benchmark ----------------------------------------------------------
+build_replay_cmd "$RESULT_DIR"
+
+echo "$REPLAY_CMD" > "$RESULT_DIR/benchmark_command.txt"
+
+set -x
+$REPLAY_CMD 2>&1 | tee "$RESULT_DIR/benchmark.log" || true  # unquoted so the command string word-splits; '|| true' keeps a failed replay from aborting result collection
+set +x
+
+write_agentic_result_json "$RESULT_DIR"
+
+# ---- Post-processing --------------------------------------------------------
+python3 "$AGENTIC_DIR/scripts/analyze_benchmark_distributions.py" \
+    "$RESULT_DIR/trace_replay" -o "$RESULT_DIR" 2>&1 || true
diff --git a/benchmarks/single_node/agentic/dsv4_fp8_h200.sh b/benchmarks/single_node/agentic/dsv4_fp8_h200.sh
new file mode 100755
index 000000000..f22a69826
--- /dev/null
+++ b/benchmarks/single_node/agentic/dsv4_fp8_h200.sh
@@ -0,0 +1,75 @@
+#!/usr/bin/env bash
+set -euo pipefail
+set -x
+
+# Agentic trace replay benchmark for DSv4 FP8 on H200 using vLLM.
+# Optional env vars (defaults below): PORT, DURATION, MAX_DELAY, ADVANCE_MIN, ADVANCE_MAX
+# Required env vars:
+#   MODEL, TP (passed to vLLM as --data-parallel-size), CONC, RESULT_DIR
+
+source "$(dirname "$0")/../../benchmark_lib.sh"
+
+check_env_vars MODEL TP CONC RESULT_DIR
+
+PORT=${PORT:-8888}
+DURATION=${DURATION:-1800}
+MAX_DELAY=${MAX_DELAY:-60}
+ADVANCE_MIN=${ADVANCE_MIN:-0.0}
+ADVANCE_MAX=${ADVANCE_MAX:-0.7}
+
+if [[ -n "${SLURM_JOB_ID:-}" ]]; then
+    echo "JOB $SLURM_JOB_ID running on ${SLURMD_NODENAME:-unknown}"
+fi
+
+if [[ "$MODEL" != /* ]]; then hf download "$MODEL"; fi  # MODEL without a leading '/' is fetched with hf download
+nvidia-smi
+
+# ---- Resolve traces and install deps ----------------------------------------
+resolve_trace_source
+install_agentic_deps
+
+# ---- Start vLLM server ------------------------------------------------------
+SERVER_LOG="$RESULT_DIR/server.log"
+mkdir -p "$RESULT_DIR"
+
+echo "Starting server..."
+export VLLM_ENGINE_READY_TIMEOUT_S=3600
+
+vllm serve "$MODEL" \
+    --host 0.0.0.0 \
+    --port "$PORT" \
+    --trust-remote-code \
+    --kv-cache-dtype fp8 \
+    --block-size 256 \
+    --no-enable-prefix-caching \
+    --enable-expert-parallel \
+    --data-parallel-size "$TP" \
+    --max-model-len 800000 \
+    --gpu-memory-utilization 0.95 \
+    --max-num-seqs 512 \
+    --max-num-batched-tokens 512 \
+    --no-enable-flashinfer-autotune \
+    --compilation-config '{"mode":0,"cudagraph_mode":"FULL_DECODE_ONLY"}' \
+    --tokenizer-mode deepseek_v4 \
+    --tool-call-parser deepseek_v4 \
+    --enable-auto-tool-choice \
+    --reasoning-parser deepseek_v4 > "$SERVER_LOG" 2>&1 &
+SERVER_PID=$!
+echo "Server PID: $SERVER_PID"
+
+wait_for_server_ready --port "$PORT" --server-log "$SERVER_LOG" --server-pid "$SERVER_PID"
+
+# ---- Run benchmark ----------------------------------------------------------
+build_replay_cmd "$RESULT_DIR"
+
+echo "$REPLAY_CMD" > "$RESULT_DIR/benchmark_command.txt"
+
+set -x
+$REPLAY_CMD 2>&1 | tee "$RESULT_DIR/benchmark.log" || true  # unquoted so the command string word-splits; '|| true' keeps a failed replay from aborting result collection
+set +x
+
+write_agentic_result_json "$RESULT_DIR"
+
+# ---- Post-processing --------------------------------------------------------
+python3 "$AGENTIC_DIR/scripts/analyze_benchmark_distributions.py" \
+    "$RESULT_DIR/trace_replay" -o "$RESULT_DIR" 2>&1 || true