Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
37 changes: 10 additions & 27 deletions .github/workflows/example_tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -56,18 +56,21 @@ jobs:
match_pattern: "^DCO$|^linux$" # Wait for DCO and Unit tests / linux to pass
delay: 300s

##### PyTorch Example Tests #####
##### PyTorch Example Tests (speculative_decoding requires 26.01 image) #####
torch-pr:
needs: [check-file-changes, wait-checks]
if: startsWith(github.ref, 'refs/heads/pull-request/') && needs.check-file-changes.outputs.any_changed == 'true'
strategy:
fail-fast: false
matrix:
example: [llm_distill, llm_qat, llm_sparsity]
include:
- example: speculative_decoding
docker_image: "nvcr.io/nvidia/pytorch:26.01-py3"
uses: ./.github/workflows/_example_tests_runner.yml
secrets: inherit
with:
docker_image: "nvcr.io/nvidia/pytorch:25.06-py3"
docker_image: ${{ matrix.docker_image || 'nvcr.io/nvidia/pytorch:25.06-py3' }}
example: ${{ matrix.example }}
pip_install_extras: "[hf,dev-test]"
runner: linux-amd64-gpu-l4-latest-1
Expand All @@ -78,36 +81,17 @@ jobs:
fail-fast: false
matrix:
example: [llm_distill, llm_qat, llm_sparsity]
include:
- example: speculative_decoding
docker_image: "nvcr.io/nvidia/pytorch:26.01-py3"
uses: ./.github/workflows/_example_tests_runner.yml
secrets: inherit
with:
docker_image: "nvcr.io/nvidia/pytorch:25.06-py3"
docker_image: ${{ matrix.docker_image || 'nvcr.io/nvidia/pytorch:25.06-py3' }}
example: ${{ matrix.example }}
pip_install_extras: "[hf,dev-test]"
runner: linux-amd64-gpu-h100-latest-2

##### Speculative Decoding Example Tests (requires 26.01 image) #####
speculative-decoding-pr:
needs: [check-file-changes, wait-checks]
if: startsWith(github.ref, 'refs/heads/pull-request/') && needs.check-file-changes.outputs.any_changed == 'true'
uses: ./.github/workflows/_example_tests_runner.yml
secrets: inherit
with:
docker_image: "nvcr.io/nvidia/pytorch:26.01-py3"
example: speculative_decoding
pip_install_extras: "[hf,dev-test]"
runner: linux-amd64-gpu-l4-latest-1

speculative-decoding-non-pr:
if: ${{ !startsWith(github.ref, 'refs/heads/pull-request/') }}
uses: ./.github/workflows/_example_tests_runner.yml
secrets: inherit
with:
docker_image: "nvcr.io/nvidia/pytorch:26.01-py3"
example: speculative_decoding
pip_install_extras: "[hf,dev-test]"
runner: linux-amd64-gpu-h100-latest-2

##### TensorRT-LLM Example Tests #####
trtllm-pr:
needs: [check-file-changes, wait-checks]
Expand Down Expand Up @@ -172,15 +156,14 @@ jobs:
example-pr-required-check:
# Run even if example tests are skipped
if: ${{ startsWith(github.ref, 'refs/heads/pull-request/') && always() }}
needs: [check-file-changes, torch-pr, speculative-decoding-pr, trtllm-pr, onnx-pr]
needs: [check-file-changes, torch-pr, trtllm-pr, onnx-pr]
runs-on: ubuntu-latest
steps:
- name: Required GPU tests did not succeed
if: |
needs.check-file-changes.result != 'success' ||
(needs.check-file-changes.outputs.any_changed == 'true' && (
needs.torch-pr.result != 'success' ||
needs.speculative-decoding-pr.result != 'success' ||
needs.trtllm-pr.result != 'success' ||
needs.onnx-pr.result != 'success'
))
Expand Down
22 changes: 19 additions & 3 deletions .github/workflows/gpu_tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -59,8 +59,16 @@ jobs:
gpu-tests-pr:
needs: [check-file-changes, wait-checks]
if: needs.check-file-changes.outputs.any_changed == 'true'
strategy:
fail-fast: false
matrix:
include:
- example: py312-cuda12-gpu
timeout: 90
- example: py312-cuda12-gpu-megatron
timeout: 120
runs-on: linux-amd64-gpu-l4-latest-1
timeout-minutes: 120
timeout-minutes: ${{ matrix.timeout }}
container: &gpu_container
image: nvcr.io/nvidia/pytorch:25.06-py3
env:
Expand All @@ -74,11 +82,19 @@ jobs:
run: |
echo "LD_LIBRARY_PATH=${LD_LIBRARY_PATH}:/usr/include:/usr/lib/x86_64-linux-gnu" >> $GITHUB_ENV
- name: Run gpu tests
run: pip install tox-current-env && tox -e py312-cuda12-gpu --current-env
run: pip install tox-current-env && tox -e ${{ matrix.example }} --current-env
gpu-tests-non-pr:
if: ${{ !startsWith(github.ref, 'refs/heads/pull-request/') }}
strategy:
fail-fast: false
matrix:
include:
- example: py312-cuda12-gpu
timeout: 90
- example: py312-cuda12-gpu-megatron
timeout: 120
runs-on: linux-amd64-gpu-h100-latest-2
timeout-minutes: 150
timeout-minutes: ${{ matrix.timeout }}
container: *gpu_container
steps: *gpu_steps
gpu-pr-required-check:
Expand Down
4 changes: 2 additions & 2 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -132,8 +132,8 @@ disable_error_code = ["attr-defined"]
[tool.pytest.ini_options]
# Default additional options
# Show a short test summary info for all except passed tests with -ra flag
# print execution time for 20 slowest tests and generate coverage reports
addopts = "-v -ra --instafail --cov-report=term-missing --cov-report=html --cov-report=xml:coverage.xml --cov-config=pyproject.toml --durations=20 --strict-markers"
# print execution time for 50 slowest tests and generate coverage reports
addopts = "-v -ra --instafail --cov-report=term-missing --cov-report=html --cov-report=xml:coverage.xml --cov-config=pyproject.toml --durations=50 --strict-markers"
pythonpath = ["tests/"]
markers = [
"manual: Only run when --run-manual is given",
Expand Down
1 change: 1 addition & 0 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,7 @@
"pytest-cov",
"pytest-instafail",
"pytest-timeout",
"sentencepiece", # For test_unified_export_megatron.py, test_vllm_fakequant_megatron_export.py
"timm",
"torchprofile>=0.0.4", # For computing flops of CV models
"torchvision",
Expand Down
1 change: 1 addition & 0 deletions tests/gpu_megatron/_extensions
1 change: 1 addition & 0 deletions tests/gpu_megatron/torch/conftest.py
18 changes: 11 additions & 7 deletions tox.ini
Original file line number Diff line number Diff line change
Expand Up @@ -60,23 +60,27 @@ commands =
[testenv:{py310,py311,py312}-cuda12-gpu]
commands_pre =
# Install deps here so that it gets installed even in --current-env
pip install -U megatron-core
pip install git+https://github.com/Dao-AILab/fast-hadamard-transform.git

pip install -e .[all,dev-test]
commands =
# Coverage fails with "Can't combine line data with arc data" error so not using "--cov"
python -m pytest tests/gpu

[testenv:{py310,py311,py312}-cuda12-gpu-megatron]
commands_pre =
# Install deps here so that it gets installed even in --current-env
pip install -U megatron-core

# Skip triton because pytorch-triton is installed in the NGC PyTorch containers
pip install pip-mark-installed
pip-mark-installed triton
pip install --no-build-isolation git+https://github.com/state-spaces/mamba.git

# Install Eagle-3 test dependencies
pip install tiktoken blobfile sentencepiece

# NOTE: User is expected to have correct torch-cuda version pre-installed if using --current-env
# to avoid possible CUDA version mismatch
pip install -e .[all,dev-test]
commands =
# Coverage fails with "Can't combine line data with arc data" error so not using "--cov"
python -m pytest tests/gpu
python -m pytest tests/gpu_megatron

#############################################
# Code quality checks on all files or on diff
Expand Down