diff --git a/.github/workflows/example_tests.yml b/.github/workflows/example_tests.yml index 8442125f3..c1dab5dab 100644 --- a/.github/workflows/example_tests.yml +++ b/.github/workflows/example_tests.yml @@ -56,7 +56,7 @@ jobs: match_pattern: "^DCO$|^linux$" # Wait for DCO and Unit tests / linux to pass delay: 300s - ##### PyTorch Example Tests ##### + ##### PyTorch Example Tests (speculative_decoding requires 26.01 image) ##### torch-pr: needs: [check-file-changes, wait-checks] if: startsWith(github.ref, 'refs/heads/pull-request/') && needs.check-file-changes.outputs.any_changed == 'true' @@ -64,10 +64,13 @@ jobs: fail-fast: false matrix: example: [llm_distill, llm_qat, llm_sparsity] + include: + - example: speculative_decoding + docker_image: "nvcr.io/nvidia/pytorch:26.01-py3" uses: ./.github/workflows/_example_tests_runner.yml secrets: inherit with: - docker_image: "nvcr.io/nvidia/pytorch:25.06-py3" + docker_image: ${{ matrix.docker_image || 'nvcr.io/nvidia/pytorch:25.06-py3' }} example: ${{ matrix.example }} pip_install_extras: "[hf,dev-test]" runner: linux-amd64-gpu-l4-latest-1 @@ -78,36 +81,17 @@ jobs: fail-fast: false matrix: example: [llm_distill, llm_qat, llm_sparsity] + include: + - example: speculative_decoding + docker_image: "nvcr.io/nvidia/pytorch:26.01-py3" uses: ./.github/workflows/_example_tests_runner.yml secrets: inherit with: - docker_image: "nvcr.io/nvidia/pytorch:25.06-py3" + docker_image: ${{ matrix.docker_image || 'nvcr.io/nvidia/pytorch:25.06-py3' }} example: ${{ matrix.example }} pip_install_extras: "[hf,dev-test]" runner: linux-amd64-gpu-h100-latest-2 - ##### Speculative Decoding Example Tests (requires 26.01 image) ##### - speculative-decoding-pr: - needs: [check-file-changes, wait-checks] - if: startsWith(github.ref, 'refs/heads/pull-request/') && needs.check-file-changes.outputs.any_changed == 'true' - uses: ./.github/workflows/_example_tests_runner.yml - secrets: inherit - with: - docker_image: "nvcr.io/nvidia/pytorch:26.01-py3" - example: speculative_decoding - pip_install_extras: "[hf,dev-test]" - runner: linux-amd64-gpu-l4-latest-1 - - speculative-decoding-non-pr: - if: ${{ !startsWith(github.ref, 'refs/heads/pull-request/') }} - uses: ./.github/workflows/_example_tests_runner.yml - secrets: inherit - with: - docker_image: "nvcr.io/nvidia/pytorch:26.01-py3" - example: speculative_decoding - pip_install_extras: "[hf,dev-test]" - runner: linux-amd64-gpu-h100-latest-2 - ##### TensorRT-LLM Example Tests ##### trtllm-pr: needs: [check-file-changes, wait-checks] @@ -172,7 +156,7 @@ jobs: example-pr-required-check: # Run even if example tests are skipped if: ${{ startsWith(github.ref, 'refs/heads/pull-request/') && always() }} - needs: [check-file-changes, torch-pr, speculative-decoding-pr, trtllm-pr, onnx-pr] + needs: [check-file-changes, torch-pr, trtllm-pr, onnx-pr] runs-on: ubuntu-latest steps: - name: Required GPU tests did not succeed @@ -180,7 +164,6 @@ jobs: needs.check-file-changes.result != 'success' || (needs.check-file-changes.outputs.any_changed == 'true' && ( needs.torch-pr.result != 'success' || - needs.speculative-decoding-pr.result != 'success' || needs.trtllm-pr.result != 'success' || needs.onnx-pr.result != 'success' )) diff --git a/.github/workflows/gpu_tests.yml b/.github/workflows/gpu_tests.yml index cb4686815..3e55682cd 100644 --- a/.github/workflows/gpu_tests.yml +++ b/.github/workflows/gpu_tests.yml @@ -59,8 +59,16 @@ jobs: gpu-tests-pr: needs: [check-file-changes, wait-checks] if: needs.check-file-changes.outputs.any_changed == 'true' + strategy: + fail-fast: false + matrix: + include: + - example: py312-cuda12-gpu + timeout: 90 + - example: py312-cuda12-gpu-megatron + timeout: 120 runs-on: linux-amd64-gpu-l4-latest-1 - timeout-minutes: 120 + timeout-minutes: ${{ matrix.timeout }} container: &gpu_container image: nvcr.io/nvidia/pytorch:25.06-py3 env: @@ -74,11 +82,19 @@ jobs: run: | echo "LD_LIBRARY_PATH=${LD_LIBRARY_PATH}:/usr/include:/usr/lib/x86_64-linux-gnu" >> $GITHUB_ENV - name: Run gpu tests - run: pip install tox-current-env && tox -e py312-cuda12-gpu --current-env + run: pip install tox-current-env && tox -e ${{ matrix.example }} --current-env gpu-tests-non-pr: if: ${{ !startsWith(github.ref, 'refs/heads/pull-request/') }} + strategy: + fail-fast: false + matrix: + include: + - example: py312-cuda12-gpu + timeout: 90 + - example: py312-cuda12-gpu-megatron + timeout: 120 runs-on: linux-amd64-gpu-h100-latest-2 - timeout-minutes: 150 + timeout-minutes: ${{ matrix.timeout }} container: *gpu_container steps: *gpu_steps gpu-pr-required-check: diff --git a/pyproject.toml b/pyproject.toml index 176866d41..bffa547b6 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -132,8 +132,8 @@ disable_error_code = ["attr-defined"] [tool.pytest.ini_options] # Default additional options # Show a short test summary info for all except passed tests with -ra flag -# print execution time for 20 slowest tests and generate coverage reports -addopts = "-v -ra --instafail --cov-report=term-missing --cov-report=html --cov-report=xml:coverage.xml --cov-config=pyproject.toml --durations=20 --strict-markers" +# print execution time for 50 slowest tests and generate coverage reports +addopts = "-v -ra --instafail --cov-report=term-missing --cov-report=html --cov-report=xml:coverage.xml --cov-config=pyproject.toml --durations=50 --strict-markers" pythonpath = ["tests/"] markers = [ "manual: Only run when --run-manual is given", diff --git a/setup.py b/setup.py index 242505302..8f5578e89 100644 --- a/setup.py +++ b/setup.py @@ -77,6 +77,7 @@ "pytest-cov", "pytest-instafail", "pytest-timeout", + "sentencepiece", # For test_unified_export_megatron.py, test_vllm_fakequant_megatron_export.py "timm", "torchprofile>=0.0.4", # For computing flops of CV models "torchvision", diff --git a/tests/gpu_megatron/_extensions b/tests/gpu_megatron/_extensions new file mode 120000 index 000000000..dc4ffce33 --- /dev/null +++ b/tests/gpu_megatron/_extensions @@ -0,0 +1 @@ +../gpu/_extensions/ \ No newline at end of file diff --git a/tests/gpu_megatron/torch/conftest.py b/tests/gpu_megatron/torch/conftest.py new file mode 120000 index 000000000..40eda16c0 --- /dev/null +++ b/tests/gpu_megatron/torch/conftest.py @@ -0,0 +1 @@ +../../gpu/torch/conftest.py \ No newline at end of file diff --git a/tests/gpu/torch/distill/plugins/test_distill_megatron.py b/tests/gpu_megatron/torch/distill/plugins/test_distill_megatron.py similarity index 100% rename from tests/gpu/torch/distill/plugins/test_distill_megatron.py rename to tests/gpu_megatron/torch/distill/plugins/test_distill_megatron.py diff --git a/tests/gpu/torch/export/test_unified_export_megatron.py b/tests/gpu_megatron/torch/export/test_unified_export_megatron.py similarity index 100% rename from tests/gpu/torch/export/test_unified_export_megatron.py rename to tests/gpu_megatron/torch/export/test_unified_export_megatron.py diff --git a/tests/gpu/torch/export/test_vllm_fakequant_megatron_export.py b/tests/gpu_megatron/torch/export/test_vllm_fakequant_megatron_export.py similarity index 100% rename from tests/gpu/torch/export/test_vllm_fakequant_megatron_export.py rename to tests/gpu_megatron/torch/export/test_vllm_fakequant_megatron_export.py diff --git a/tests/gpu/torch/nas/plugins/test_megatron_gpt_dynamic_modules.py b/tests/gpu_megatron/torch/nas/plugins/test_megatron_gpt_dynamic_modules.py similarity index 100% rename from tests/gpu/torch/nas/plugins/test_megatron_gpt_dynamic_modules.py rename to tests/gpu_megatron/torch/nas/plugins/test_megatron_gpt_dynamic_modules.py diff --git a/tests/gpu/torch/nas/plugins/test_megatron_mamba_dynamic_modules.py b/tests/gpu_megatron/torch/nas/plugins/test_megatron_mamba_dynamic_modules.py similarity index 100% rename from tests/gpu/torch/nas/plugins/test_megatron_mamba_dynamic_modules.py rename to tests/gpu_megatron/torch/nas/plugins/test_megatron_mamba_dynamic_modules.py diff --git a/tests/gpu/torch/opt/plugins/test_megatron_chaining.py b/tests/gpu_megatron/torch/opt/plugins/test_megatron_chaining.py similarity index 100% rename from tests/gpu/torch/opt/plugins/test_megatron_chaining.py rename to tests/gpu_megatron/torch/opt/plugins/test_megatron_chaining.py diff --git a/tests/gpu/torch/peft/test_megatron_peft.py b/tests/gpu_megatron/torch/peft/plugins/test_megatron_peft.py similarity index 100% rename from tests/gpu/torch/peft/test_megatron_peft.py rename to tests/gpu_megatron/torch/peft/plugins/test_megatron_peft.py diff --git a/tests/gpu/torch/prune/plugins/test_mcore_gpt_minitron_pruning.py b/tests/gpu_megatron/torch/prune/plugins/test_mcore_gpt_minitron_pruning.py similarity index 100% rename from tests/gpu/torch/prune/plugins/test_mcore_gpt_minitron_pruning.py rename to tests/gpu_megatron/torch/prune/plugins/test_mcore_gpt_minitron_pruning.py diff --git a/tests/gpu/torch/prune/plugins/test_mcore_mamba_minitron_pruning.py b/tests/gpu_megatron/torch/prune/plugins/test_mcore_mamba_minitron_pruning.py similarity index 100% rename from tests/gpu/torch/prune/plugins/test_mcore_mamba_minitron_pruning.py rename to tests/gpu_megatron/torch/prune/plugins/test_mcore_mamba_minitron_pruning.py diff --git a/tests/gpu/torch/quantization/plugins/test_apex.py b/tests/gpu_megatron/torch/quantization/plugins/test_apex.py similarity index 100% rename from tests/gpu/torch/quantization/plugins/test_apex.py rename to tests/gpu_megatron/torch/quantization/plugins/test_apex.py diff --git a/tests/gpu/torch/quantization/plugins/test_megatron.py b/tests/gpu_megatron/torch/quantization/plugins/test_megatron.py similarity index 100% rename from tests/gpu/torch/quantization/plugins/test_megatron.py rename to tests/gpu_megatron/torch/quantization/plugins/test_megatron.py diff --git a/tests/gpu/torch/quantization/plugins/test_transformer_engine.py b/tests/gpu_megatron/torch/quantization/plugins/test_transformer_engine.py similarity index 100% rename from tests/gpu/torch/quantization/plugins/test_transformer_engine.py rename to tests/gpu_megatron/torch/quantization/plugins/test_transformer_engine.py diff --git a/tests/gpu/torch/sparsity/weight_sparsity/plugins/test_megatron_sparsity.py b/tests/gpu_megatron/torch/sparsity/weight_sparsity/plugins/test_megatron_sparsity.py similarity index 100% rename from tests/gpu/torch/sparsity/weight_sparsity/plugins/test_megatron_sparsity.py rename to tests/gpu_megatron/torch/sparsity/weight_sparsity/plugins/test_megatron_sparsity.py diff --git a/tests/gpu/torch/speculative/plugins/test_speculative_megatron_modules.py b/tests/gpu_megatron/torch/speculative/plugins/test_speculative_megatron_modules.py similarity index 100% rename from tests/gpu/torch/speculative/plugins/test_speculative_megatron_modules.py rename to tests/gpu_megatron/torch/speculative/plugins/test_speculative_megatron_modules.py diff --git a/tests/gpu/torch/utils/plugins/test_utils_megatron.py b/tests/gpu_megatron/torch/utils/plugins/test_utils_megatron.py similarity index 100% rename from tests/gpu/torch/utils/plugins/test_utils_megatron.py rename to tests/gpu_megatron/torch/utils/plugins/test_utils_megatron.py diff --git a/tox.ini b/tox.ini index ee7acf029..ae296e5bd 100644 --- a/tox.ini +++ b/tox.ini @@ -60,23 +60,27 @@ commands = [testenv:{py310,py311,py312}-cuda12-gpu] commands_pre = # Install deps here so that it gets installed even in --current-env - pip install -U megatron-core pip install git+https://github.com/Dao-AILab/fast-hadamard-transform.git + pip install -e .[all,dev-test] +commands = + # Coverage fails with "Can't combine line data with arc data" error so not using "--cov" + python -m pytest tests/gpu + +[testenv:{py310,py311,py312}-cuda12-gpu-megatron] +commands_pre = + # Install deps here so that it gets installed even in --current-env + pip install -U megatron-core + # Skip triton because pytorch-triton is installed in the NGC PyTorch containers pip install pip-mark-installed pip-mark-installed triton pip install --no-build-isolation git+https://github.com/state-spaces/mamba.git - # Install Eagle-3 test dependencies - pip install tiktoken blobfile sentencepiece - - # NOTE: User is expected to have correct torch-cuda version pre-installed if using --current-env - # to avoid possible CUDA version mismatch pip install -e .[all,dev-test] commands = # Coverage fails with "Can't combine line data with arc data" error so not using "--cov" - python -m pytest tests/gpu + python -m pytest tests/gpu_megatron ############################################# # Code quality checks on all files or on diff