From cdd7d79f952941f09f709f5188621a489305a46d Mon Sep 17 00:00:00 2001 From: Keval Morabia <28916987+kevalmorabia97@users.noreply.github.com> Date: Fri, 13 Feb 2026 01:07:11 -0800 Subject: [PATCH 1/2] Separate CI job for Megatron GPU tests Signed-off-by: Keval Morabia <28916987+kevalmorabia97@users.noreply.github.com> --- .github/workflows/gpu_tests.yml | 14 +++++++++++--- tests/gpu_megatron/_extensions | 1 + tests/gpu_megatron/torch/conftest.py | 1 + .../distill/plugins/test_distill_megatron.py | 0 .../export/test_unified_export_megatron.py | 0 .../test_vllm_fakequant_megatron_export.py | 0 .../test_megatron_gpt_dynamic_modules.py | 0 .../test_megatron_mamba_dynamic_modules.py | 0 .../opt/plugins/test_megatron_chaining.py | 0 .../torch/peft/plugins}/test_megatron_peft.py | 0 .../plugins/test_mcore_gpt_minitron_pruning.py | 0 .../test_mcore_mamba_minitron_pruning.py | 0 .../torch/quantization/plugins/test_apex.py | 0 .../quantization/plugins/test_megatron.py | 0 .../plugins/test_transformer_engine.py | 0 .../plugins/test_megatron_sparsity.py | 0 .../test_speculative_megatron_modules.py | 0 .../torch/utils/plugins/test_utils_megatron.py | 0 tox.ini | 18 +++++++++++++----- 19 files changed, 26 insertions(+), 8 deletions(-) create mode 120000 tests/gpu_megatron/_extensions create mode 120000 tests/gpu_megatron/torch/conftest.py rename tests/{gpu => gpu_megatron}/torch/distill/plugins/test_distill_megatron.py (100%) rename tests/{gpu => gpu_megatron}/torch/export/test_unified_export_megatron.py (100%) rename tests/{gpu => gpu_megatron}/torch/export/test_vllm_fakequant_megatron_export.py (100%) rename tests/{gpu => gpu_megatron}/torch/nas/plugins/test_megatron_gpt_dynamic_modules.py (100%) rename tests/{gpu => gpu_megatron}/torch/nas/plugins/test_megatron_mamba_dynamic_modules.py (100%) rename tests/{gpu => gpu_megatron}/torch/opt/plugins/test_megatron_chaining.py (100%) rename tests/{gpu/torch/peft => gpu_megatron/torch/peft/plugins}/test_megatron_peft.py (100%) rename tests/{gpu => gpu_megatron}/torch/prune/plugins/test_mcore_gpt_minitron_pruning.py (100%) rename tests/{gpu => gpu_megatron}/torch/prune/plugins/test_mcore_mamba_minitron_pruning.py (100%) rename tests/{gpu => gpu_megatron}/torch/quantization/plugins/test_apex.py (100%) rename tests/{gpu => gpu_megatron}/torch/quantization/plugins/test_megatron.py (100%) rename tests/{gpu => gpu_megatron}/torch/quantization/plugins/test_transformer_engine.py (100%) rename tests/{gpu => gpu_megatron}/torch/sparsity/weight_sparsity/plugins/test_megatron_sparsity.py (100%) rename tests/{gpu => gpu_megatron}/torch/speculative/plugins/test_speculative_megatron_modules.py (100%) rename tests/{gpu => gpu_megatron}/torch/utils/plugins/test_utils_megatron.py (100%) diff --git a/.github/workflows/gpu_tests.yml b/.github/workflows/gpu_tests.yml index cb4686815..f807363a8 100644 --- a/.github/workflows/gpu_tests.yml +++ b/.github/workflows/gpu_tests.yml @@ -59,8 +59,12 @@ jobs: gpu-tests-pr: needs: [check-file-changes, wait-checks] if: needs.check-file-changes.outputs.any_changed == 'true' + strategy: + fail-fast: false + matrix: + example: [py312-cuda12-gpu, py312-cuda12-gpu-megatron] runs-on: linux-amd64-gpu-l4-latest-1 - timeout-minutes: 120 + timeout-minutes: 90 container: &gpu_container image: nvcr.io/nvidia/pytorch:25.06-py3 env: @@ -74,11 +78,15 @@ jobs: run: | echo "LD_LIBRARY_PATH=${LD_LIBRARY_PATH}:/usr/include:/usr/lib/x86_64-linux-gnu" >> $GITHUB_ENV - name: Run gpu tests - run: pip install tox-current-env && tox -e py312-cuda12-gpu --current-env + run: pip install tox-current-env && tox -e ${{ matrix.example }} --current-env gpu-tests-non-pr: if: ${{ !startsWith(github.ref, 'refs/heads/pull-request/') }} + strategy: + fail-fast: false + matrix: + example: [py312-cuda12-gpu, py312-cuda12-gpu-megatron] runs-on: linux-amd64-gpu-h100-latest-2 - timeout-minutes: 150 + timeout-minutes: 90 container: *gpu_container steps: *gpu_steps gpu-pr-required-check: diff --git a/tests/gpu_megatron/_extensions b/tests/gpu_megatron/_extensions new file mode 120000 index 000000000..dc4ffce33 --- /dev/null +++ b/tests/gpu_megatron/_extensions @@ -0,0 +1 @@ +../gpu/_extensions/ \ No newline at end of file diff --git a/tests/gpu_megatron/torch/conftest.py b/tests/gpu_megatron/torch/conftest.py new file mode 120000 index 000000000..40eda16c0 --- /dev/null +++ b/tests/gpu_megatron/torch/conftest.py @@ -0,0 +1 @@ +../../gpu/torch/conftest.py \ No newline at end of file diff --git a/tests/gpu/torch/distill/plugins/test_distill_megatron.py b/tests/gpu_megatron/torch/distill/plugins/test_distill_megatron.py similarity index 100% rename from tests/gpu/torch/distill/plugins/test_distill_megatron.py rename to tests/gpu_megatron/torch/distill/plugins/test_distill_megatron.py diff --git a/tests/gpu/torch/export/test_unified_export_megatron.py b/tests/gpu_megatron/torch/export/test_unified_export_megatron.py similarity index 100% rename from tests/gpu/torch/export/test_unified_export_megatron.py rename to tests/gpu_megatron/torch/export/test_unified_export_megatron.py diff --git a/tests/gpu/torch/export/test_vllm_fakequant_megatron_export.py b/tests/gpu_megatron/torch/export/test_vllm_fakequant_megatron_export.py similarity index 100% rename from tests/gpu/torch/export/test_vllm_fakequant_megatron_export.py rename to tests/gpu_megatron/torch/export/test_vllm_fakequant_megatron_export.py diff --git a/tests/gpu/torch/nas/plugins/test_megatron_gpt_dynamic_modules.py b/tests/gpu_megatron/torch/nas/plugins/test_megatron_gpt_dynamic_modules.py similarity index 100% rename from tests/gpu/torch/nas/plugins/test_megatron_gpt_dynamic_modules.py rename to tests/gpu_megatron/torch/nas/plugins/test_megatron_gpt_dynamic_modules.py diff --git a/tests/gpu/torch/nas/plugins/test_megatron_mamba_dynamic_modules.py b/tests/gpu_megatron/torch/nas/plugins/test_megatron_mamba_dynamic_modules.py similarity index 100% rename from tests/gpu/torch/nas/plugins/test_megatron_mamba_dynamic_modules.py rename to tests/gpu_megatron/torch/nas/plugins/test_megatron_mamba_dynamic_modules.py diff --git a/tests/gpu/torch/opt/plugins/test_megatron_chaining.py b/tests/gpu_megatron/torch/opt/plugins/test_megatron_chaining.py similarity index 100% rename from tests/gpu/torch/opt/plugins/test_megatron_chaining.py rename to tests/gpu_megatron/torch/opt/plugins/test_megatron_chaining.py diff --git a/tests/gpu/torch/peft/test_megatron_peft.py b/tests/gpu_megatron/torch/peft/plugins/test_megatron_peft.py similarity index 100% rename from tests/gpu/torch/peft/test_megatron_peft.py rename to tests/gpu_megatron/torch/peft/plugins/test_megatron_peft.py diff --git a/tests/gpu/torch/prune/plugins/test_mcore_gpt_minitron_pruning.py b/tests/gpu_megatron/torch/prune/plugins/test_mcore_gpt_minitron_pruning.py similarity index 100% rename from tests/gpu/torch/prune/plugins/test_mcore_gpt_minitron_pruning.py rename to tests/gpu_megatron/torch/prune/plugins/test_mcore_gpt_minitron_pruning.py diff --git a/tests/gpu/torch/prune/plugins/test_mcore_mamba_minitron_pruning.py b/tests/gpu_megatron/torch/prune/plugins/test_mcore_mamba_minitron_pruning.py similarity index 100% rename from tests/gpu/torch/prune/plugins/test_mcore_mamba_minitron_pruning.py rename to tests/gpu_megatron/torch/prune/plugins/test_mcore_mamba_minitron_pruning.py diff --git a/tests/gpu/torch/quantization/plugins/test_apex.py b/tests/gpu_megatron/torch/quantization/plugins/test_apex.py similarity index 100% rename from tests/gpu/torch/quantization/plugins/test_apex.py rename to tests/gpu_megatron/torch/quantization/plugins/test_apex.py diff --git a/tests/gpu/torch/quantization/plugins/test_megatron.py b/tests/gpu_megatron/torch/quantization/plugins/test_megatron.py similarity index 100% rename from tests/gpu/torch/quantization/plugins/test_megatron.py rename to tests/gpu_megatron/torch/quantization/plugins/test_megatron.py diff --git a/tests/gpu/torch/quantization/plugins/test_transformer_engine.py b/tests/gpu_megatron/torch/quantization/plugins/test_transformer_engine.py similarity index 100% rename from tests/gpu/torch/quantization/plugins/test_transformer_engine.py rename to tests/gpu_megatron/torch/quantization/plugins/test_transformer_engine.py diff --git a/tests/gpu/torch/sparsity/weight_sparsity/plugins/test_megatron_sparsity.py b/tests/gpu_megatron/torch/sparsity/weight_sparsity/plugins/test_megatron_sparsity.py similarity index 100% rename from tests/gpu/torch/sparsity/weight_sparsity/plugins/test_megatron_sparsity.py rename to tests/gpu_megatron/torch/sparsity/weight_sparsity/plugins/test_megatron_sparsity.py diff --git a/tests/gpu/torch/speculative/plugins/test_speculative_megatron_modules.py b/tests/gpu_megatron/torch/speculative/plugins/test_speculative_megatron_modules.py similarity index 100% rename from tests/gpu/torch/speculative/plugins/test_speculative_megatron_modules.py rename to tests/gpu_megatron/torch/speculative/plugins/test_speculative_megatron_modules.py diff --git a/tests/gpu/torch/utils/plugins/test_utils_megatron.py b/tests/gpu_megatron/torch/utils/plugins/test_utils_megatron.py similarity index 100% rename from tests/gpu/torch/utils/plugins/test_utils_megatron.py rename to tests/gpu_megatron/torch/utils/plugins/test_utils_megatron.py diff --git a/tox.ini b/tox.ini index ee7acf029..19fd22a48 100644 --- a/tox.ini +++ b/tox.ini @@ -60,23 +60,31 @@ commands = [testenv:{py310,py311,py312}-cuda12-gpu] commands_pre = # Install deps here so that it gets installed even in --current-env - pip install -U megatron-core pip install git+https://github.com/Dao-AILab/fast-hadamard-transform.git + # NOTE: User is expected to have correct torch-cuda version pre-installed if using --current-env + # to avoid possible CUDA version mismatch + pip install -e .[all,dev-test] +commands = + # Coverage fails with "Can't combine line data with arc data" error so not using "--cov" + python -m pytest tests/gpu + +[testenv:{py310,py311,py312}-cuda12-gpu-megatron] +commands_pre = + # Install deps here so that it gets installed even in --current-env + pip install -U megatron-core + # Skip triton because pytorch-triton is installed in the NGC PyTorch containers pip install pip-mark-installed pip-mark-installed triton pip install --no-build-isolation git+https://github.com/state-spaces/mamba.git - # Install Eagle-3 test dependencies - pip install tiktoken blobfile sentencepiece - # NOTE: User is expected to have correct torch-cuda version pre-installed if using --current-env # to avoid possible CUDA version mismatch pip install -e .[all,dev-test] commands = # Coverage fails with "Can't combine line data with arc data" error so not using "--cov" - python -m pytest tests/gpu + python -m pytest tests/gpu_megatron ############################################# # Code quality checks on all files or on diff From 7f8ccafb19c8f5525bfc61530188613b7190795d Mon Sep 17 00:00:00 2001 From: Keval Morabia <28916987+kevalmorabia97@users.noreply.github.com> Date: Fri, 13 Feb 2026 11:38:06 -0800 Subject: [PATCH 2/2] Add back sentencepiece dependency for tests + cleanup workflow Signed-off-by: Keval Morabia <28916987+kevalmorabia97@users.noreply.github.com> --- .github/workflows/example_tests.yml | 37 ++++++++--------------------- .github/workflows/gpu_tests.yml | 16 +++++++++---- pyproject.toml | 4 ++-- setup.py | 1 + tox.ini | 4 ---- 5 files changed, 25 insertions(+), 37 deletions(-) diff --git a/.github/workflows/example_tests.yml b/.github/workflows/example_tests.yml index 8442125f3..c1dab5dab 100644 --- a/.github/workflows/example_tests.yml +++ b/.github/workflows/example_tests.yml @@ -56,7 +56,7 @@ jobs: match_pattern: "^DCO$|^linux$" # Wait for DCO and Unit tests / linux to pass delay: 300s - ##### PyTorch Example Tests ##### + ##### PyTorch Example Tests (speculative_decoding requires 26.01 image) ##### torch-pr: needs: [check-file-changes, wait-checks] if: startsWith(github.ref, 'refs/heads/pull-request/') && needs.check-file-changes.outputs.any_changed == 'true' @@ -64,10 +64,13 @@ jobs: fail-fast: false matrix: example: [llm_distill, llm_qat, llm_sparsity] + include: + - example: speculative_decoding + docker_image: "nvcr.io/nvidia/pytorch:26.01-py3" uses: ./.github/workflows/_example_tests_runner.yml secrets: inherit with: - docker_image: "nvcr.io/nvidia/pytorch:25.06-py3" + docker_image: ${{ matrix.docker_image || 'nvcr.io/nvidia/pytorch:25.06-py3' }} example: ${{ matrix.example }} pip_install_extras: "[hf,dev-test]" runner: linux-amd64-gpu-l4-latest-1 @@ -78,36 +81,17 @@ jobs: fail-fast: false matrix: example: [llm_distill, llm_qat, llm_sparsity] + include: + - example: speculative_decoding + docker_image: "nvcr.io/nvidia/pytorch:26.01-py3" uses: ./.github/workflows/_example_tests_runner.yml secrets: inherit with: - docker_image: "nvcr.io/nvidia/pytorch:25.06-py3" + docker_image: ${{ matrix.docker_image || 'nvcr.io/nvidia/pytorch:25.06-py3' }} example: ${{ matrix.example }} pip_install_extras: "[hf,dev-test]" runner: linux-amd64-gpu-h100-latest-2 - ##### Speculative Decoding Example Tests (requires 26.01 image) ##### - speculative-decoding-pr: - needs: [check-file-changes, wait-checks] - if: startsWith(github.ref, 'refs/heads/pull-request/') && needs.check-file-changes.outputs.any_changed == 'true' - uses: ./.github/workflows/_example_tests_runner.yml - secrets: inherit - with: - docker_image: "nvcr.io/nvidia/pytorch:26.01-py3" - example: speculative_decoding - pip_install_extras: "[hf,dev-test]" - runner: linux-amd64-gpu-l4-latest-1 - - speculative-decoding-non-pr: - if: ${{ !startsWith(github.ref, 'refs/heads/pull-request/') }} - uses: ./.github/workflows/_example_tests_runner.yml - secrets: inherit - with: - docker_image: "nvcr.io/nvidia/pytorch:26.01-py3" - example: speculative_decoding - pip_install_extras: "[hf,dev-test]" - runner: linux-amd64-gpu-h100-latest-2 - ##### TensorRT-LLM Example Tests ##### trtllm-pr: needs: [check-file-changes, wait-checks] @@ -172,7 +156,7 @@ jobs: example-pr-required-check: # Run even if example tests are skipped if: ${{ startsWith(github.ref, 'refs/heads/pull-request/') && always() }} - needs: [check-file-changes, torch-pr, speculative-decoding-pr, trtllm-pr, onnx-pr] + needs: [check-file-changes, torch-pr, trtllm-pr, onnx-pr] runs-on: ubuntu-latest steps: - name: Required GPU tests did not succeed @@ -180,7 +164,6 @@ jobs: needs.check-file-changes.result != 'success' || (needs.check-file-changes.outputs.any_changed == 'true' && ( needs.torch-pr.result != 'success' || - needs.speculative-decoding-pr.result != 'success' || needs.trtllm-pr.result != 'success' || needs.onnx-pr.result != 'success' )) diff --git a/.github/workflows/gpu_tests.yml b/.github/workflows/gpu_tests.yml index f807363a8..3e55682cd 100644 --- a/.github/workflows/gpu_tests.yml +++ b/.github/workflows/gpu_tests.yml @@ -62,9 +62,13 @@ jobs: strategy: fail-fast: false matrix: - example: [py312-cuda12-gpu, py312-cuda12-gpu-megatron] + include: + - example: py312-cuda12-gpu + timeout: 90 + - example: py312-cuda12-gpu-megatron + timeout: 120 runs-on: linux-amd64-gpu-l4-latest-1 - timeout-minutes: 90 + timeout-minutes: ${{ matrix.timeout }} container: &gpu_container image: nvcr.io/nvidia/pytorch:25.06-py3 env: @@ -84,9 +88,13 @@ jobs: strategy: fail-fast: false matrix: - example: [py312-cuda12-gpu, py312-cuda12-gpu-megatron] + include: + - example: py312-cuda12-gpu + timeout: 90 + - example: py312-cuda12-gpu-megatron + timeout: 120 runs-on: linux-amd64-gpu-h100-latest-2 - timeout-minutes: 90 + timeout-minutes: ${{ matrix.timeout }} container: *gpu_container steps: *gpu_steps gpu-pr-required-check: diff --git a/pyproject.toml b/pyproject.toml index 176866d41..bffa547b6 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -132,8 +132,8 @@ disable_error_code = ["attr-defined"] [tool.pytest.ini_options] # Default additional options # Show a short test summary info for all except passed tests with -ra flag -# print execution time for 20 slowest tests and generate coverage reports -addopts = "-v -ra --instafail --cov-report=term-missing --cov-report=html --cov-report=xml:coverage.xml --cov-config=pyproject.toml --durations=20 --strict-markers" +# print execution time for 50 slowest tests and generate coverage reports +addopts = "-v -ra --instafail --cov-report=term-missing --cov-report=html --cov-report=xml:coverage.xml --cov-config=pyproject.toml --durations=50 --strict-markers" pythonpath = ["tests/"] markers = [ "manual: Only run when --run-manual is given", diff --git a/setup.py b/setup.py index 242505302..8f5578e89 100644 --- a/setup.py +++ b/setup.py @@ -77,6 +77,7 @@ "pytest-cov", "pytest-instafail", "pytest-timeout", + "sentencepiece", # For test_unified_export_megatron.py, test_vllm_fakequant_megatron_export.py "timm", "torchprofile>=0.0.4", # For computing flops of CV models "torchvision", diff --git a/tox.ini b/tox.ini index 19fd22a48..ae296e5bd 100644 --- a/tox.ini +++ b/tox.ini @@ -62,8 +62,6 @@ commands_pre = # Install deps here so that it gets installed even in --current-env pip install git+https://github.com/Dao-AILab/fast-hadamard-transform.git - # NOTE: User is expected to have correct torch-cuda version pre-installed if using --current-env - # to avoid possible CUDA version mismatch pip install -e .[all,dev-test] commands = # Coverage fails with "Can't combine line data with arc data" error so not using "--cov" @@ -79,8 +77,6 @@ commands_pre = pip-mark-installed triton pip install --no-build-isolation git+https://github.com/state-spaces/mamba.git - # NOTE: User is expected to have correct torch-cuda version pre-installed if using --current-env - # to avoid possible CUDA version mismatch pip install -e .[all,dev-test] commands = # Coverage fails with "Can't combine line data with arc data" error so not using "--cov"