NVIDIA-NeMo · chtruong814 · Mar 19, 2025 · Mar 18, 2025 · Mar 18, 2025 · Mar 18, 2025
@@ -0,0 +1,42 @@
+---
+name: Bug report
+about: Create a report to help us improve
+title: ''
+labels: bug
+assignees: ''
+
+---
+
+**Describe the bug**
+
+A clear and concise description of what the bug is.
+
+**Steps/Code to reproduce bug**
+
+Please list *minimal* steps or code snippet for us to be able to reproduce the bug.
+
+A helpful guide on how to craft a minimal bug report: http://matthewrocklin.com/blog/work/2018/02/28/minimal-bug-reports
+
+
+**Expected behavior**
+
+A clear and concise description of what you expected to happen.
+
+**Environment overview (please complete the following information)**
+
+ - Environment location: [Bare-metal, Docker, Cloud (specify cloud provider - AWS, Azure, GCP, Colab)]
+ - Method of install: [pip install or from source]. Please specify exact commands you used to install.
+ - If method of install is [Docker], provide `docker pull` & `docker run` commands used
+
+**Environment details**
+
+If an NVIDIA Docker image is used, you don't need to specify these.
+Otherwise, please provide:
+- OS version
+- PyTorch version
+- Python version
+
+**Additional context**
+
+Add any other context about the problem here.
+Example: GPU model
@@ -0,0 +1,25 @@
+---
+name: Feature request
+about: Suggest an idea for this project
+title: ''
+labels: feature request
+assignees: ''
+
+---
+
+**Is your feature request related to a problem? Please describe.**
+
+A clear and concise description of what the problem is. Ex. I'm always frustrated when [...]
+
+**Describe the solution you'd like**
+
+A clear and concise description of what you want to happen.
+Provide a code snippet on how new APIs/changes would be used by others.
+
+**Describe alternatives you've considered**
+
+A clear and concise description of any alternative solutions or features you've considered.
+
+**Additional context**
+
+Add any other context or screenshots about the feature request here.
@@ -0,0 +1,25 @@
+# What does this PR do?
+
+Add a one line overview of what this PR aims to accomplish.
+
+# Changelog 
+- Please update the [CHANGELOG.md](/CHANGELOG.md) under next version with high level changes in this PR.
+
+# Usage
+* You can potentially add a usage example below
+
+```python
+# Add a code snippet demonstrating how to use this 
+```
+
+# Before your PR is "Ready for review"
+**Pre checks**:
+- [ ] Make sure you read and followed [Contributor guidelines](/CONTRIBUTING.md)
+- [ ] Did you write any new necessary tests?
+- [ ] Did you add or update any necessary documentation? Make sure to also update the [NeMo Framework User Guide](https://docs.nvidia.com/nemo-framework/user-guide/latest/index.html) which contains the tutorials
+
+# Checklist when contributing
+- [ ] TBD
+
+# Additional Information
+* Related to # (issue)
@@ -0,0 +1,18 @@
+# Copyright (c) 2025, NVIDIA CORPORATION.  All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# Each top-level key is a label name; its value is the list of path globs
+# associated with that label.
+# NOTE(review): presumably consumed by a pull-request labeler action - confirm.
+documentation:
+  - "docs/**"
+
+CI:
+  - ".github/**/*"
@@ -0,0 +1,125 @@
+# Copyright (c) 2025, NVIDIA CORPORATION.  All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+name: ~test template
+
+# Reusable workflow: it is only triggered through `workflow_call`, with the
+# inputs below supplied by the calling workflow.
+on:
+  workflow_call:
+    inputs:
+      RUNNER:
+        type: string
+        description: Runner to use for test
+        required: true
+      TIMEOUT:
+        type: number
+        description: Max runtime of test in minutes
+        required: false
+        default: 10
+      SCRIPT:
+        type: string
+        description: Test script to execute
+        required: true
+      # The default ":" marks "no after-script": the after_script step is
+      # skipped when this input equals ':'.
+      AFTER_SCRIPT:
+        type: string
+        description: Script to run after main test
+        required: false
+        default: ":"
+      # The cancel-workflow step runs on failure only while this is false,
+      # so optional tests do not cancel the rest of the run.
+      IS_OPTIONAL:
+        type: boolean
+        description: Failure will cancel all other tests unless set to true
+        required: false
+        default: false
+    outputs:
+      conclusion:
+        description: Conclusion of main test step
+        value: ${{ jobs.main.outputs.conclusion }}
+      log:
+        description: Base64-encoded last 2000 bytes of the test step's stderr log
+        value: ${{ jobs.main.outputs.log }}
+jobs:
+
+  # Single job: runs the caller-supplied SCRIPT inside a per-run container and
+  # exposes the main step's conclusion and log tail as job outputs.
+  main:
+    runs-on: ${{ inputs.RUNNER }}
+    outputs:
+      conclusion: ${{ steps.main.conclusion }}
+      log: ${{ steps.main.outputs.log }}
+    steps:
+        # Best-effort cleanup of Docker data older than 48h; `|| true` keeps a
+        # failed prune from failing the job.
+        - name: Docker system cleanup
+          run: |
+            docker system prune -a --filter "until=48h" --force || true
+
+        # The image tag is the workflow run id.
+        # NOTE(review): presumably pushed by an earlier job of the calling
+        # workflow - confirm.
+        - name: Docker pull image
+          run: |
+            docker pull nemoci.azurecr.io/nemo__placeholder_container:${{ github.run_id }}
+
+        # Start the container detached and keep it alive with `sleep` for
+        # TIMEOUT minutes plus 60 seconds so later steps can `docker exec` into it.
+        - name: Start container
+          run: |
+            docker run --rm -d --name nemo_container_${{ github.run_id }} --runtime=nvidia --gpus all --shm-size=64g \
+              --env TRANSFORMERS_OFFLINE=0 \
+              --env HYDRA_FULL_ERROR=1 \
+              --env HF_HOME=/home/TestData/_placeholder/hf_home \
+              --env _PLACEHOLDER_CI_DIR=/home/TestData/_placeholder \
+              --env _PLACEHOLDER_REPO_DIR=/opt/NeMo-_Placeholder \
+              --volume /mnt/datadrive/TestData/_placeholder/checkpoints:/home/TestData/_placeholder/checkpoints:ro \
+              --volume /mnt/datadrive/TestData/_placeholder/hf_home/hub:/home/TestData/_placeholder/hf_home/hub:ro \
+              nemoci.azurecr.io/nemo__placeholder_container:${{ github.run_id }} \
+              bash -c "sleep $(( ${{ inputs.TIMEOUT }} * 60 + 60 ))"
+
+        # Run SCRIPT in the container. The heredoc delimiter is quoted, so the
+        # command text is not expanded on the host; variables such as
+        # $_PLACEHOLDER_REPO_DIR are resolved by the bash inside the container,
+        # which runs with -u (unset variables are errors). stderr is tee'd to
+        # err.log, its last 2000 bytes are base64-encoded into the `log` output,
+        # and the step exits with the subshell's exit code.
+        - id: main
+          name: Run main script
+          timeout-minutes: ${{ inputs.TIMEOUT }}
+          run: |
+            # Print the host driver for debugging
+            nvidia-smi
+            mkdir -p ${{ github.run_id }}
+            cd ${{ github.run_id }}/
+
+            set +e
+            (
+            set -e
+
+            cmd=$(cat <<"RUN_TEST_EOF"
+            nvidia-smi
+            # Sanity check the driver/cuda combo
+            cudaCheck
+            # In case git commands need to be run inside _Placeholder
+            git config --global --add safe.directory $_PLACEHOLDER_REPO_DIR
+            ${{ inputs.SCRIPT }}
+            RUN_TEST_EOF
+            )
+            docker exec nemo_container_${{ github.run_id }} bash -eux -o pipefail -c "$cmd"
+            ) 2> >(tee err.log)
+
+            EXIT_CODE=$?
+
+            echo "log=$(tail -c 2000 err.log |  base64 -w 0)" >> "$GITHUB_OUTPUT"
+
+            exit $EXIT_CODE
+
+        # Runs only when an earlier step failed and the test is not optional.
+        - uses: "NVIDIA/NeMo/.github/actions/cancel-workflow@main"
+          if: failure() && inputs.IS_OPTIONAL == false
+
+        # Runs regardless of earlier failures, unless AFTER_SCRIPT is left at
+        # its ':' default.
+        - name: after_script
+          if: always() && inputs.AFTER_SCRIPT != ':'
+          run: |
+            cmd=$(cat <<"RUN_TEST_EOF"
+            ${{ inputs.AFTER_SCRIPT }}
+            RUN_TEST_EOF
+            )
+            docker exec nemo_container_${{ github.run_id }} bash -eux -o pipefail -c "$cmd"
+
+        # Always stop and remove the container; both commands are best-effort.
+        - name: Container shutdown
+          if: always()
+          run: |
+            docker container stop nemo_container_${{ github.run_id }} || true
+            docker container rm nemo_container_${{ github.run_id }} || true
@@ -0,0 +1,37 @@
+# Copyright (c) 2025, NVIDIA CORPORATION.  All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# name: Build, test, and publish a PyPi wheel (to testpypi)
+
+# on: 
+#   push:
+#     branches:
+#       - main
+#       - 'r**'
+
+# defaults:
+#   run:
+#     shell: bash -x -e -u -o pipefail {0}
+
+# jobs:
+#   build-test-publish-wheel:
+#     uses: NVIDIA/NeMo-FW-CI-templates/.github/workflows/_build_test_publish_wheel.yml@v0.22.3
+#     with:
+#       dry-run: true
+#       python-package: nemo__placeholder
+#       python-version: "3.12"
+#     secrets:
+#       TWINE_USERNAME: ${{ secrets.TWINE_USERNAME }}
+#       TWINE_PASSWORD: ${{ secrets.TWINE_PASSWORD }}
+#       SLACK_WEBHOOK: ${{ secrets.SLACK_RELEASE_ENDPOINT }}
+#       SLACK_WEBHOOK_ADMIN: ${{ secrets.SLACK_WEBHOOK_ADMIN }}
@@ -0,0 +1,27 @@
+# Copyright (c) 2025, NVIDIA CORPORATION.  All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+name: Create PR to main with cherry-pick from release
+
+# Fires on every push to the main branch.
+on:
+  push:
+    branches: [main]
+
+jobs:
+  # All of the work is delegated to the reusable workflow pinned at v0.22.7.
+  cherry-pick:
+    uses: NVIDIA/NeMo-FW-CI-templates/.github/workflows/_cherry_pick.yml@v0.22.7
+    # Repository secrets forwarded to the reusable workflow.
+    secrets:
+      PAT: ${{ secrets.PAT }}
+      SLACK_WEBHOOK_ADMIN: ${{ secrets.SLACK_WEBHOOK_ADMIN }}
+      SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}