Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
28 changes: 16 additions & 12 deletions .github/workflows/_run_test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,9 @@ on:
description: Failure will cancel all other tests if set to true
required: false
default: false
secrets:
HF_TOKEN:
required: true
outputs:
conclusion:
description: Conclusion of main test step
Expand All @@ -60,24 +63,26 @@ jobs:

- name: Docker pull image
run: |
docker pull nemoci.azurecr.io/nemo__placeholder_container:${{ github.run_id }}
docker pull nemoci.azurecr.io/nemo_reinforcer_container:${{ github.run_id }}

- name: Start container
run: |
docker run --rm -d --name nemo_container_${{ github.run_id }} --runtime=nvidia --gpus all --shm-size=64g \
--env TRANSFORMERS_OFFLINE=0 \
--env HYDRA_FULL_ERROR=1 \
--env HF_HOME=/home/TestData/_placeholder/hf_home \
--env _PLACEHOLDER_CI_DIR=/home/TestData/_placeholder \
--env _PLACEHOLDER_REPO_DIR=/opt/NeMo-_Placeholder \
--volume /mnt/datadrive/TestData/_placeholder/checkpoints:/home/TestData/_placeholder/checkpoints:ro \
--volume /mnt/datadrive/TestData/_placeholder/hf_home/hub:/home/TestData/_placeholder/hf_home/hub:ro \
nemoci.azurecr.io/nemo__placeholder_container:${{ github.run_id }} \
--env HF_HOME=/home/TestData/reinforcer/hf_home \
--env REINFORCER_CI_DIR=/home/TestData/reinforcer \
--env REINFORCER_REPO_DIR=/opt/NeMo-Reinforcer \
--volume /mnt/datadrive/TestData/reinforcer/checkpoints:/home/TestData/reinforcer/checkpoints:ro \
--volume /mnt/datadrive/TestData/reinforcer/hf_home/hub:/home/TestData/reinforcer/hf_home/hub \
nemoci.azurecr.io/nemo_reinforcer_container:${{ github.run_id }} \
bash -c "sleep $(( ${{ inputs.TIMEOUT }} * 60 + 60 ))"

- id: main
name: Run main script
timeout-minutes: ${{ inputs.TIMEOUT }}
env:
HF_TOKEN: ${{ secrets.HF_TOKEN }}
run: |
# Print the host driver for debugging
nvidia-smi
Expand All @@ -90,14 +95,13 @@ jobs:

cmd=$(cat <<"RUN_TEST_EOF"
nvidia-smi
# Sanity check the driver/cuda combo
cudaCheck
# In case git commands need to be run inside _Placeholder
git config --global --add safe.directory $_PLACHOLDER_REPO_DIR

# In case git commands need to be run inside Reinforcer
git config --global --add safe.directory $REINFORCER_REPO_DIR
${{ inputs.SCRIPT }}
RUN_TEST_EOF
)
docker exec nemo_container_${{ github.run_id }} bash -eux -o pipefail -c "$cmd"
docker exec -u root -e HF_TOKEN nemo_container_${{ github.run_id }} bash -eux -o pipefail -c "$cmd"
) 2> >(tee err.log)

EXIT_CODE=$?
Expand Down
37 changes: 0 additions & 37 deletions .github/workflows/build-test-publish-wheel.yml

This file was deleted.

41 changes: 24 additions & 17 deletions .github/workflows/cicd-main.yml
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
name: "CICD NeMo _Placeholder"
name: "CICD Reinforcer"

on:
pull_request:
Expand Down Expand Up @@ -101,23 +101,30 @@ jobs:
pip install pre-commit
pre-commit install
pre-commit run --all-files --show-diff-on-failure --color=always

build-container:
if: ${{ needs.pre-flight.outputs.run_ci == 'true' }}
needs: [pre-flight]
uses: NVIDIA/NeMo-FW-CI-templates/.github/workflows/_build_container.yml@v0.22.7
with:
build-ref: ${{ github.sha }}
image-name: nemo_reinforcer_container
dockerfile: docker/Dockerfile
image-label: nemo-reinforcer
build-args: |
MAX_JOBS=32
REINFORCER_COMMIT=${{ github.sha }}

unit-tests:
name: Unit tests
needs: [pre-flight]
runs-on: ubuntu-latest
needs: [build-container, pre-flight]
uses: ./.github/workflows/_run_test.yml
if: ${{ needs.pre-flight.outputs.run_ci == 'true' }}
steps:
- name: Checkout repository
uses: actions/checkout@v4
- name: Run unit tests
run: |
pip install uv
uv venv -p python3.10 .venv
uv pip install --force-reinstall .
uv run --group test -- pytest

- name: after_script
if: always()
run: |
rm -rf .venv
with:
RUNNER: self-hosted-azure
TIMEOUT: 10
SCRIPT: |
cd ${REINFORCER_REPO_DIR}
uv run bash -x ./tests/run_unit.sh
secrets:
HF_TOKEN: ${{ secrets.HF_TOKEN }}
4 changes: 2 additions & 2 deletions .github/workflows/release-freeze.yml
Original file line number Diff line number Diff line change
Expand Up @@ -36,8 +36,8 @@ jobs:
code-freeze:
uses: NVIDIA/NeMo-FW-CI-templates/.github/workflows/_code_freeze.yml@v0.22.5
with:
library-name: NeMo-_Placeholder
python-package: nemo__placeholder
# Keep the capitalization identical to the library-name passed by the release workflow.
library-name: NeMo-Reinforcer
python-package: nemo_reinforcer
release-type: ${{ inputs.release-type }}
freeze-commit: ${{ inputs.freeze-commit }}
dry-run: ${{ inputs.dry-run }}
Expand Down
6 changes: 3 additions & 3 deletions .github/workflows/release.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
name: "Release _Placeholder"
name: "Release Reinforcer"

on:
workflow_dispatch:
Expand All @@ -35,9 +35,9 @@ jobs:
uses: NVIDIA/NeMo-FW-CI-templates/.github/workflows/_release_library.yml@v0.22.6
with:
release-ref: ${{ inputs.release-ref }}
python-package: nemo__placeholder
python-package: nemo_reinforcer
python-version: "3.11"
library-name: NeMo-_Placeholder
library-name: NeMo-Reinforcer
dry-run: ${{ inputs.dry-run }}
version-bump-branch: ${{ inputs.version-bump-branch }}
secrets:
Expand Down
4 changes: 2 additions & 2 deletions CONTRIBUTING.md
Original file line number Diff line number Diff line change
Expand Up @@ -37,8 +37,8 @@ We follow a direct clone and branch workflow for now:

1. Clone the repository directly:
```bash
git clone https://github.com/NVIDIA/nemo__placeholder
cd nemo-reinforcer
git clone https://github.com/NVIDIA/reinforcer
cd reinforcer
```

2. Create a new branch for your changes:
Expand Down
13 changes: 12 additions & 1 deletion docker/Dockerfile
Original file line number Diff line number Diff line change
@@ -1,7 +1,18 @@
ARG BASE_IMAGE=anyscale/ray:2.43.0-py312-cu125
FROM ${BASE_IMAGE}

WORKDIR /opt/NeMo-Reinforcer

RUN sudo apt-get update && sudo apt-get install -y jq

RUN pip install --no-cache-dir uv
RUN echo "unset RAY_RUNTIME_ENV_HOOK" >> /home/ray/.bashrc

COPY pyproject.toml .

RUN pip install uv && \
uv venv -p python3.12 && \
uv pip install -r pyproject.toml --extra dev --extra test

COPY . .

RUN uv pip install -e .
13 changes: 0 additions & 13 deletions nemo__placeholder/__init__.py

This file was deleted.

13 changes: 13 additions & 0 deletions nemo_reinforcer/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
from nemo_reinforcer.package_info import (
__contact_emails__,
__contact_names__,
__description__,
__download_url__,
__homepage__,
__keywords__,
__license__,
__package_name__,
__repository_url__,
__shortversion__,
__version__,
)
Original file line number Diff line number Diff line change
Expand Up @@ -24,12 +24,12 @@
__shortversion__ = ".".join(map(str, VERSION[:3]))
__version__ = ".".join(map(str, VERSION[:3])) + "".join(VERSION[3:])

__package_name__ = "nemo__placeholder"
__package_name__ = "nemo_reinforcer"
__contact_names__ = "NVIDIA"
__contact_emails__ = "nemo-_placeholder@nvidia.com"
# Contact address published in the package metadata.
__contact_emails__ = "nemo-toolkit@nvidia.com"
__homepage__ = "https://docs.nvidia.com/deeplearning/nemo/user-guide/docs/en/stable/"
__repository_url__ = "https://github.com/nvidia/nemo__placeholder"
__download_url__ = "https://github.com/NVIDIA/NeMo__placeholder"
__description__ = "_placeholder"
__repository_url__ = "https://github.com/NVIDIA/reinforcer"
__download_url__ = "https://github.com/NVIDIA/reinforcer/releases"
__description__ = "NeMo-Reinforcer - a toolkit for model alignment"
__license__ = "Apache2"
__keywords__ = "_placeholder"
__keywords__ = "deep learning, machine learning, gpu, NLP, NeMo, nvidia, pytorch, torch, language, reinforcement learning, RLHF, preference modeling, SteerLM, DPO"
18 changes: 0 additions & 18 deletions tests/unit/test__placeholder.py

This file was deleted.